@temporalio/core-bridge 1.15.0 → 1.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (209)
  1. package/Cargo.lock +172 -70
  2. package/lib/native.d.ts +1 -1
  3. package/package.json +2 -2
  4. package/releases/aarch64-apple-darwin/index.node +0 -0
  5. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
  6. package/releases/x86_64-apple-darwin/index.node +0 -0
  7. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  8. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  9. package/sdk-core/.github/workflows/per-pr.yml +6 -6
  10. package/sdk-core/AGENTS.md +41 -30
  11. package/sdk-core/Cargo.toml +3 -0
  12. package/sdk-core/README.md +15 -9
  13. package/sdk-core/crates/client/Cargo.toml +4 -0
  14. package/sdk-core/crates/client/README.md +139 -0
  15. package/sdk-core/crates/client/src/async_activity_handle.rs +297 -0
  16. package/sdk-core/crates/client/src/callback_based.rs +7 -0
  17. package/sdk-core/crates/client/src/errors.rs +294 -0
  18. package/sdk-core/crates/client/src/{raw.rs → grpc.rs} +280 -159
  19. package/sdk-core/crates/client/src/lib.rs +920 -1326
  20. package/sdk-core/crates/client/src/metrics.rs +24 -33
  21. package/sdk-core/crates/client/src/options_structs.rs +457 -0
  22. package/sdk-core/crates/client/src/replaceable.rs +5 -4
  23. package/sdk-core/crates/client/src/request_extensions.rs +8 -9
  24. package/sdk-core/crates/client/src/retry.rs +99 -54
  25. package/sdk-core/crates/client/src/{worker/mod.rs → worker.rs} +1 -1
  26. package/sdk-core/crates/client/src/workflow_handle.rs +826 -0
  27. package/sdk-core/crates/common/Cargo.toml +61 -2
  28. package/sdk-core/crates/common/build.rs +742 -12
  29. package/sdk-core/crates/common/protos/api_upstream/.github/workflows/ci.yml +2 -0
  30. package/sdk-core/crates/common/protos/api_upstream/Makefile +2 -1
  31. package/sdk-core/crates/common/protos/api_upstream/buf.yaml +0 -3
  32. package/sdk-core/crates/common/protos/api_upstream/cmd/check-path-conflicts/main.go +137 -0
  33. package/sdk-core/crates/common/protos/api_upstream/openapi/openapiv2.json +1166 -770
  34. package/sdk-core/crates/common/protos/api_upstream/openapi/openapiv3.yaml +1243 -750
  35. package/sdk-core/crates/common/protos/api_upstream/temporal/api/deployment/v1/message.proto +2 -2
  36. package/sdk-core/crates/common/protos/api_upstream/temporal/api/enums/v1/workflow.proto +4 -3
  37. package/sdk-core/crates/common/protos/api_upstream/temporal/api/failure/v1/message.proto +1 -0
  38. package/sdk-core/crates/common/protos/api_upstream/temporal/api/history/v1/message.proto +4 -0
  39. package/sdk-core/crates/common/protos/api_upstream/temporal/api/namespace/v1/message.proto +6 -0
  40. package/sdk-core/crates/common/protos/api_upstream/temporal/api/nexus/v1/message.proto +16 -1
  41. package/sdk-core/crates/common/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +64 -6
  42. package/sdk-core/crates/common/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +88 -33
  43. package/sdk-core/crates/common/protos/local/temporal/sdk/core/nexus/nexus.proto +4 -2
  44. package/sdk-core/crates/common/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +4 -0
  45. package/sdk-core/crates/common/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +5 -5
  46. package/sdk-core/crates/common/src/activity_definition.rs +20 -0
  47. package/sdk-core/crates/common/src/data_converters.rs +770 -0
  48. package/sdk-core/crates/common/src/envconfig.rs +5 -0
  49. package/sdk-core/crates/common/src/lib.rs +15 -211
  50. package/sdk-core/crates/common/src/payload_visitor.rs +648 -0
  51. package/sdk-core/crates/common/src/priority.rs +110 -0
  52. package/sdk-core/crates/common/src/protos/canned_histories.rs +3 -0
  53. package/sdk-core/crates/common/src/protos/history_builder.rs +45 -0
  54. package/sdk-core/crates/common/src/protos/history_info.rs +2 -0
  55. package/sdk-core/crates/common/src/protos/mod.rs +122 -27
  56. package/sdk-core/crates/common/src/protos/task_token.rs +3 -3
  57. package/sdk-core/crates/common/src/protos/utilities.rs +11 -0
  58. package/sdk-core/crates/{sdk-core → common}/src/telemetry/log_export.rs +5 -7
  59. package/sdk-core/crates/common/src/telemetry/metrics/core.rs +125 -0
  60. package/sdk-core/crates/common/src/telemetry/metrics.rs +268 -223
  61. package/sdk-core/crates/{sdk-core → common}/src/telemetry/otel.rs +8 -13
  62. package/sdk-core/crates/{sdk-core → common}/src/telemetry/prometheus_meter.rs +49 -50
  63. package/sdk-core/crates/{sdk-core → common}/src/telemetry/prometheus_server.rs +2 -3
  64. package/sdk-core/crates/common/src/telemetry.rs +264 -4
  65. package/sdk-core/crates/common/src/worker.rs +68 -603
  66. package/sdk-core/crates/common/src/workflow_definition.rs +60 -0
  67. package/sdk-core/crates/macros/Cargo.toml +5 -1
  68. package/sdk-core/crates/macros/src/activities_definitions.rs +585 -0
  69. package/sdk-core/crates/macros/src/fsm_impl.rs +507 -0
  70. package/sdk-core/crates/macros/src/lib.rs +138 -512
  71. package/sdk-core/crates/macros/src/macro_utils.rs +106 -0
  72. package/sdk-core/crates/macros/src/workflow_definitions.rs +1224 -0
  73. package/sdk-core/crates/sdk/Cargo.toml +19 -6
  74. package/sdk-core/crates/sdk/README.md +415 -0
  75. package/sdk-core/crates/sdk/src/activities.rs +417 -0
  76. package/sdk-core/crates/sdk/src/interceptors.rs +1 -1
  77. package/sdk-core/crates/sdk/src/lib.rs +757 -442
  78. package/sdk-core/crates/sdk/src/workflow_context/options.rs +45 -35
  79. package/sdk-core/crates/sdk/src/workflow_context.rs +1033 -289
  80. package/sdk-core/crates/sdk/src/workflow_future.rs +277 -213
  81. package/sdk-core/crates/sdk/src/workflows.rs +711 -0
  82. package/sdk-core/crates/sdk-core/Cargo.toml +57 -64
  83. package/sdk-core/crates/sdk-core/benches/workflow_replay_bench.rs +41 -35
  84. package/sdk-core/crates/sdk-core/machine_coverage/ActivityMachine_Coverage.puml +1 -1
  85. package/sdk-core/crates/sdk-core/src/abstractions.rs +6 -10
  86. package/sdk-core/crates/sdk-core/src/core_tests/activity_tasks.rs +6 -5
  87. package/sdk-core/crates/sdk-core/src/core_tests/mod.rs +13 -15
  88. package/sdk-core/crates/sdk-core/src/core_tests/queries.rs +21 -25
  89. package/sdk-core/crates/sdk-core/src/core_tests/replay_flag.rs +7 -10
  90. package/sdk-core/crates/sdk-core/src/core_tests/updates.rs +14 -17
  91. package/sdk-core/crates/sdk-core/src/core_tests/workers.rs +493 -26
  92. package/sdk-core/crates/sdk-core/src/core_tests/workflow_tasks.rs +4 -8
  93. package/sdk-core/crates/sdk-core/src/ephemeral_server/mod.rs +7 -7
  94. package/sdk-core/crates/sdk-core/src/histfetch.rs +20 -10
  95. package/sdk-core/crates/sdk-core/src/lib.rs +41 -111
  96. package/sdk-core/crates/sdk-core/src/pollers/mod.rs +4 -9
  97. package/sdk-core/crates/sdk-core/src/pollers/poll_buffer.rs +118 -19
  98. package/sdk-core/crates/sdk-core/src/protosext/mod.rs +2 -2
  99. package/sdk-core/crates/sdk-core/src/replay/mod.rs +14 -5
  100. package/sdk-core/crates/sdk-core/src/telemetry/metrics.rs +179 -196
  101. package/sdk-core/crates/sdk-core/src/telemetry/mod.rs +3 -280
  102. package/sdk-core/crates/sdk-core/src/test_help/integ_helpers.rs +6 -9
  103. package/sdk-core/crates/sdk-core/src/test_help/unit_helpers.rs +3 -6
  104. package/sdk-core/crates/sdk-core/src/worker/activities/local_activities.rs +11 -14
  105. package/sdk-core/crates/sdk-core/src/worker/activities.rs +16 -19
  106. package/sdk-core/crates/sdk-core/src/worker/client/mocks.rs +9 -5
  107. package/sdk-core/crates/sdk-core/src/worker/client.rs +103 -81
  108. package/sdk-core/crates/sdk-core/src/worker/heartbeat.rs +7 -11
  109. package/sdk-core/crates/sdk-core/src/worker/mod.rs +1124 -229
  110. package/sdk-core/crates/sdk-core/src/worker/nexus.rs +145 -23
  111. package/sdk-core/crates/sdk-core/src/worker/slot_provider.rs +2 -2
  112. package/sdk-core/crates/sdk-core/src/worker/tuner/fixed_size.rs +2 -2
  113. package/sdk-core/crates/sdk-core/src/worker/tuner/resource_based.rs +13 -13
  114. package/sdk-core/crates/sdk-core/src/worker/tuner.rs +28 -8
  115. package/sdk-core/crates/sdk-core/src/worker/workflow/driven_workflow.rs +9 -3
  116. package/sdk-core/crates/sdk-core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +21 -22
  117. package/sdk-core/crates/sdk-core/src/worker/workflow/machines/workflow_machines.rs +19 -4
  118. package/sdk-core/crates/sdk-core/src/worker/workflow/managed_run.rs +14 -18
  119. package/sdk-core/crates/sdk-core/src/worker/workflow/mod.rs +4 -6
  120. package/sdk-core/crates/sdk-core/src/worker/workflow/run_cache.rs +4 -7
  121. package/sdk-core/crates/sdk-core/src/worker/workflow/wft_extraction.rs +2 -4
  122. package/sdk-core/crates/sdk-core/src/worker/workflow/wft_poller.rs +8 -9
  123. package/sdk-core/crates/sdk-core/src/worker/workflow/workflow_stream.rs +1 -3
  124. package/sdk-core/crates/sdk-core/tests/activities_procmacro.rs +6 -0
  125. package/sdk-core/crates/sdk-core/tests/activities_trybuild/basic_pass.rs +54 -0
  126. package/sdk-core/crates/sdk-core/tests/activities_trybuild/invalid_self_type_fail.rs +18 -0
  127. package/sdk-core/crates/sdk-core/tests/activities_trybuild/invalid_self_type_fail.stderr +5 -0
  128. package/sdk-core/crates/sdk-core/tests/activities_trybuild/missing_context_fail.rs +14 -0
  129. package/sdk-core/crates/sdk-core/tests/activities_trybuild/missing_context_fail.stderr +5 -0
  130. package/sdk-core/crates/sdk-core/tests/activities_trybuild/multi_arg_pass.rs +48 -0
  131. package/sdk-core/crates/sdk-core/tests/activities_trybuild/no_input_pass.rs +14 -0
  132. package/sdk-core/crates/sdk-core/tests/activities_trybuild/no_return_type_pass.rs +19 -0
  133. package/sdk-core/crates/sdk-core/tests/cloud_tests.rs +14 -5
  134. package/sdk-core/crates/sdk-core/tests/common/activity_functions.rs +55 -0
  135. package/sdk-core/crates/sdk-core/tests/common/mod.rs +241 -196
  136. package/sdk-core/crates/sdk-core/tests/common/workflows.rs +41 -28
  137. package/sdk-core/crates/sdk-core/tests/global_metric_tests.rs +3 -5
  138. package/sdk-core/crates/sdk-core/tests/heavy_tests/fuzzy_workflow.rs +73 -64
  139. package/sdk-core/crates/sdk-core/tests/heavy_tests.rs +298 -252
  140. package/sdk-core/crates/sdk-core/tests/integ_tests/async_activity_client_tests.rs +230 -0
  141. package/sdk-core/crates/sdk-core/tests/integ_tests/client_tests.rs +94 -57
  142. package/sdk-core/crates/sdk-core/tests/integ_tests/data_converter_tests.rs +381 -0
  143. package/sdk-core/crates/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +16 -12
  144. package/sdk-core/crates/sdk-core/tests/integ_tests/heartbeat_tests.rs +48 -40
  145. package/sdk-core/crates/sdk-core/tests/integ_tests/metrics_tests.rs +327 -255
  146. package/sdk-core/crates/sdk-core/tests/integ_tests/pagination_tests.rs +50 -45
  147. package/sdk-core/crates/sdk-core/tests/integ_tests/polling_tests.rs +147 -126
  148. package/sdk-core/crates/sdk-core/tests/integ_tests/queries_tests.rs +103 -89
  149. package/sdk-core/crates/sdk-core/tests/integ_tests/update_tests.rs +609 -453
  150. package/sdk-core/crates/sdk-core/tests/integ_tests/visibility_tests.rs +80 -62
  151. package/sdk-core/crates/sdk-core/tests/integ_tests/worker_heartbeat_tests.rs +360 -231
  152. package/sdk-core/crates/sdk-core/tests/integ_tests/worker_tests.rs +248 -185
  153. package/sdk-core/crates/sdk-core/tests/integ_tests/worker_versioning_tests.rs +52 -43
  154. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_client_tests.rs +180 -0
  155. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/activities.rs +428 -315
  156. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +82 -56
  157. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +56 -28
  158. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +364 -243
  159. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/client_interactions.rs +552 -0
  160. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +101 -42
  161. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +243 -147
  162. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/eager.rs +98 -28
  163. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +1475 -1036
  164. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +73 -41
  165. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/nexus.rs +397 -238
  166. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/patches.rs +414 -189
  167. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/queries.rs +415 -0
  168. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/replay.rs +96 -36
  169. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/resets.rs +154 -137
  170. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/signals.rs +183 -105
  171. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +85 -38
  172. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/timers.rs +142 -40
  173. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +73 -54
  174. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests.rs +363 -226
  175. package/sdk-core/crates/sdk-core/tests/main.rs +17 -15
  176. package/sdk-core/crates/sdk-core/tests/manual_tests.rs +207 -152
  177. package/sdk-core/crates/sdk-core/tests/shared_tests/mod.rs +65 -34
  178. package/sdk-core/crates/sdk-core/tests/shared_tests/priority.rs +107 -84
  179. package/sdk-core/crates/sdk-core/tests/workflows_procmacro.rs +6 -0
  180. package/sdk-core/crates/sdk-core/tests/workflows_trybuild/async_query_fail.rs +26 -0
  181. package/sdk-core/crates/sdk-core/tests/workflows_trybuild/async_query_fail.stderr +5 -0
  182. package/sdk-core/crates/sdk-core/tests/workflows_trybuild/basic_pass.rs +49 -0
  183. package/sdk-core/crates/sdk-core/tests/workflows_trybuild/minimal_pass.rs +21 -0
  184. package/sdk-core/crates/sdk-core/tests/workflows_trybuild/mut_query_fail.rs +26 -0
  185. package/sdk-core/crates/sdk-core/tests/workflows_trybuild/mut_query_fail.stderr +5 -0
  186. package/sdk-core/crates/sdk-core/tests/workflows_trybuild/sync_run_fail.rs +21 -0
  187. package/sdk-core/crates/sdk-core/tests/workflows_trybuild/sync_run_fail.stderr +5 -0
  188. package/sdk-core/crates/sdk-core-c-bridge/Cargo.toml +7 -1
  189. package/sdk-core/crates/sdk-core-c-bridge/include/temporal-sdk-core-c-bridge.h +14 -14
  190. package/sdk-core/crates/sdk-core-c-bridge/src/client.rs +83 -74
  191. package/sdk-core/crates/sdk-core-c-bridge/src/metric.rs +9 -14
  192. package/sdk-core/crates/sdk-core-c-bridge/src/runtime.rs +1 -2
  193. package/sdk-core/crates/sdk-core-c-bridge/src/tests/context.rs +13 -13
  194. package/sdk-core/crates/sdk-core-c-bridge/src/tests/mod.rs +6 -6
  195. package/sdk-core/crates/sdk-core-c-bridge/src/tests/utils.rs +3 -4
  196. package/sdk-core/crates/sdk-core-c-bridge/src/worker.rs +62 -75
  197. package/sdk-core/rustfmt.toml +2 -1
  198. package/src/client.rs +205 -318
  199. package/src/metrics.rs +22 -30
  200. package/src/runtime.rs +4 -5
  201. package/src/worker.rs +16 -19
  202. package/ts/native.ts +1 -1
  203. package/sdk-core/crates/client/src/workflow_handle/mod.rs +0 -212
  204. package/sdk-core/crates/common/src/errors.rs +0 -85
  205. package/sdk-core/crates/common/tests/worker_task_types_test.rs +0 -129
  206. package/sdk-core/crates/sdk/src/activity_context.rs +0 -238
  207. package/sdk-core/crates/sdk/src/app_data.rs +0 -37
  208. package/sdk-core/crates/sdk-core/tests/integ_tests/activity_functions.rs +0 -5
  209. package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/appdata_propagation.rs +0 -61
@@ -6,7 +6,18 @@ mod slot_provider;
6
6
  pub(crate) mod tuner;
7
7
  mod workflow;
8
8
 
9
- pub use temporalio_common::worker::{WorkerConfig, WorkerConfigBuilder};
9
+ use temporalio_client::Connection;
10
+ use temporalio_common::{
11
+ protos::{
12
+ coresdk::{
13
+ ActivitySlotInfo, LocalActivitySlotInfo, NamespaceInfo, NexusSlotInfo,
14
+ WorkflowSlotInfo, activity_result::ActivityExecutionResult, namespace_info,
15
+ },
16
+ temporal::api::{enums::v1::VersioningBehavior, worker::v1::PluginInfo},
17
+ },
18
+ telemetry::TelemetryInstance,
19
+ worker::{WorkerDeploymentOptions, WorkerDeploymentVersion},
20
+ };
10
21
  pub use tuner::{
11
22
  FixedSizeSlotSupplier, ResourceBasedSlotsOptions, ResourceBasedSlotsOptionsBuilder,
12
23
  ResourceBasedTuner, ResourceSlotOptions, SlotSupplierOptions, TunerBuilder, TunerHolder,
@@ -26,18 +37,13 @@ pub(crate) use wft_poller::WFTPollerShared;
26
37
  pub use workflow::LEGACY_QUERY_ID;
27
38
 
28
39
  use crate::{
29
- ActivityHeartbeat, CompleteActivityError, PollError, WorkerTrait,
40
+ ActivityHeartbeat,
30
41
  abstractions::{MeteredPermitDealer, PermitDealerContextData, dbg_panic},
31
- errors::CompleteWfError,
32
42
  pollers::{ActivityTaskOptions, BoxedActPoller, BoxedNexusPoller, LongPollBuffer},
33
43
  protosext::validate_activity_completion,
34
- sealed::AnyClient,
35
- telemetry::{
36
- TelemetryInstance,
37
- metrics::{
38
- MetricsContext, activity_poller, activity_worker_type, local_activity_worker_type,
39
- nexus_poller, nexus_worker_type, workflow_worker_type,
40
- },
44
+ telemetry::metrics::{
45
+ MetricsContext, WorkerHeartbeatMetrics, activity_poller, activity_worker_type,
46
+ local_activity_worker_type, nexus_poller, nexus_worker_type, workflow_worker_type,
41
47
  },
42
48
  worker::{
43
49
  activities::{LACompleteAction, LocalActivityManager, NextPendingLAAction},
@@ -58,6 +64,8 @@ use gethostname::gethostname;
58
64
  use parking_lot::RwLock;
59
65
  use slot_provider::SlotProvider;
60
66
  use std::{
67
+ any::Any,
68
+ collections::{HashMap, HashSet},
61
69
  convert::TryInto,
62
70
  future,
63
71
  sync::{
@@ -70,15 +78,12 @@ use temporalio_client::worker::{
70
78
  ClientWorker, HeartbeatCallback, SharedNamespaceWorkerTrait, Slot as SlotTrait,
71
79
  };
72
80
  use temporalio_common::{
73
- errors::{CompleteNexusError, WorkerValidationError},
74
81
  protos::{
75
82
  TaskToken,
76
83
  coresdk::{
77
- ActivityTaskCompletion, NamespaceInfo,
78
- activity_result::activity_execution_result,
84
+ ActivityTaskCompletion,
79
85
  activity_task::ActivityTask,
80
- namespace_info,
81
- nexus::{NexusTask, NexusTaskCompletion, nexus_task_completion},
86
+ nexus::{NexusTask, NexusTaskCompletion},
82
87
  workflow_activation::{WorkflowActivation, remove_from_cache::EvictionReason},
83
88
  workflow_completion::WorkflowActivationCompletion,
84
89
  },
@@ -89,11 +94,8 @@ use temporalio_common::{
89
94
  worker::v1::{WorkerHeartbeat, WorkerHostInfo, WorkerPollerInfo, WorkerSlotsInfo},
90
95
  },
91
96
  },
92
- telemetry::metrics::{TemporalMeter, WorkerHeartbeatMetrics},
93
- worker::{
94
- ActivitySlotKind, LocalActivitySlotKind, NexusSlotKind, PollerBehavior, SlotKind,
95
- WorkerTaskTypes, WorkflowSlotKind,
96
- },
97
+ telemetry::metrics::TemporalMeter,
98
+ worker::WorkerTaskTypes,
97
99
  };
98
100
  use tokio::sync::{mpsc::unbounded_channel, watch};
99
101
  use tokio_stream::wrappers::UnboundedReceiverStream;
@@ -112,6 +114,278 @@ use {
112
114
  },
113
115
  };
114
116
 
117
+ /// Defines per-worker configuration options
118
+ #[derive(Clone, bon::Builder)]
119
+ #[builder(on(String, into), state_mod(vis = "pub"), finish_fn(vis = "", name = build_internal))]
120
+ #[non_exhaustive]
121
+ pub struct WorkerConfig {
122
+ /// The Temporal service namespace this worker is bound to
123
+ pub namespace: String,
124
+ /// What task queue will this worker poll from? This task queue name will be used for both
125
+ /// workflow and activity polling.
126
+ pub task_queue: String,
127
+ /// A human-readable string that can identify this worker. Using something like sdk version
128
+ /// and host name is a good default. If set, overrides the identity set (if any) on the client
129
+ /// used by this worker.
130
+ pub client_identity_override: Option<String>,
131
+ /// If set nonzero, workflows will be cached and sticky task queues will be used, meaning that
132
+ /// history updates are applied incrementally to suspended instances of workflow execution.
133
+ /// Workflows are evicted according to a least-recently-used policy once the cache maximum is
134
+ /// reached. Workflows may also be explicitly evicted at any time, or as a result of errors
135
+ /// or failures.
136
+ #[builder(default = 0)]
137
+ pub max_cached_workflows: usize,
138
+ /// Set a [crate::WorkerTuner] for this worker. Either this or at least one of the
139
+ /// `max_outstanding_*` fields must be set.
140
+ pub tuner: Option<Arc<dyn WorkerTuner + Send + Sync>>,
141
+ /// Maximum number of concurrent poll workflow task requests we will perform at a time on this
142
+ /// worker's task queue. See also [WorkerConfig::nonsticky_to_sticky_poll_ratio].
143
+ /// If using SimpleMaximum, must be at least 2 when `max_cached_workflows` > 0, or it is an error.
144
+ #[builder(default = PollerBehavior::SimpleMaximum(5))]
145
+ pub workflow_task_poller_behavior: PollerBehavior,
146
+ /// Only applies when using [PollerBehavior::SimpleMaximum]
147
+ ///
148
+ /// (max workflow task polls * this number) = the number of max pollers that will be allowed for
149
+ /// the nonsticky queue when sticky tasks are enabled. If both defaults are used, the sticky
150
+ /// queue will allow 4 max pollers while the nonsticky queue will allow one. The minimum for
151
+ /// either poller is 1, so if the maximum allowed is 1 and sticky queues are enabled, there will
152
+ /// be 2 concurrent polls.
153
+ #[builder(default = 0.2)]
154
+ pub nonsticky_to_sticky_poll_ratio: f32,
155
+ /// Maximum number of concurrent poll activity task requests we will perform at a time on this
156
+ /// worker's task queue
157
+ #[builder(default = PollerBehavior::SimpleMaximum(5))]
158
+ pub activity_task_poller_behavior: PollerBehavior,
159
+ /// Maximum number of concurrent poll nexus task requests we will perform at a time on this
160
+ /// worker's task queue
161
+ #[builder(default = PollerBehavior::SimpleMaximum(5))]
162
+ pub nexus_task_poller_behavior: PollerBehavior,
163
+ /// Specifies which task types this worker will poll for.
164
+ ///
165
+ /// Note: At least one task type must be specified or the worker will fail validation.
166
+ pub task_types: WorkerTaskTypes,
167
+ /// How long a workflow task is allowed to sit on the sticky queue before it is timed out
168
+ /// and moved to the non-sticky queue where it may be picked up by any worker.
169
+ #[builder(default = Duration::from_secs(10))]
170
+ pub sticky_queue_schedule_to_start_timeout: Duration,
171
+
172
+ /// Longest interval for throttling activity heartbeats
173
+ #[builder(default = Duration::from_secs(60))]
174
+ pub max_heartbeat_throttle_interval: Duration,
175
+
176
+ /// Default interval for throttling activity heartbeats in case
177
+ /// `ActivityOptions.heartbeat_timeout` is unset.
178
+ /// When the timeout *is* set in the `ActivityOptions`, throttling is set to
179
+ /// `heartbeat_timeout * 0.8`.
180
+ #[builder(default = Duration::from_secs(30))]
181
+ pub default_heartbeat_throttle_interval: Duration,
182
+
183
+ /// Sets the maximum number of activities per second the task queue will dispatch, controlled
184
+ /// server-side. Note that this only takes effect upon an activity poll request. If multiple
185
+ /// workers on the same queue have different values set, they will thrash with the last poller
186
+ /// winning.
187
+ ///
188
+ /// Setting this to a nonzero value will also disable eager activity execution.
189
+ pub max_task_queue_activities_per_second: Option<f64>,
190
+
191
+ /// Limits the number of activities per second that this worker will process. The worker will
192
+ /// not poll for new activities if by doing so it might receive and execute an activity which
193
+ /// would cause it to exceed this limit. Negative, zero, or NaN values will cause building
194
+ /// the options to fail.
195
+ pub max_worker_activities_per_second: Option<f64>,
196
+
197
+ /// If set false (default), shutdown will not finish until all pending evictions have been
198
+ /// issued and replied to. If set true shutdown will be considered complete when the only
199
+ /// remaining work is pending evictions.
200
+ ///
201
+ /// This flag is useful during tests to avoid needing to deal with lots of uninteresting
202
+ /// evictions during shutdown. Alternatively, if a lang implementation finds it easy to clean
203
+ /// up during shutdown, setting this true saves some back-and-forth.
204
+ #[builder(default = false)]
205
+ pub ignore_evicts_on_shutdown: bool,
206
+
207
+ /// Maximum number of next page (or initial) history event listing requests we'll make
208
+ /// concurrently. I don't think it's worth exposing this to users until we encounter a reason.
209
+ #[builder(default = 5)]
210
+ pub fetching_concurrency: usize,
211
+
212
+ /// If set, core will issue cancels for all outstanding activities and nexus operations after
213
+ /// shutdown has been initiated and this amount of time has elapsed.
214
+ pub graceful_shutdown_period: Option<Duration>,
215
+
216
+ /// The amount of time core will wait before timing out activities using its own local timers
217
+ /// after one of them elapses. This is to avoid racing with server's own tracking of the
218
+ /// timeout.
219
+ #[builder(default = Duration::from_secs(5))]
220
+ pub local_timeout_buffer_for_activities: Duration,
221
+
222
+ /// Any error types listed here will cause any workflow being processed by this worker to fail,
223
+ /// rather than simply failing the workflow task.
224
+ #[builder(default)]
225
+ pub workflow_failure_errors: HashSet<WorkflowErrorType>,
226
+
227
+ /// Like [WorkerConfig::workflow_failure_errors], but specific to certain workflow types (the
228
+ /// map key).
229
+ #[builder(default)]
230
+ pub workflow_types_to_failure_errors: HashMap<String, HashSet<WorkflowErrorType>>,
231
+
232
+ /// The maximum allowed number of workflow tasks that will ever be given to this worker at one
233
+ /// time. Note that one workflow task may require multiple activations - so the WFT counts as
234
+ /// "outstanding" until all activations it requires have been completed. Must be at least 2 if
235
+ /// `max_cached_workflows` is > 0, or is an error.
236
+ ///
237
+ /// Mutually exclusive with `tuner`
238
+ #[builder(into)]
239
+ pub max_outstanding_workflow_tasks: Option<usize>,
240
+ /// The maximum number of activity tasks that will ever be given to this worker concurrently.
241
+ ///
242
+ /// Mutually exclusive with `tuner`
243
+ #[builder(into)]
244
+ pub max_outstanding_activities: Option<usize>,
245
+ /// The maximum number of local activity tasks that will ever be given to this worker
246
+ /// concurrently.
247
+ ///
248
+ /// Mutually exclusive with `tuner`
249
+ #[builder(into)]
250
+ pub max_outstanding_local_activities: Option<usize>,
251
+ /// The maximum number of nexus tasks that will ever be given to this worker
252
+ /// concurrently.
253
+ ///
254
+ /// Mutually exclusive with `tuner`
255
+ #[builder(into)]
256
+ pub max_outstanding_nexus_tasks: Option<usize>,
257
+
258
+ /// A versioning strategy for this worker.
259
+ pub versioning_strategy: WorkerVersioningStrategy,
260
+
261
+ /// List of plugins used by lang.
262
+ #[builder(default)]
263
+ pub plugins: HashSet<PluginInfo>,
264
+
265
+ /// Skips the single worker+client+namespace+task_queue check
266
+ #[builder(default = false)]
267
+ pub skip_client_worker_set_check: bool,
268
+ }
269
+
270
+ impl WorkerConfig {
271
+ /// Returns true if the configuration specifies we should fail a workflow on a certain error
272
+ /// type rather than failing the workflow task.
273
+ pub fn should_fail_workflow(
274
+ &self,
275
+ workflow_type: &str,
276
+ error_type: &WorkflowErrorType,
277
+ ) -> bool {
278
+ self.workflow_failure_errors.contains(error_type)
279
+ || self
280
+ .workflow_types_to_failure_errors
281
+ .get(workflow_type)
282
+ .map(|s| s.contains(error_type))
283
+ .unwrap_or(false)
284
+ }
285
+
286
+ pub(crate) fn computed_deployment_version(&self) -> Option<WorkerDeploymentVersion> {
287
+ let wdv = match self.versioning_strategy {
288
+ WorkerVersioningStrategy::None { ref build_id } => WorkerDeploymentVersion {
289
+ deployment_name: "".to_owned(),
290
+ build_id: build_id.clone(),
291
+ },
292
+ WorkerVersioningStrategy::WorkerDeploymentBased(ref opts) => opts.version.clone(),
293
+ WorkerVersioningStrategy::LegacyBuildIdBased { ref build_id } => {
294
+ WorkerDeploymentVersion {
295
+ deployment_name: "".to_owned(),
296
+ build_id: build_id.clone(),
297
+ }
298
+ }
299
+ };
300
+ if wdv.is_empty() { None } else { Some(wdv) }
301
+ }
302
+ }
303
+
304
+ impl<S: worker_config_builder::IsComplete> WorkerConfigBuilder<S> {
305
+ /// Build and validate the worker configuration
306
+ pub fn build(self) -> Result<WorkerConfig, String> {
307
+ let config = self.build_internal();
308
+ let task_types = &config.task_types;
309
+ if task_types.is_empty() {
310
+ return Err("At least one task type must be enabled in `task_types`".to_string());
311
+ }
312
+ if !task_types.enable_workflows && task_types.enable_local_activities {
313
+ return Err(
314
+ "`task_types` cannot enable local activities without workflows".to_string(),
315
+ );
316
+ }
317
+
318
+ config.workflow_task_poller_behavior.validate()?;
319
+ config.activity_task_poller_behavior.validate()?;
320
+ config.nexus_task_poller_behavior.validate()?;
321
+
322
+ if let Some(ref x) = config.max_worker_activities_per_second
323
+ && (!x.is_normal() || x.is_sign_negative())
324
+ {
325
+ return Err(
326
+ "`max_worker_activities_per_second` must be positive and nonzero".to_string(),
327
+ );
328
+ }
329
+
330
+ if matches!(config.max_outstanding_workflow_tasks, Some(v) if v == 0) {
331
+ return Err("`max_outstanding_workflow_tasks` must be > 0".to_string());
332
+ }
333
+ if matches!(config.max_outstanding_activities, Some(v) if v == 0) {
334
+ return Err("`max_outstanding_activities` must be > 0".to_string());
335
+ }
336
+ if matches!(config.max_outstanding_local_activities, Some(v) if v == 0) {
337
+ return Err("`max_outstanding_local_activities` must be > 0".to_string());
338
+ }
339
+ if matches!(config.max_outstanding_nexus_tasks, Some(v) if v == 0) {
340
+ return Err("`max_outstanding_nexus_tasks` must be > 0".to_string());
341
+ }
342
+
343
+ if config.max_cached_workflows > 0 {
344
+ if let Some(max_wft) = config.max_outstanding_workflow_tasks
345
+ && max_wft < 2
346
+ {
347
+ return Err(
348
+ "`max_cached_workflows` > 0 requires `max_outstanding_workflow_tasks` >= 2"
349
+ .to_string(),
350
+ );
351
+ }
352
+ if matches!(config.workflow_task_poller_behavior, PollerBehavior::SimpleMaximum(u) if u < 2)
353
+ {
354
+ return Err("`max_cached_workflows` > 0 requires `workflow_task_poller_behavior` to be at least 2".to_string());
355
+ }
356
+ }
357
+
358
+ if config.tuner.is_some()
359
+ && (config.max_outstanding_workflow_tasks.is_some()
360
+ || config.max_outstanding_activities.is_some()
361
+ || config.max_outstanding_local_activities.is_some())
362
+ {
363
+ return Err("max_outstanding_* fields are mutually exclusive with `tuner`".to_string());
364
+ }
365
+
366
+ match &config.versioning_strategy {
367
+ WorkerVersioningStrategy::None { .. } => {}
368
+ WorkerVersioningStrategy::WorkerDeploymentBased(d) => {
369
+ if d.use_worker_versioning
370
+ && (d.version.build_id.is_empty() || d.version.deployment_name.is_empty())
371
+ {
372
+ return Err("WorkerDeploymentVersion must have a non-empty build_id and deployment_name when deployment-based versioning is enabled".to_string());
373
+ }
374
+ }
375
+ WorkerVersioningStrategy::LegacyBuildIdBased { build_id } => {
376
+ if build_id.is_empty() {
377
+ return Err(
378
+ "Legacy build id-based versioning must have a non-empty build_id"
379
+ .to_string(),
380
+ );
381
+ }
382
+ }
383
+ }
384
+
385
+ Ok(config)
386
+ }
387
+ }
388
+
115
389
  /// A worker polls on a certain task queue
116
390
  pub struct Worker {
117
391
  config: WorkerConfig,
@@ -141,6 +415,9 @@ pub struct Worker {
141
415
  client_worker_registrator: Arc<ClientWorkerRegistrator>,
142
416
  /// Status of the worker
143
417
  status: Arc<RwLock<WorkerStatus>>,
418
+ /// Set during validate() when the server supports graceful poll cancellation on shutdown.
419
+ /// Shared with pollers so they can decide per-poll whether to hard-kill or wait.
420
+ graceful_poll_shutdown: Arc<AtomicBool>,
144
421
  }
145
422
 
146
423
  struct AllPermitsTracker {
@@ -172,175 +449,10 @@ impl WorkerTelemetry {
172
449
  }
173
450
  }
174
451
 
175
- #[async_trait::async_trait]
176
- impl WorkerTrait for Worker {
177
- async fn validate(&self) -> Result<NamespaceInfo, WorkerValidationError> {
178
- match self.client.describe_namespace().await {
179
- Ok(info) => {
180
- let limits = info.namespace_info.and_then(|ns_info| {
181
- ns_info.limits.map(|api_limits| namespace_info::Limits {
182
- blob_size_limit_error: api_limits.blob_size_limit_error,
183
- memo_size_limit_error: api_limits.memo_size_limit_error,
184
- })
185
- });
186
- return Ok(NamespaceInfo { limits });
187
- }
188
- Err(e) => {
189
- if e.code() == tonic::Code::Unimplemented {
190
- // Ignore if unimplemented since we wouldn't want to fail against an old server, for
191
- // example.
192
- return Ok(NamespaceInfo {
193
- ..Default::default()
194
- });
195
- }
196
- return Err(WorkerValidationError::NamespaceDescribeError {
197
- source: e,
198
- namespace: self.config.namespace.clone(),
199
- });
200
- }
201
- }
202
- }
203
-
204
- async fn poll_workflow_activation(&self) -> Result<WorkflowActivation, PollError> {
205
- self.next_workflow_activation().await
206
- }
207
-
208
- #[instrument(skip(self))]
209
- async fn poll_activity_task(&self) -> Result<ActivityTask, PollError> {
210
- loop {
211
- match self.activity_poll().await.transpose() {
212
- Some(r) => break r,
213
- None => {
214
- tokio::task::yield_now().await;
215
- continue;
216
- }
217
- }
218
- }
219
- }
220
-
221
- #[instrument(skip(self))]
222
- async fn poll_nexus_task(&self) -> Result<NexusTask, PollError> {
223
- match &self.nexus_mgr {
224
- Some(mgr) => mgr.next_nexus_task().await,
225
- None => Err(PollError::ShutDown),
226
- }
227
- }
228
-
229
- async fn complete_workflow_activation(
230
- &self,
231
- completion: WorkflowActivationCompletion,
232
- ) -> Result<(), CompleteWfError> {
233
- self.complete_workflow_activation(completion).await
234
- }
235
-
236
- async fn complete_activity_task(
237
- &self,
238
- completion: ActivityTaskCompletion,
239
- ) -> Result<(), CompleteActivityError> {
240
- let task_token = TaskToken(completion.task_token);
241
- let status = if let Some(s) = completion.result.and_then(|r| r.status) {
242
- s
243
- } else {
244
- return Err(CompleteActivityError::MalformedActivityCompletion {
245
- reason: "Activity completion had empty result/status field".to_owned(),
246
- completion: None,
247
- });
248
- };
249
-
250
- self.complete_activity(task_token, status).await
251
- }
252
-
253
- async fn complete_nexus_task(
254
- &self,
255
- completion: NexusTaskCompletion,
256
- ) -> Result<(), CompleteNexusError> {
257
- let status = if let Some(s) = completion.status {
258
- s
259
- } else {
260
- return Err(CompleteNexusError::MalformedNexusCompletion {
261
- reason: "Nexus completion had empty status field".to_owned(),
262
- });
263
- };
264
-
265
- self.complete_nexus_task(TaskToken(completion.task_token), status)
266
- .await
267
- }
268
-
269
- fn record_activity_heartbeat(&self, details: ActivityHeartbeat) {
270
- self.record_heartbeat(details);
271
- }
272
-
273
- fn request_workflow_eviction(&self, run_id: &str) {
274
- self.request_wf_eviction(
275
- run_id,
276
- "Eviction explicitly requested by lang",
277
- EvictionReason::LangRequested,
278
- );
279
- }
280
-
281
- fn get_config(&self) -> &WorkerConfig {
282
- &self.config
283
- }
284
-
285
- /// Begins the shutdown process, tells pollers they should stop. Is idempotent.
286
- fn initiate_shutdown(&self) {
287
- if !self.shutdown_token.is_cancelled() {
288
- info!(
289
- task_queue=%self.config.task_queue,
290
- namespace=%self.config.namespace,
291
- "Initiated shutdown",
292
- );
293
- }
294
- self.shutdown_token.cancel();
295
- // First, disable Eager Workflow Start
296
- if !self.client_worker_registrator.shared_namespace_worker {
297
- let _res = self
298
- .client
299
- .workers()
300
- .unregister_slot_provider(self.worker_instance_key);
301
- }
302
-
303
- // Push a BumpStream message to the workflow activation queue. This ensures that
304
- // any pending workflow activation polls will resolve, even if there are no other inputs.
305
- if let Some(workflows) = &self.workflows {
306
- workflows.bump_stream();
307
- }
308
-
309
- // Second, we want to stop polling of both activity and workflow tasks
310
- if let Some(atm) = self.at_task_mgr.as_ref() {
311
- atm.initiate_shutdown();
312
- }
313
- // Let the manager know that shutdown has been initiated to try to unblock the local
314
- // activity poll in case this worker is an activity-only worker.
315
- if let Some(la_mgr) = &self.local_act_mgr {
316
- la_mgr.shutdown_initiated();
317
-
318
- // If workflows have never been polled, immediately tell the local activity manager
319
- // that workflows have shut down, so it can proceed with shutdown without waiting.
320
- // This is particularly important for activity-only workers.
321
- if self.workflows.as_ref().is_none_or(|w| !w.ever_polled()) {
322
- la_mgr.workflows_have_shutdown();
323
- }
324
- }
325
- }
326
-
327
- async fn shutdown(&self) {
328
- self.shutdown().await
329
- }
330
-
331
- async fn finalize_shutdown(self) {
332
- self.finalize_shutdown().await
333
- }
334
-
335
- fn worker_instance_key(&self) -> Uuid {
336
- self.worker_instance_key
337
- }
338
- }
339
-
340
452
  impl Worker {
341
453
  /// Creates a new [Worker] from a [WorkerClient] instance with real task pollers and optional
342
454
  /// telemetry.
343
- pub fn new(
455
+ pub(crate) fn new(
344
456
  config: WorkerConfig,
345
457
  sticky_queue_name: Option<String>,
346
458
  client: Arc<dyn WorkerClient>,
@@ -365,6 +477,39 @@ impl Worker {
365
477
  )
366
478
  }
367
479
 
480
+ /// Validate that the worker can properly connect to server, plus any other validation that
481
+ /// needs to be done asynchronously. Lang SDKs should call this function once before calling
482
+ /// any others.
483
+ pub async fn validate(&self) -> Result<NamespaceInfo, WorkerValidationError> {
484
+ match self.client.describe_namespace().await {
485
+ Ok(info) => {
486
+ let ns_info = info.namespace_info;
487
+ let limits = ns_info.as_ref().and_then(|ns_info| {
488
+ ns_info.limits.map(|api_limits| namespace_info::Limits {
489
+ blob_size_limit_error: api_limits.blob_size_limit_error,
490
+ memo_size_limit_error: api_limits.memo_size_limit_error,
491
+ })
492
+ });
493
+ if ns_info
494
+ .and_then(|ns| ns.capabilities)
495
+ .is_some_and(|caps| caps.worker_poll_complete_on_shutdown)
496
+ {
497
+ self.graceful_poll_shutdown.store(true, Ordering::Relaxed);
498
+ }
499
+ Ok(NamespaceInfo { limits })
500
+ }
501
+ Err(e) if e.code() == tonic::Code::Unimplemented => {
502
+ // Ignore if unimplemented since we wouldn't want to fail against an old server, for
503
+ // example.
504
+ Ok(NamespaceInfo::default())
505
+ }
506
+ Err(e) => Err(WorkerValidationError::NamespaceDescribeError {
507
+ source: e,
508
+ namespace: self.config.namespace.clone(),
509
+ }),
510
+ }
511
+ }
512
+
368
513
  /// Replace client.
369
514
  ///
370
515
  /// For eager workflow purposes, this new client will now apply to future eager start requests
@@ -374,10 +519,7 @@ impl Worker {
374
519
  /// For worker heartbeat, this will remove an existing shared worker if it is the last worker of
375
520
  /// the old client and create a new nexus worker if it's the first client of the namespace on
376
521
  /// the new client.
377
- pub fn replace_client<CT>(&self, new_client: CT) -> Result<(), anyhow::Error>
378
- where
379
- CT: Into<AnyClient>,
380
- {
522
+ pub fn replace_client(&self, mut new_connection: Connection) -> Result<(), anyhow::Error> {
381
523
  // Unregister worker from current client, register in new client at the end
382
524
  self.client
383
525
  .workers()
@@ -387,13 +529,12 @@ impl Worker {
387
529
  .workers()
388
530
  .finalize_unregister(self.worker_instance_key)?;
389
531
 
390
- let new_worker_client = super::init_worker_client(
391
- self.config.namespace.clone(),
532
+ super::init_worker_client(
533
+ &mut new_connection,
392
534
  self.config.client_identity_override.clone(),
393
- new_client,
394
535
  );
395
536
 
396
- self.client.replace_client(new_worker_client);
537
+ self.client.replace_connection(new_connection);
397
538
  *self.client_worker_registrator.client.write() = self.client.clone();
398
539
  self.client
399
540
  .workers()
@@ -475,6 +616,7 @@ impl Worker {
475
616
  let wf_sticky_last_suc_poll_time = Arc::new(AtomicCell::new(None));
476
617
  let act_last_suc_poll_time = Arc::new(AtomicCell::new(None));
477
618
  let nexus_last_suc_poll_time = Arc::new(AtomicCell::new(None));
619
+ let graceful_poll_shutdown = Arc::new(AtomicBool::new(false));
478
620
 
479
621
  let nexus_slots = MeteredPermitDealer::new(
480
622
  tuner.nexus_task_slot_supplier(),
@@ -495,6 +637,7 @@ impl Worker {
495
637
  &wft_slots,
496
638
  wf_last_suc_poll_time.clone(),
497
639
  wf_sticky_last_suc_poll_time.clone(),
640
+ graceful_poll_shutdown.clone(),
498
641
  )
499
642
  .boxed();
500
643
  let stream = if !client.is_mock() {
@@ -524,6 +667,7 @@ impl Worker {
524
667
  max_tps: config.max_task_queue_activities_per_second,
525
668
  },
526
669
  act_last_suc_poll_time.clone(),
670
+ graceful_poll_shutdown.clone(),
527
671
  );
528
672
  Some(Box::from(ap) as BoxedActPoller)
529
673
  } else {
@@ -541,6 +685,7 @@ impl Worker {
541
685
  Some(move |np| np_metrics.record_num_pollers(np)),
542
686
  nexus_last_suc_poll_time.clone(),
543
687
  shared_namespace_worker,
688
+ graceful_poll_shutdown.clone(),
544
689
  )) as BoxedNexusPoller)
545
690
  } else {
546
691
  None
@@ -645,9 +790,7 @@ impl Worker {
645
790
  });
646
791
 
647
792
  let deployment_options = match &config.versioning_strategy {
648
- temporalio_common::worker::WorkerVersioningStrategy::WorkerDeploymentBased(opts) => {
649
- Some(opts.clone())
650
- }
793
+ WorkerVersioningStrategy::WorkerDeploymentBased(opts) => Some(opts.clone()),
651
794
  _ => None,
652
795
  };
653
796
  let provider = SlotProvider::new(
@@ -657,7 +800,7 @@ impl Worker {
657
800
  external_wft_tx,
658
801
  deployment_options,
659
802
  );
660
- let worker_instance_key = Uuid::new_v4();
803
+ let worker_instance_key = client.worker_instance_key();
661
804
  let worker_status = Arc::new(RwLock::new(WorkerStatus::Running));
662
805
 
663
806
  let sdk_name_and_ver = client.sdk_name_and_version();
@@ -762,12 +905,23 @@ impl Worker {
762
905
  nexus_mgr,
763
906
  client_worker_registrator,
764
907
  status: worker_status,
908
+ graceful_poll_shutdown,
765
909
  })
766
910
  }
767
911
 
768
- /// Will shutdown the worker. Does not resolve until all outstanding workflow tasks have been
769
- /// completed
770
- async fn shutdown(&self) {
912
+ /// Initiates async shutdown procedure, eventually ceases all polling of the server and shuts
913
+ /// down this worker. [Worker::poll_workflow_activation] and [Worker::poll_activity_task] should
914
+ /// be called until both return a `ShutDown` error to ensure that all outstanding work is
915
+ /// complete. This means that the lang sdk will need to call
916
+ /// [Worker::complete_workflow_activation] and [Worker::complete_activity_task] for those
917
+ /// workflows & activities until they are done. At that point, the lang SDK can end the process,
918
+ /// or drop the [Worker] instance via [Worker::finalize_shutdown], which will close the
919
+ /// connection and free resources. If you have set [WorkerConfig::task_types] to exclude
920
+ /// [WorkerTaskTypes::activity_only()], you may skip calling [Worker::poll_activity_task].
921
+ ///
922
+ /// Lang implementations should use [Worker::initiate_shutdown] followed by
923
+ /// [Worker::finalize_shutdown].
924
+ pub async fn shutdown(&self) {
771
925
  self.initiate_shutdown();
772
926
  {
773
927
  *self.status.write() = WorkerStatus::ShuttingDown;
@@ -783,7 +937,17 @@ impl Worker {
783
937
  .and_then(|wf| wf.get_sticky_queue_name())
784
938
  .unwrap_or_default();
785
939
  // This is a best effort call and we can still shutdown the worker if it fails
786
- match self.client.shutdown_worker(sticky_name, heartbeat).await {
940
+ let task_queue_types = self.config.task_types.to_task_queue_types();
941
+ match self
942
+ .client
943
+ .shutdown_worker(
944
+ sticky_name,
945
+ self.config.task_queue.clone(),
946
+ task_queue_types,
947
+ heartbeat,
948
+ )
949
+ .await
950
+ {
787
951
  Err(err)
788
952
  if !matches!(
789
953
  err.code(),
@@ -827,8 +991,12 @@ impl Worker {
827
991
  }
828
992
  }
829
993
 
830
- /// Finish shutting down by consuming the background pollers and freeing all resources
831
- async fn finalize_shutdown(self) {
994
+ /// Completes shutdown and frees all resources. You should avoid simply dropping workers, as
995
+ /// this does not allow async tasks to report any panics that may have occurred cleanly.
996
+ ///
997
+ /// This should be called only after [Worker::shutdown] has resolved and/or both polling
998
+ /// functions have returned `ShutDown` errors.
999
+ pub async fn finalize_shutdown(self) {
832
1000
  self.shutdown().await;
833
1001
  if let Some(b) = self.at_task_mgr {
834
1002
  b.shutdown().await;
@@ -882,11 +1050,26 @@ impl Worker {
882
1050
  self.workflows.as_ref().and_then(|w| w.unused_wft_permits())
883
1051
  }
884
1052
 
885
- /// Get new activity tasks (may be local or nonlocal). Local activities are returned first
886
- /// before polling the server if there are any.
1053
+ /// Ask the worker for some work, returning an [ActivityTask]. It is then the language SDK's
1054
+ /// responsibility to call the appropriate activity code with the provided inputs. Blocks
1055
+ /// indefinitely until such work is available or [Worker::shutdown] is called.
1056
+ ///
1057
+ /// Do not call poll concurrently. It handles polling the server concurrently internally.
887
1058
  ///
888
- /// Returns `Ok(None)` in the event of a poll timeout or if the polling loop should otherwise
889
- /// be restarted
1059
+ /// Local activities are returned first before polling the server if there are any.
1060
+ #[instrument(skip(self))]
1061
+ pub async fn poll_activity_task(&self) -> Result<ActivityTask, PollError> {
1062
+ loop {
1063
+ match self.activity_poll().await.transpose() {
1064
+ Some(r) => break r,
1065
+ None => {
1066
+ tokio::task::yield_now().await;
1067
+ continue;
1068
+ }
1069
+ }
1070
+ }
1071
+ }
1072
+
890
1073
  async fn activity_poll(&self) -> Result<Option<ActivityTask>, PollError> {
891
1074
  let local_activities_complete = self.local_activities_complete.load(Ordering::Relaxed);
892
1075
  let non_local_activities_complete =
@@ -969,8 +1152,26 @@ impl Worker {
969
1152
  r
970
1153
  }
971
1154
 
972
- /// Attempt to record an activity heartbeat
973
- pub(crate) fn record_heartbeat(&self, details: ActivityHeartbeat) {
1155
+ /// Notify the Temporal service that an activity is still alive. Long running activities that
1156
+ /// take longer than `activity_heartbeat_timeout` to finish must call this function in order to
1157
+ /// report progress, otherwise the activity will timeout and a new attempt will be scheduled.
1158
+ ///
1159
+ /// The first heartbeat request will be sent immediately; subsequent rapid calls to this
1160
+ /// function will result in heartbeat requests being aggregated and the last one received during
1161
+ /// the aggregation period will be sent to the server, where that period is defined as half the
1162
+ /// heartbeat timeout.
1163
+ ///
1164
+ /// Unlike the Java/Go SDKs, we do not return cancellation status as part of the heartbeat response and
1165
+ /// instead send it as a separate activity task to the lang, decoupling heartbeat and
1166
+ /// cancellation processing.
1167
+ ///
1168
+ /// For now, activities still need to send heartbeats if they want to receive cancellation
1169
+ /// requests. In the future we will change this and will dispatch cancellations more
1170
+ /// proactively. Note that this function does not block on the server call and returns
1171
+ /// immediately. Underlying validation errors are swallowed and logged; this has been agreed to
1172
+ /// be optimal behavior for the user as we don't want to break activity execution due to badly
1173
+ /// configured heartbeat options.
1174
+ pub fn record_activity_heartbeat(&self, details: ActivityHeartbeat) {
974
1175
  if let Some(at_mgr) = self.at_task_mgr.as_ref() {
975
1176
  let tt = TaskToken(details.task_token.clone());
976
1177
  if let Err(e) = at_mgr.record_heartbeat(details) {
@@ -979,14 +1180,28 @@ impl Worker {
979
1180
  }
980
1181
  }
981
1182
 
982
- #[instrument(skip(self, task_token, status),
983
- fields(task_token=%&task_token, status=%&status,
984
- task_queue=%self.config.task_queue, workflow_id, run_id))]
985
- pub(crate) async fn complete_activity(
1183
+ /// Tell the worker that an activity has finished executing. May (and should) be freely called
1184
+ /// concurrently.
1185
+ #[instrument(skip(self, completion),
1186
+ fields(task_token, status,
1187
+ task_queue=%self.config.task_queue, workflow_id, run_id))]
1188
+ pub async fn complete_activity_task(
986
1189
  &self,
987
- task_token: TaskToken,
988
- status: activity_execution_result::Status,
1190
+ completion: ActivityTaskCompletion,
989
1191
  ) -> Result<(), CompleteActivityError> {
1192
+ let task_token = TaskToken(completion.task_token);
1193
+ let status = if let Some(s) = completion.result.and_then(|r| r.status) {
1194
+ s
1195
+ } else {
1196
+ return Err(CompleteActivityError::MalformedActivityCompletion {
1197
+ reason: "Activity completion had empty result/status field".to_owned(),
1198
+ completion: None,
1199
+ });
1200
+ };
1201
+
1202
+ tracing::Span::current().record("task_token", task_token.to_string());
1203
+ tracing::Span::current().record("status", status.to_string());
1204
+
990
1205
  validate_activity_completion(&status)?;
991
1206
  if task_token.is_local_activity_task() {
992
1207
  let as_la_res: LocalActivityExecutionResult = status.try_into()?;
@@ -1002,8 +1217,20 @@ impl Worker {
1002
1217
  }
1003
1218
  }
1004
1219
 
1220
+ /// Ask the worker for some work, returning a [WorkflowActivation]. It is then the language
1221
+ /// SDK's responsibility to call the appropriate workflow code with the provided inputs. Blocks
1222
+ /// indefinitely until such work is available or [Worker::shutdown] is called.
1223
+ ///
1224
+ /// It is important to understand that all activations must be responded to. There can only
1225
+ /// be one outstanding activation for a particular run of a workflow at any time. If an
1226
+ /// activation is not responded to, it will cause that workflow to become stuck forever.
1227
+ ///
1228
+ /// See [WorkflowActivation] for more details on the expected behavior of lang w.r.t activation
1229
+ /// & job processing.
1230
+ ///
1231
+ /// Do not call poll concurrently. It handles polling the server concurrently internally.
1005
1232
  #[instrument(skip(self), fields(run_id, workflow_id, task_queue=%self.config.task_queue))]
1006
- pub(crate) async fn next_workflow_activation(&self) -> Result<WorkflowActivation, PollError> {
1233
+ pub async fn poll_workflow_activation(&self) -> Result<WorkflowActivation, PollError> {
1007
1234
  match &self.workflows {
1008
1235
  Some(workflows) => {
1009
1236
  let r = workflows.next_workflow_activation().await;
@@ -1025,10 +1252,14 @@ impl Worker {
1025
1252
  }
1026
1253
  }
1027
1254
 
1255
+ /// Tell the worker that a workflow activation has completed. May (and should) be freely called
1256
+ /// concurrently. The future may take some time to resolve, as fetching more events might be
1257
+ /// necessary for completion to... complete - thus SDK implementers should make sure they do
1258
+ /// not serialize completions.
1028
1259
  #[instrument(skip(self, completion),
1029
1260
  fields(completion=%&completion, run_id=%completion.run_id, workflow_id,
1030
1261
  task_queue=%self.config.task_queue))]
1031
- pub(crate) async fn complete_workflow_activation(
1262
+ pub async fn complete_workflow_activation(
1032
1263
  &self,
1033
1264
  completion: WorkflowActivationCompletion,
1034
1265
  ) -> Result<(), CompleteWfError> {
@@ -1049,21 +1280,61 @@ impl Worker {
1049
1280
  }
1050
1281
  }
1051
1282
 
1283
+ /// Ask the worker for some nexus related work. It is then the language SDK's
1284
+ /// responsibility to call the appropriate nexus operation handler code with the provided
1285
+ /// inputs. Blocks indefinitely until such work is available or [Worker::shutdown] is called.
1286
+ ///
1287
+ /// All tasks must be responded to for shutdown to complete.
1288
+ ///
1289
+ /// Do not call poll concurrently. It handles polling the server concurrently internally.
1290
+ #[instrument(skip(self))]
1291
+ pub async fn poll_nexus_task(&self) -> Result<NexusTask, PollError> {
1292
+ match &self.nexus_mgr {
1293
+ Some(mgr) => mgr.next_nexus_task().await,
1294
+ None => Err(PollError::ShutDown),
1295
+ }
1296
+ }
1297
+
1298
+ /// Tell the worker that a nexus task has completed. May (and should) be freely called
1299
+ /// concurrently.
1052
1300
  #[instrument(
1053
- skip(self, tt, status),
1054
- fields(task_token=%&tt, status=%&status, task_queue=%self.config.task_queue)
1301
+ skip(self, completion),
1302
+ fields(task_token, status, task_queue=%self.config.task_queue)
1055
1303
  )]
1056
- async fn complete_nexus_task(
1304
+ pub async fn complete_nexus_task(
1057
1305
  &self,
1058
- tt: TaskToken,
1059
- status: nexus_task_completion::Status,
1306
+ completion: NexusTaskCompletion,
1060
1307
  ) -> Result<(), CompleteNexusError> {
1308
+ let status = if let Some(s) = completion.status {
1309
+ s
1310
+ } else {
1311
+ return Err(CompleteNexusError::MalformedNexusCompletion {
1312
+ reason: "Nexus completion had empty status field".to_owned(),
1313
+ });
1314
+ };
1315
+ let tt = TaskToken(completion.task_token);
1316
+ tracing::Span::current().record("task_token", tt.to_string());
1317
+ tracing::Span::current().record("status", status.to_string());
1318
+
1061
1319
  match &self.nexus_mgr {
1062
1320
  Some(mgr) => mgr.complete_task(tt, status, &*self.client).await,
1063
1321
  None => Err(CompleteNexusError::NexusNotEnabled),
1064
1322
  }
1065
1323
  }
1066
1324
 
1325
+ /// Request that a workflow be evicted by its run id. This will generate a workflow activation
1326
+ /// with the eviction job inside it to be eventually returned by
1327
+ /// [Worker::poll_workflow_activation]. If the workflow had any existing outstanding
1328
+ /// activations, such activations are invalidated and subsequent completions of them will do
1329
+ /// nothing and log a warning.
1330
+ pub fn request_workflow_eviction(&self, run_id: &str) {
1331
+ self.request_wf_eviction(
1332
+ run_id,
1333
+ "Eviction explicitly requested by lang",
1334
+ EvictionReason::LangRequested,
1335
+ );
1336
+ }
1337
+
1067
1338
  /// Request a workflow eviction
1068
1339
  pub(crate) fn request_wf_eviction(
1069
1340
  &self,
@@ -1078,6 +1349,63 @@ impl Worker {
1078
1349
  }
1079
1350
  }
1080
1351
 
1352
+ /// Return this worker's config
1353
+ pub fn get_config(&self) -> &WorkerConfig {
1354
+ &self.config
1355
+ }
1356
+
1357
+ /// Initiate shutdown. See [Worker::shutdown]; this is just a sync version that starts the
1358
+ /// process. You can then wait on `shutdown` or [Worker::finalize_shutdown].
1359
+ pub fn initiate_shutdown(&self) {
1360
+ if !self.shutdown_token.is_cancelled() {
1361
+ info!(
1362
+ task_queue=%self.config.task_queue,
1363
+ namespace=%self.config.namespace,
1364
+ "Initiated shutdown",
1365
+ );
1366
+ }
1367
+ self.shutdown_token.cancel();
1368
+ {
1369
+ *self.status.write() = WorkerStatus::ShuttingDown;
1370
+ }
1371
+ // First, disable Eager Workflow Start
1372
+ if !self.client_worker_registrator.shared_namespace_worker {
1373
+ let _res = self
1374
+ .client
1375
+ .workers()
1376
+ .unregister_slot_provider(self.worker_instance_key);
1377
+ }
1378
+
1379
+ // Push a BumpStream message to the workflow activation queue. This ensures that
1380
+ // any pending workflow activation polls will resolve, even if there are no other inputs.
1381
+ if let Some(workflows) = &self.workflows {
1382
+ workflows.bump_stream();
1383
+ }
1384
+
1385
+ // Second, we want to stop polling of both activity and workflow tasks
1386
+ if let Some(atm) = self.at_task_mgr.as_ref() {
1387
+ atm.initiate_shutdown();
1388
+ }
1389
+ // Let the manager know that shutdown has been initiated to try to unblock the local
1390
+ // activity poll in case this worker is an activity-only worker.
1391
+ if let Some(la_mgr) = &self.local_act_mgr {
1392
+ la_mgr.shutdown_initiated();
1393
+
1394
+ // If workflows have never been polled, immediately tell the local activity manager
1395
+ // that workflows have shut down, so it can proceed with shutdown without waiting.
1396
+ // This is particularly important for activity-only workers.
1397
+ if self.workflows.as_ref().is_none_or(|w| !w.ever_polled()) {
1398
+ la_mgr.workflows_have_shutdown();
1399
+ }
1400
+ }
1401
+ }
1402
+
1403
+ /// Unique identifier for this worker instance.
1404
+ /// This must be stable across the worker's lifetime and unique per instance.
1405
+ pub fn worker_instance_key(&self) -> Uuid {
1406
+ self.worker_instance_key
1407
+ }
1408
+
1081
1409
  /// Sets a function to be called at the end of each activation completion
1082
1410
  pub(crate) fn set_post_activate_hook(
1083
1411
  &mut self,
@@ -1120,6 +1448,446 @@ impl Worker {
1120
1448
  }
1121
1449
  }
1122
1450
 
1451
+ /// Errors thrown by [crate::Worker::validate]
1452
+ #[derive(thiserror::Error, Debug)]
1453
+ pub enum WorkerValidationError {
1454
+ /// The namespace provided to the worker does not exist on the server.
1455
+ #[error("Namespace {namespace} was not found or otherwise could not be described: {source:?}")]
1456
+ NamespaceDescribeError {
1457
+ /// The underlying server error.
1458
+ source: tonic::Status,
1459
+ /// The associated namespace.
1460
+ namespace: String,
1461
+ },
1462
+ }
1463
+
1464
+ /// Errors thrown by [crate::Worker] polling methods
1465
+ #[derive(thiserror::Error, Debug)]
1466
+ pub enum PollError {
1467
+ /// [crate::Worker::shutdown] was called, and there are no more tasks to be handled from this
1468
+ /// poll function. Lang must call [crate::Worker::complete_workflow_activation],
1469
+ /// [crate::Worker::complete_activity_task], or
1470
+ /// [crate::Worker::complete_nexus_task] for any remaining tasks, and then may exit.
1471
+ #[error("Core is shut down and there are no more tasks of this kind")]
1472
+ ShutDown,
1473
+ /// Unhandled error when calling the temporal server. Core will attempt to retry any non-fatal
1474
+ /// errors, so lang should consider this fatal.
1475
+ #[error("Unhandled grpc error when polling: {0:?}")]
1476
+ TonicError(#[from] tonic::Status),
1477
+ }
1478
+
1479
+ /// Errors thrown by [crate::Worker::complete_workflow_activation]
1480
+ #[derive(thiserror::Error, Debug)]
1481
+ #[allow(clippy::large_enum_variant)]
1482
+ pub enum CompleteWfError {
1483
+ /// Lang SDK sent us a malformed workflow completion. This likely means a bug in the lang sdk.
1484
+ #[error("Lang SDK sent us a malformed workflow completion for run ({run_id}): {reason}")]
1485
+ MalformedWorkflowCompletion {
1486
+ /// Reason the completion was malformed
1487
+ reason: String,
1488
+ /// The run associated with the completion
1489
+ run_id: String,
1490
+ },
1491
+ /// Workflows have not been enabled on this worker.
1492
+ #[error("Workflows are not enabled on this worker")]
1493
+ WorkflowNotEnabled,
1494
+ }
1495
+
1496
+ /// Errors thrown by [crate::Worker::complete_activity_task]
1497
+ #[derive(thiserror::Error, Debug)]
1498
+ #[allow(clippy::large_enum_variant)]
1499
+ pub enum CompleteActivityError {
1500
+ /// Lang SDK sent us a malformed activity completion. This likely means a bug in the lang sdk.
1501
+ #[error("Lang SDK sent us a malformed activity completion ({reason}): {completion:?}")]
1502
+ MalformedActivityCompletion {
1503
+ /// Reason the completion was malformed
1504
+ reason: String,
1505
+ /// The completion, which may not be included to avoid unnecessary copies.
1506
+ completion: Option<ActivityExecutionResult>,
1507
+ },
1508
+ /// Activities have not been enabled on this worker.
1509
+ #[error("Activities are not enabled on this worker")]
1510
+ ActivityNotEnabled,
1511
+ }
1512
+
1513
+ /// Errors thrown by [crate::Worker::complete_nexus_task]
1514
+ #[derive(thiserror::Error, Debug)]
1515
+ pub enum CompleteNexusError {
1516
+ /// Lang SDK sent us a malformed nexus completion. This likely means a bug in the lang sdk.
1517
+ #[error("Lang SDK sent us a malformed nexus completion: {reason}")]
1518
+ MalformedNexusCompletion {
1519
+ /// Reason the completion was malformed
1520
+ reason: String,
1521
+ },
1522
+ /// Nexus has not been enabled on this worker.
1523
+ #[error("Nexus is not enabled on this worker")]
1524
+ NexusNotEnabled,
1525
+ }
1526
+
1527
+ /// Errors we can encounter during workflow processing which we may treat as either WFT failures
1528
+ /// or whole-workflow failures depending on user preference.
1529
+ #[derive(Clone, Debug, Eq, PartialEq, Hash)]
1530
+ pub enum WorkflowErrorType {
1531
+ /// A nondeterminism error
1532
+ Nondeterminism,
1533
+ }
1534
+
1535
+ /// This trait allows users to customize the performance characteristics of workers dynamically.
1536
+ /// For more, see the docstrings of the traits in the return types of its functions.
1537
+ pub trait WorkerTuner {
1538
+ /// Return a [SlotSupplier] for workflow tasks. Note that workflow task slot suppliers must be
1539
+ /// willing to hand out a minimum of one non-sticky slot and one sticky slot if workflow caching
1540
+ /// is enabled, otherwise the worker may fail to process new tasks.
1541
+ fn workflow_task_slot_supplier(
1542
+ &self,
1543
+ ) -> Arc<dyn SlotSupplier<SlotKind = WorkflowSlotKind> + Send + Sync>;
1544
+
1545
+ /// Return a [SlotSupplier] for activity tasks
1546
+ fn activity_task_slot_supplier(
1547
+ &self,
1548
+ ) -> Arc<dyn SlotSupplier<SlotKind = ActivitySlotKind> + Send + Sync>;
1549
+
1550
+ /// Return a [SlotSupplier] for local activities
1551
+ fn local_activity_slot_supplier(
1552
+ &self,
1553
+ ) -> Arc<dyn SlotSupplier<SlotKind = LocalActivitySlotKind> + Send + Sync>;
1554
+
1555
+ /// Return a [SlotSupplier] for nexus tasks
1556
+ fn nexus_task_slot_supplier(
1557
+ &self,
1558
+ ) -> Arc<dyn SlotSupplier<SlotKind = NexusSlotKind> + Send + Sync>;
1559
+ }
1560
+
1561
+ /// Implementing this trait allows users to customize how many tasks of certain kinds the worker
1562
+ /// will perform concurrently.
1563
+ ///
1564
+ /// Note that, for implementations on workflow tasks ([WorkflowSlotKind]), workers that have the
1565
+ /// workflow cache enabled should be willing to hand out _at least_ two slots, to avoid the worker
1566
+ /// becoming stuck only polling on the worker's sticky queue.
1567
+ #[async_trait::async_trait]
1568
+ pub trait SlotSupplier {
1569
+ /// The kind of slot this supplier is supplying.
1570
+ type SlotKind: SlotKind;
1571
+ /// Block until a slot is available, then return a permit for the slot.
1572
+ async fn reserve_slot(&self, ctx: &dyn SlotReservationContext) -> SlotSupplierPermit;
1573
+
1574
+ /// Try to immediately reserve a slot, returning None if one is not available. Implementations
1575
+ /// must not block, or risk blocking the async event loop.
1576
+ fn try_reserve_slot(&self, ctx: &dyn SlotReservationContext) -> Option<SlotSupplierPermit>;
1577
+
1578
+ /// Marks a slot as actually now being used. This is separate from reserving one because the
1579
+ /// pollers need to reserve a slot before they have actually obtained work from server. Once
1580
+ /// that task is obtained (and validated) then the slot can actually be used to work on the
1581
+ /// task.
1582
+ ///
1583
+ /// Users' implementation of this can choose to emit metrics, or otherwise leverage the
1584
+ /// information exposed through `ctx` (its `permit()` and `info()`) to make better future decisions
1585
+ /// about whether a slot should be handed out.
1586
+ fn mark_slot_used(&self, ctx: &dyn SlotMarkUsedContext<SlotKind = Self::SlotKind>);
1587
+
1588
+ /// Frees a slot.
1589
+ fn release_slot(&self, ctx: &dyn SlotReleaseContext<SlotKind = Self::SlotKind>);
1590
+
1591
+ /// If this implementation knows how many slots are available at any moment, it should return
1592
+ /// that here.
1593
+ fn available_slots(&self) -> Option<usize> {
1594
+ None
1595
+ }
1596
+
1597
+ /// Returns a human-friendly identifier describing this supplier implementation for
1598
+ /// diagnostics and telemetry.
1599
+ fn slot_supplier_kind(&self) -> String {
1600
+ "Custom".to_string()
1601
+ }
1602
+ }
1603
+
1604
+ /// Context for slot reservation.
1605
+ pub trait SlotReservationContext: Send + Sync {
1606
+ /// Returns the name of the task queue this worker is polling
1607
+ fn task_queue(&self) -> &str;
1608
+
1609
+ /// Returns the identity of the worker
1610
+ fn worker_identity(&self) -> &str;
1611
+
1612
+ /// Returns the deployment version of the worker, if one is set.
1613
+ fn worker_deployment_version(&self) -> &Option<WorkerDeploymentVersion>;
1614
+
1615
+ /// Returns the number of currently outstanding slot permits, whether used or un-used.
1616
+ fn num_issued_slots(&self) -> usize;
1617
+
1618
+ /// Returns true iff this is a sticky poll for a workflow task
1619
+ fn is_sticky(&self) -> bool;
1620
+
1621
+ /// Returns the metrics meter if metrics are enabled
1622
+ fn get_metrics_meter(&self) -> Option<TemporalMeter> {
1623
+ None
1624
+ }
1625
+ }
1626
+
1627
+ /// Context for slots being marked as used.
1628
+ pub trait SlotMarkUsedContext: Send + Sync {
1629
+ /// The kind of slot being marked used.
1630
+ type SlotKind: SlotKind;
1631
+ /// The slot permit that is being used
1632
+ fn permit(&self) -> &SlotSupplierPermit;
1633
+ /// Returns the info of the slot that was marked as used
1634
+ fn info(&self) -> &<Self::SlotKind as SlotKind>::Info;
1635
+
1636
+ /// Returns the metrics meter if metrics are enabled
1637
+ fn get_metrics_meter(&self) -> Option<TemporalMeter> {
1638
+ None
1639
+ }
1640
+ }
1641
+
1642
+ /// Context for slots being released.
1643
+ pub trait SlotReleaseContext: Send + Sync {
1644
+ /// The kind of slot being released.
1645
+ type SlotKind: SlotKind;
1646
+ /// The slot permit that is being released
1647
+ fn permit(&self) -> &SlotSupplierPermit;
1648
+ /// Returns the info of the slot that was released, if it was used
1649
+ fn info(&self) -> Option<&<Self::SlotKind as SlotKind>::Info>;
1650
+
1651
+ /// Returns the metrics meter if metrics are enabled
1652
+ fn get_metrics_meter(&self) -> Option<TemporalMeter> {
1653
+ None
1654
+ }
1655
+ }
1656
+
1657
+ /// A permit issued by a [SlotSupplier].
1658
+ #[derive(Default, Debug)]
1659
+ pub struct SlotSupplierPermit {
1660
+ user_data: Option<Box<dyn Any + Send + Sync>>,
1661
+ }
1662
+ impl SlotSupplierPermit {
1663
+ /// Attach some user data to the slot permit.
1664
+ pub fn with_user_data<T: Any + Send + Sync>(user_data: T) -> Self {
1665
+ Self {
1666
+ user_data: Some(Box::new(user_data)),
1667
+ }
1668
+ }
1669
+ /// Attempts to downcast the inner data, if any, into the provided type and returns it.
1670
+ /// Returns `None` if there is no data or the data is not of the appropriate type.
1671
+ pub fn user_data<T: Any + Send + Sync>(&self) -> Option<&T> {
1672
+ self.user_data.as_ref().and_then(|b| b.downcast_ref())
1673
+ }
1674
+ /// Attempts to downcast the inner data, if any, into the provided type and returns it mutably.
1675
+ /// Returns `None` if there is no data or the data is not of the appropriate type.
1676
+ pub fn user_data_mut<T: Any + Send + Sync>(&mut self) -> Option<&mut T> {
1677
+ self.user_data.as_mut().and_then(|b| b.downcast_mut())
1678
+ }
1679
+ }
1680
+
1681
+ /// What kind of task the slot is used for.
1682
+ #[derive(Debug, Copy, Clone, derive_more::Display, Eq, PartialEq)]
1683
+ pub enum SlotKindType {
1684
+ /// Workflow tasks.
1685
+ Workflow,
1686
+ /// Activity tasks.
1687
+ Activity,
1688
+ /// Local activity tasks.
1689
+ LocalActivity,
1690
+ /// Nexus tasks.
1691
+ Nexus,
1692
+ }
1693
+
1694
+ /// Marker struct for workflow slots.
1695
+ #[derive(Debug, Copy, Clone)]
1696
+ pub struct WorkflowSlotKind {}
1697
+ /// Marker struct for activity slots.
1698
+ #[derive(Debug, Copy, Clone)]
1699
+ pub struct ActivitySlotKind {}
1700
+ /// Marker struct for local activity slots.
1701
+ #[derive(Debug, Copy, Clone)]
1702
+ pub struct LocalActivitySlotKind {}
1703
+ /// Marker struct for nexus slots.
1704
+ #[derive(Debug, Copy, Clone)]
1705
+ pub struct NexusSlotKind {}
1706
+
1707
+ /// Contextual information about in-use slots.
1708
+ pub enum SlotInfo<'a> {
1709
+ /// For workflow slots.
1710
+ Workflow(&'a WorkflowSlotInfo),
1711
+ /// For activity slots.
1712
+ Activity(&'a ActivitySlotInfo),
1713
+ /// For local activity slots.
1714
+ LocalActivity(&'a LocalActivitySlotInfo),
1715
+ /// For nexus slots.
1716
+ Nexus(&'a NexusSlotInfo),
1717
+ }
1718
+
1719
+ /// Allows reifying slot info into the appropriate type.
1720
+ pub trait SlotInfoTrait: prost::Message {
1721
+ /// Downcast a protobuf message into the enum.
1722
+ fn downcast(&self) -> SlotInfo<'_>;
1723
+ }
1724
+ impl SlotInfoTrait for WorkflowSlotInfo {
1725
+ fn downcast(&self) -> SlotInfo<'_> {
1726
+ SlotInfo::Workflow(self)
1727
+ }
1728
+ }
1729
+ impl SlotInfoTrait for ActivitySlotInfo {
1730
+ fn downcast(&self) -> SlotInfo<'_> {
1731
+ SlotInfo::Activity(self)
1732
+ }
1733
+ }
1734
+ impl SlotInfoTrait for LocalActivitySlotInfo {
1735
+ fn downcast(&self) -> SlotInfo<'_> {
1736
+ SlotInfo::LocalActivity(self)
1737
+ }
1738
+ }
1739
+ impl SlotInfoTrait for NexusSlotInfo {
1740
+ fn downcast(&self) -> SlotInfo<'_> {
1741
+ SlotInfo::Nexus(self)
1742
+ }
1743
+ }
1744
+
1745
+ /// Associates slot info/kinds together.
1746
+ pub trait SlotKind {
1747
+ /// The associated info for this kind.
1748
+ type Info: SlotInfoTrait;
1749
+
1750
+ /// Return this kind.
1751
+ fn kind() -> SlotKindType;
1752
+ }
1753
+ impl SlotKind for WorkflowSlotKind {
1754
+ type Info = WorkflowSlotInfo;
1755
+
1756
+ fn kind() -> SlotKindType {
1757
+ SlotKindType::Workflow
1758
+ }
1759
+ }
1760
+ impl SlotKind for ActivitySlotKind {
1761
+ type Info = ActivitySlotInfo;
1762
+
1763
+ fn kind() -> SlotKindType {
1764
+ SlotKindType::Activity
1765
+ }
1766
+ }
1767
+ impl SlotKind for LocalActivitySlotKind {
1768
+ type Info = LocalActivitySlotInfo;
1769
+
1770
+ fn kind() -> SlotKindType {
1771
+ SlotKindType::LocalActivity
1772
+ }
1773
+ }
1774
+ impl SlotKind for NexusSlotKind {
1775
+ type Info = NexusSlotInfo;
1776
+
1777
+ fn kind() -> SlotKindType {
1778
+ SlotKindType::Nexus
1779
+ }
1780
+ }
1781
+
1782
+ /// Different strategies for task polling
1783
+ #[derive(Clone, Copy, Debug, PartialEq)]
1784
+ pub enum PollerBehavior {
1785
+ /// Will attempt to poll as long as a slot is available, up to the provided maximum. Cannot
1786
+ /// be less than two for workflow tasks, or one for other tasks.
1787
+ SimpleMaximum(usize),
1788
+ /// Will automatically scale the number of pollers based on feedback from the server. Still
1789
+ /// requires a slot to be available before beginning polling.
1790
+ Autoscaling {
1791
+ /// At least this many poll calls will always be attempted (assuming slots are available).
1792
+ /// Cannot be zero.
1793
+ minimum: usize,
1794
+ /// At most this many poll calls will ever be open at once. Must be >= `minimum`.
1795
+ maximum: usize,
1796
+ /// This many polls will be attempted initially before scaling kicks in. Must be between
1797
+ /// `minimum` and `maximum`.
1798
+ initial: usize,
1799
+ },
1800
+ }
1801
+
1802
+ impl PollerBehavior {
1803
+ /// Returns true if the behavior is using autoscaling.
1804
+ pub fn is_autoscaling(&self) -> bool {
1805
+ matches!(self, PollerBehavior::Autoscaling { .. })
1806
+ }
1807
+
1808
+ /// Validates the behavior.
1809
+ pub fn validate(&self) -> Result<(), String> {
1810
+ match self {
1811
+ PollerBehavior::SimpleMaximum(x) => {
1812
+ if *x < 1 {
1813
+ return Err("SimpleMaximum poller behavior must be at least 1".to_owned());
1814
+ }
1815
+ }
1816
+ PollerBehavior::Autoscaling {
1817
+ minimum,
1818
+ maximum,
1819
+ initial,
1820
+ } => {
1821
+ if *minimum < 1 {
1822
+ return Err("Autoscaling minimum poller behavior must be at least 1".to_owned());
1823
+ }
1824
+ if *maximum < *minimum {
1825
+ return Err(
1826
+ "Autoscaling maximum must be greater than or equal to minimum".to_owned(),
1827
+ );
1828
+ }
1829
+ if *initial < *minimum || *initial > *maximum {
1830
+ return Err(
1831
+ "Autoscaling initial must be between minimum and maximum".to_owned()
1832
+ );
1833
+ }
1834
+ }
1835
+ }
1836
+ Ok(())
1837
+ }
1838
+ }
1839
+
1840
+ /// Strategy a core worker uses for versioning.
1841
+ #[derive(Clone, Debug)]
1842
+ pub enum WorkerVersioningStrategy {
1843
+ /// Don't enable any versioning
1844
+ None {
1845
+ /// Build ID may still be passed as a way to identify the worker, or may be left empty.
1846
+ build_id: String,
1847
+ },
1848
+ /// Maybe use the modern deployment-based versioning, or just pass a deployment version.
1849
+ WorkerDeploymentBased(WorkerDeploymentOptions),
1850
+ /// Use the legacy build-id-based whole worker versioning.
1851
+ LegacyBuildIdBased {
1852
+ /// A Build ID to use, must be non-empty.
1853
+ build_id: String,
1854
+ },
1855
+ }
1856
+
1857
+ impl Default for WorkerVersioningStrategy {
1858
+ fn default() -> Self {
1859
+ WorkerVersioningStrategy::None {
1860
+ build_id: String::new(),
1861
+ }
1862
+ }
1863
+ }
1864
+
1865
+ impl WorkerVersioningStrategy {
1866
+ /// Return the build ID associated with this strategy.
1867
+ pub fn build_id(&self) -> &str {
1868
+ match self {
1869
+ WorkerVersioningStrategy::None { build_id } => build_id,
1870
+ WorkerVersioningStrategy::WorkerDeploymentBased(opts) => &opts.version.build_id,
1871
+ WorkerVersioningStrategy::LegacyBuildIdBased { build_id } => build_id,
1872
+ }
1873
+ }
1874
+
1875
+ /// Returns true if this uses "build id based" legacy versioning.
1876
+ pub fn uses_build_id_based(&self) -> bool {
1877
+ matches!(self, WorkerVersioningStrategy::LegacyBuildIdBased { .. })
1878
+ }
1879
+
1880
+ /// Returns the default versioning behavior associated with this strategy, if any.
1881
+ pub fn default_versioning_behavior(&self) -> Option<VersioningBehavior> {
1882
+ match self {
1883
+ WorkerVersioningStrategy::WorkerDeploymentBased(opts) => {
1884
+ opts.default_versioning_behavior
1885
+ }
1886
+ _ => None,
1887
+ }
1888
+ }
1889
+ }
1890
+
1123
1891
  struct ClientWorkerRegistrator {
1124
1892
  worker_instance_key: Uuid,
1125
1893
  slot_provider: SlotProvider,
@@ -1415,13 +2183,13 @@ mod tests {
1415
2183
  use crate::{
1416
2184
  advance_fut,
1417
2185
  test_help::test_worker_cfg,
1418
- worker::client::mocks::{mock_manual_worker_client, mock_worker_client},
2186
+ worker::{
2187
+ PollerBehavior,
2188
+ client::mocks::{mock_manual_worker_client, mock_worker_client},
2189
+ },
1419
2190
  };
1420
2191
  use futures_util::FutureExt;
1421
- use temporalio_common::{
1422
- protos::temporal::api::workflowservice::v1::PollActivityTaskQueueResponse,
1423
- worker::PollerBehavior,
1424
- };
2192
+ use temporalio_common::protos::temporal::api::workflowservice::v1::PollActivityTaskQueueResponse;
1425
2193
 
1426
2194
  #[tokio::test]
1427
2195
  async fn activity_timeouts_maintain_permit() {
@@ -1484,7 +2252,6 @@ mod tests {
1484
2252
 
1485
2253
  #[test]
1486
2254
  fn max_polls_zero_is_err() {
1487
- use temporalio_common::worker::{WorkerConfig, WorkerTaskTypes, WorkerVersioningStrategy};
1488
2255
  assert!(
1489
2256
  WorkerConfig::builder()
1490
2257
  .namespace("test")
@@ -1498,4 +2265,132 @@ mod tests {
1498
2265
  .is_err()
1499
2266
  );
1500
2267
  }
2268
+
2269
+ fn default_versioning_strategy() -> WorkerVersioningStrategy {
2270
+ WorkerVersioningStrategy::None {
2271
+ build_id: String::new(),
2272
+ }
2273
+ }
2274
+
2275
+ #[test]
2276
+ fn test_default_configuration_polls_all_types() {
2277
+ let config = WorkerConfig::builder()
2278
+ .namespace("default")
2279
+ .task_queue("test-queue")
2280
+ .versioning_strategy(default_versioning_strategy())
2281
+ .task_types(WorkerTaskTypes::all())
2282
+ .build()
2283
+ .unwrap();
2284
+
2285
+ let effective = &config.task_types;
2286
+ assert!(
2287
+ effective.enable_workflows,
2288
+ "Should poll workflows by default"
2289
+ );
2290
+ assert!(
2291
+ effective.enable_local_activities,
2292
+ "should poll local activities by default"
2293
+ );
2294
+ assert!(
2295
+ effective.enable_remote_activities,
2296
+ "Should poll remote activities by default"
2297
+ );
2298
+ assert!(effective.enable_nexus, "Should poll nexus by default");
2299
+ }
2300
+
2301
+ #[test]
2302
+ fn test_invalid_task_types_fails_validation() {
2303
+ // Case 1: a config with every task type disabled must be rejected by `build()`.
2304
+ let result = WorkerConfig::builder()
2305
+ .namespace("default")
2306
+ .task_queue("test-queue")
2307
+ .versioning_strategy(default_versioning_strategy())
2308
+ .task_types(WorkerTaskTypes {
2309
+ enable_workflows: false,
2310
+ enable_local_activities: false,
2311
+ enable_remote_activities: false,
2312
+ enable_nexus: false,
2313
+ })
2314
+ .build();
2315
+
2316
+ assert!(result.is_err(), "Empty task_types should fail validation");
2317
+ let err = result.err().unwrap();
2318
+ assert!(
2319
+ err.contains("At least one task type"),
2320
+ "Error should mention task types: {err}",
2321
+ );
2322
+
2323
+ // Case 2: local activities enabled while workflows are disabled must also be rejected.
2324
+ let result = WorkerConfig::builder()
2325
+ .namespace("default")
2326
+ .task_queue("test-queue")
2327
+ .versioning_strategy(default_versioning_strategy())
2328
+ .task_types(WorkerTaskTypes {
2329
+ enable_workflows: false,
2330
+ enable_local_activities: true,
2331
+ enable_remote_activities: false,
2332
+ enable_nexus: false,
2333
+ })
2334
+ .build();
2335
+
2336
+ assert!(result.is_err(), "Local activities without workflows should fail validation");
2337
+ let err = result.err().unwrap();
2338
+ assert!(
2339
+ err.contains("cannot enable local activities without workflows"),
2340
+ "Error should mention local activities requiring workflows: {err}",
2341
+ );
2342
+ }
2343
+
2344
+ #[test]
2345
+ fn test_all_combinations() {
2346
+ let combinations = [
2347
+ (WorkerTaskTypes::workflow_only(), "workflows only"),
2348
+ (WorkerTaskTypes::activity_only(), "activities only"),
2349
+ (WorkerTaskTypes::nexus_only(), "nexus only"),
2350
+ (
2351
+ WorkerTaskTypes {
2352
+ enable_workflows: true,
2353
+ enable_local_activities: true,
2354
+ enable_remote_activities: true,
2355
+ enable_nexus: false,
2356
+ },
2357
+ "workflows + activities",
2358
+ ),
2359
+ (
2360
+ WorkerTaskTypes {
2361
+ enable_workflows: true,
2362
+ enable_local_activities: true,
2363
+ enable_remote_activities: false,
2364
+ enable_nexus: true,
2365
+ },
2366
+ "workflows + nexus",
2367
+ ),
2368
+ (
2369
+ WorkerTaskTypes {
2370
+ enable_workflows: false,
2371
+ enable_local_activities: false,
2372
+ enable_remote_activities: true,
2373
+ enable_nexus: true,
2374
+ },
2375
+ "activities + nexus",
2376
+ ),
2377
+ (WorkerTaskTypes::all(), "all types"),
2378
+ ];
2379
+
2380
+ for (task_types, description) in combinations {
2381
+ let config = WorkerConfig::builder()
2382
+ .namespace("default")
2383
+ .task_queue("test-queue")
2384
+ .versioning_strategy(default_versioning_strategy())
2385
+ .task_types(task_types)
2386
+ .build()
2387
+ .unwrap();
2388
+
2389
+ let effective = config.task_types;
2390
+ assert_eq!(
2391
+ effective, task_types,
2392
+ "Effective types should match for {description}",
2393
+ );
2394
+ }
2395
+ }
1501
2396
  }