@temporalio/core-bridge 0.16.4 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170)
  1. package/Cargo.lock +339 -226
  2. package/Cargo.toml +7 -3
  3. package/common.js +50 -0
  4. package/index.d.ts +7 -0
  5. package/index.js +12 -0
  6. package/package.json +7 -4
  7. package/releases/aarch64-apple-darwin/index.node +0 -0
  8. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
  9. package/{index.node → releases/index.node} +0 -0
  10. package/releases/x86_64-apple-darwin/index.node +0 -0
  11. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  12. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  13. package/scripts/build.js +10 -50
  14. package/sdk-core/.buildkite/docker/Dockerfile +1 -1
  15. package/sdk-core/.buildkite/docker/docker-compose.yaml +2 -2
  16. package/sdk-core/.buildkite/pipeline.yml +2 -0
  17. package/sdk-core/Cargo.toml +1 -88
  18. package/sdk-core/README.md +30 -6
  19. package/sdk-core/bridge-ffi/Cargo.toml +24 -0
  20. package/sdk-core/bridge-ffi/LICENSE.txt +23 -0
  21. package/sdk-core/bridge-ffi/build.rs +25 -0
  22. package/sdk-core/bridge-ffi/include/sdk-core-bridge.h +216 -0
  23. package/sdk-core/bridge-ffi/src/lib.rs +829 -0
  24. package/sdk-core/bridge-ffi/src/wrappers.rs +193 -0
  25. package/sdk-core/client/Cargo.toml +32 -0
  26. package/sdk-core/{src/pollers/gateway.rs → client/src/lib.rs} +101 -195
  27. package/sdk-core/client/src/metrics.rs +89 -0
  28. package/sdk-core/client/src/mocks.rs +167 -0
  29. package/sdk-core/{src/pollers → client/src}/retry.rs +172 -14
  30. package/sdk-core/core/Cargo.toml +96 -0
  31. package/sdk-core/{src → core/src}/core_tests/activity_tasks.rs +193 -37
  32. package/sdk-core/{src → core/src}/core_tests/child_workflows.rs +14 -14
  33. package/sdk-core/{src → core/src}/core_tests/determinism.rs +8 -8
  34. package/sdk-core/core/src/core_tests/local_activities.rs +328 -0
  35. package/sdk-core/{src → core/src}/core_tests/mod.rs +6 -9
  36. package/sdk-core/{src → core/src}/core_tests/queries.rs +54 -54
  37. package/sdk-core/{src → core/src}/core_tests/replay_flag.rs +8 -12
  38. package/sdk-core/{src → core/src}/core_tests/workers.rs +120 -33
  39. package/sdk-core/{src → core/src}/core_tests/workflow_cancels.rs +16 -26
  40. package/sdk-core/{src → core/src}/core_tests/workflow_tasks.rs +280 -292
  41. package/sdk-core/core/src/lib.rs +374 -0
  42. package/sdk-core/{src → core/src}/log_export.rs +3 -27
  43. package/sdk-core/core/src/pending_activations.rs +162 -0
  44. package/sdk-core/{src → core/src}/pollers/mod.rs +4 -22
  45. package/sdk-core/{src → core/src}/pollers/poll_buffer.rs +1 -1
  46. package/sdk-core/core/src/protosext/mod.rs +396 -0
  47. package/sdk-core/core/src/replay/mod.rs +210 -0
  48. package/sdk-core/core/src/retry_logic.rs +144 -0
  49. package/sdk-core/{src → core/src}/telemetry/metrics.rs +3 -58
  50. package/sdk-core/{src → core/src}/telemetry/mod.rs +8 -8
  51. package/sdk-core/{src → core/src}/telemetry/prometheus_server.rs +0 -0
  52. package/sdk-core/{src → core/src}/test_help/mod.rs +35 -83
  53. package/sdk-core/{src → core/src}/worker/activities/activity_heartbeat_manager.rs +95 -42
  54. package/sdk-core/core/src/worker/activities/local_activities.rs +973 -0
  55. package/sdk-core/{src → core/src}/worker/activities.rs +52 -33
  56. package/sdk-core/{src → core/src}/worker/dispatcher.rs +8 -6
  57. package/sdk-core/{src → core/src}/worker/mod.rs +347 -221
  58. package/sdk-core/core/src/worker/wft_delivery.rs +81 -0
  59. package/sdk-core/{src → core/src}/workflow/bridge.rs +5 -2
  60. package/sdk-core/{src → core/src}/workflow/driven_workflow.rs +17 -7
  61. package/sdk-core/{src → core/src}/workflow/history_update.rs +33 -7
  62. package/sdk-core/{src → core/src/workflow}/machines/activity_state_machine.rs +26 -26
  63. package/sdk-core/{src → core/src/workflow}/machines/cancel_external_state_machine.rs +8 -11
  64. package/sdk-core/{src → core/src/workflow}/machines/cancel_workflow_state_machine.rs +19 -21
  65. package/sdk-core/{src → core/src/workflow}/machines/child_workflow_state_machine.rs +20 -31
  66. package/sdk-core/{src → core/src/workflow}/machines/complete_workflow_state_machine.rs +3 -5
  67. package/sdk-core/{src → core/src/workflow}/machines/continue_as_new_workflow_state_machine.rs +18 -18
  68. package/sdk-core/{src → core/src/workflow}/machines/fail_workflow_state_machine.rs +5 -6
  69. package/sdk-core/core/src/workflow/machines/local_activity_state_machine.rs +1451 -0
  70. package/sdk-core/{src → core/src/workflow}/machines/mod.rs +54 -107
  71. package/sdk-core/{src → core/src/workflow}/machines/mutable_side_effect_state_machine.rs +0 -0
  72. package/sdk-core/{src → core/src/workflow}/machines/patch_state_machine.rs +29 -30
  73. package/sdk-core/{src → core/src/workflow}/machines/side_effect_state_machine.rs +0 -0
  74. package/sdk-core/{src → core/src/workflow}/machines/signal_external_state_machine.rs +17 -19
  75. package/sdk-core/{src → core/src/workflow}/machines/timer_state_machine.rs +20 -21
  76. package/sdk-core/{src → core/src/workflow}/machines/transition_coverage.rs +5 -2
  77. package/sdk-core/{src → core/src/workflow}/machines/upsert_search_attributes_state_machine.rs +0 -0
  78. package/sdk-core/core/src/workflow/machines/workflow_machines/local_acts.rs +96 -0
  79. package/sdk-core/{src → core/src/workflow}/machines/workflow_machines.rs +357 -171
  80. package/sdk-core/{src → core/src/workflow}/machines/workflow_task_state_machine.rs +1 -1
  81. package/sdk-core/{src → core/src}/workflow/mod.rs +200 -39
  82. package/sdk-core/{src → core/src}/workflow/workflow_tasks/cache_manager.rs +0 -0
  83. package/sdk-core/{src → core/src}/workflow/workflow_tasks/concurrency_manager.rs +38 -5
  84. package/sdk-core/{src → core/src}/workflow/workflow_tasks/mod.rs +317 -103
  85. package/sdk-core/{test_utils → core-api}/Cargo.toml +10 -7
  86. package/sdk-core/{src → core-api/src}/errors.rs +42 -92
  87. package/sdk-core/core-api/src/lib.rs +158 -0
  88. package/sdk-core/{src/worker/config.rs → core-api/src/worker.rs} +18 -23
  89. package/sdk-core/etc/deps.svg +156 -0
  90. package/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +5 -5
  91. package/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +3 -5
  92. package/sdk-core/fsm/rustfsm_trait/src/lib.rs +7 -1
  93. package/sdk-core/histories/fail_wf_task.bin +0 -0
  94. package/sdk-core/histories/timer_workflow_history.bin +0 -0
  95. package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +44 -13
  96. package/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +19 -1
  97. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/common.proto +1 -1
  98. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +9 -0
  99. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +1 -0
  100. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +1 -0
  101. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +13 -0
  102. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +14 -7
  103. package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +176 -18
  104. package/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +6 -0
  105. package/sdk-core/protos/api_upstream/temporal/api/query/v1/message.proto +11 -0
  106. package/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +3 -0
  107. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +156 -7
  108. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +135 -104
  109. package/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +78 -0
  110. package/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +78 -0
  111. package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +205 -0
  112. package/sdk-core/protos/local/temporal/sdk/core/bridge/service.proto +61 -0
  113. package/sdk-core/protos/local/{child_workflow.proto → temporal/sdk/core/child_workflow/child_workflow.proto} +1 -1
  114. package/sdk-core/protos/local/{common.proto → temporal/sdk/core/common/common.proto} +5 -3
  115. package/sdk-core/protos/local/{core_interface.proto → temporal/sdk/core/core_interface.proto} +10 -10
  116. package/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +30 -0
  117. package/sdk-core/protos/local/{workflow_activation.proto → temporal/sdk/core/workflow_activation/workflow_activation.proto} +35 -11
  118. package/sdk-core/protos/local/{workflow_commands.proto → temporal/sdk/core/workflow_commands/workflow_commands.proto} +55 -4
  119. package/sdk-core/protos/local/{workflow_completion.proto → temporal/sdk/core/workflow_completion/workflow_completion.proto} +3 -3
  120. package/sdk-core/sdk/Cargo.toml +32 -0
  121. package/sdk-core/{src/prototype_rust_sdk → sdk/src}/conversions.rs +0 -0
  122. package/sdk-core/sdk/src/lib.rs +699 -0
  123. package/sdk-core/sdk/src/payload_converter.rs +11 -0
  124. package/sdk-core/sdk/src/workflow_context/options.rs +180 -0
  125. package/sdk-core/{src/prototype_rust_sdk → sdk/src}/workflow_context.rs +201 -124
  126. package/sdk-core/{src/prototype_rust_sdk → sdk/src}/workflow_future.rs +63 -30
  127. package/sdk-core/sdk-core-protos/Cargo.toml +10 -0
  128. package/sdk-core/sdk-core-protos/build.rs +28 -6
  129. package/sdk-core/sdk-core-protos/src/constants.rs +7 -0
  130. package/sdk-core/{src/test_help → sdk-core-protos/src}/history_builder.rs +134 -49
  131. package/sdk-core/sdk-core-protos/src/history_info.rs +216 -0
  132. package/sdk-core/sdk-core-protos/src/lib.rs +601 -168
  133. package/sdk-core/sdk-core-protos/src/task_token.rs +38 -0
  134. package/sdk-core/sdk-core-protos/src/utilities.rs +14 -0
  135. package/sdk-core/test-utils/Cargo.toml +32 -0
  136. package/sdk-core/{src/test_help → test-utils/src}/canned_histories.rs +59 -78
  137. package/sdk-core/test-utils/src/histfetch.rs +28 -0
  138. package/sdk-core/{test_utils → test-utils}/src/lib.rs +131 -68
  139. package/sdk-core/tests/integ_tests/client_tests.rs +1 -1
  140. package/sdk-core/tests/integ_tests/heartbeat_tests.rs +11 -7
  141. package/sdk-core/tests/integ_tests/polling_tests.rs +12 -11
  142. package/sdk-core/tests/integ_tests/queries_tests.rs +82 -78
  143. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +91 -71
  144. package/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +3 -4
  145. package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +2 -4
  146. package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +4 -6
  147. package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +4 -6
  148. package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +3 -4
  149. package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +496 -0
  150. package/sdk-core/tests/integ_tests/workflow_tests/patches.rs +5 -8
  151. package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +125 -0
  152. package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +7 -13
  153. package/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +33 -5
  154. package/sdk-core/tests/integ_tests/workflow_tests/timers.rs +12 -16
  155. package/sdk-core/tests/integ_tests/workflow_tests.rs +85 -82
  156. package/sdk-core/tests/load_tests.rs +6 -6
  157. package/sdk-core/tests/main.rs +2 -2
  158. package/src/conversions.rs +24 -21
  159. package/src/errors.rs +8 -0
  160. package/src/lib.rs +323 -211
  161. package/sdk-core/protos/local/activity_result.proto +0 -46
  162. package/sdk-core/protos/local/activity_task.proto +0 -66
  163. package/sdk-core/src/core_tests/retry.rs +0 -147
  164. package/sdk-core/src/lib.rs +0 -403
  165. package/sdk-core/src/machines/local_activity_state_machine.rs +0 -117
  166. package/sdk-core/src/pending_activations.rs +0 -249
  167. package/sdk-core/src/protosext/mod.rs +0 -160
  168. package/sdk-core/src/prototype_rust_sdk.rs +0 -412
  169. package/sdk-core/src/task_token.rs +0 -20
  170. package/sdk-core/src/test_help/history_info.rs +0 -157
@@ -1,40 +1,48 @@
1
1
  mod activities;
2
- mod config;
3
2
  mod dispatcher;
3
+ mod wft_delivery;
4
4
 
5
- pub use crate::worker::config::{WorkerConfig, WorkerConfigBuilder};
5
+ pub use temporal_sdk_core_api::worker::{WorkerConfig, WorkerConfigBuilder};
6
+
7
+ pub(crate) use activities::{
8
+ ExecutingLAId, LocalActRequest, LocalActivityExecutionResult, LocalActivityResolution,
9
+ NewLocalAct,
10
+ };
6
11
  pub(crate) use dispatcher::WorkerDispatcher;
7
12
 
8
13
  use crate::{
9
14
  errors::CompleteWfError,
10
- machines::{EmptyWorkflowCommandErr, WFMachinesError},
11
15
  pollers::{
12
- new_activity_task_buffer, new_workflow_task_buffer, BoxedActPoller, BoxedWFPoller,
13
- GatewayRef, Poller, WorkflowTaskPoller,
16
+ new_activity_task_buffer, new_workflow_task_buffer, BoxedActPoller, BoxedWFPoller, Poller,
17
+ WorkflowTaskPoller,
14
18
  },
15
- protosext::{legacy_query_failure, ValidPollWFTQResponse, WorkflowTaskCompletion},
16
- task_token::TaskToken,
19
+ protosext::{legacy_query_failure, ValidPollWFTQResponse},
17
20
  telemetry::metrics::{
18
21
  activity_poller, workflow_poller, workflow_sticky_poller, MetricsContext,
19
22
  },
23
+ worker::{
24
+ activities::{DispatchOrTimeoutLA, LACompleteAction, LocalActivityManager},
25
+ wft_delivery::WFTSource,
26
+ },
20
27
  workflow::{
21
28
  workflow_tasks::{
22
29
  ActivationAction, FailedActivationOutcome, NewWfTaskOutcome,
23
30
  ServerCommandsWithWorkflowInfo, WorkflowTaskManager,
24
31
  },
25
- WorkflowCachingPolicy,
32
+ EmptyWorkflowCommandErr, LocalResolution, WFMachinesError, WorkflowCachingPolicy,
26
33
  },
27
34
  ActivityHeartbeat, CompleteActivityError, PollActivityError, PollWfError,
28
35
  };
29
- use activities::WorkerActivityTasks;
36
+ use activities::{LocalInFlightActInfo, WorkerActivityTasks};
30
37
  use futures::{Future, TryFutureExt};
31
38
  use std::{convert::TryInto, sync::Arc};
39
+ use temporal_client::{ServerGatewayApis, WorkflowTaskCompletion};
32
40
  use temporal_sdk_core_protos::{
33
41
  coresdk::{
34
- activity_result::activity_result,
42
+ activity_result::activity_execution_result,
35
43
  activity_task::ActivityTask,
36
- workflow_activation::WfActivation,
37
- workflow_completion::{self, wf_activation_completion, WfActivationCompletion},
44
+ workflow_activation::{remove_from_cache::EvictionReason, WorkflowActivation},
45
+ workflow_completion::{self, workflow_activation_completion, WorkflowActivationCompletion},
38
46
  },
39
47
  temporal::api::{
40
48
  enums::v1::{TaskQueueKind, WorkflowTaskFailedCause},
@@ -42,39 +50,42 @@ use temporal_sdk_core_protos::{
42
50
  taskqueue::v1::{StickyExecutionAttributes, TaskQueue},
43
51
  workflowservice::v1::{PollActivityTaskQueueResponse, PollWorkflowTaskQueueResponse},
44
52
  },
53
+ TaskToken,
45
54
  };
46
- use tokio::sync::{watch, Mutex, Semaphore};
55
+ use tokio::sync::{watch, Notify, Semaphore};
47
56
  use tonic::Code;
48
57
  use tracing_futures::Instrument;
49
58
 
50
59
  /// A worker polls on a certain task queue
51
60
  pub struct Worker {
52
61
  config: WorkerConfig,
53
- server_gateway: Arc<GatewayRef>,
62
+ server_gateway: Arc<dyn ServerGatewayApis + Send + Sync>,
54
63
 
55
64
  /// Will be populated when this worker should poll on a sticky WFT queue
56
65
  sticky_name: Option<String>,
57
66
 
58
- // TODO: Worth moving inside wf task mgr too?
59
67
  /// Buffers workflow task polling in the event we need to return a pending activation while
60
68
  /// a poll is ongoing. Sticky and nonsticky polling happens inside of it.
61
- wf_task_poll_buffer: BoxedWFPoller,
62
- /// Workflow task management
63
- wft_manager: WorkflowTaskManager,
69
+ wf_task_source: WFTSource,
70
+ /// Workflow task management TODO: No pub
71
+ pub(crate) wft_manager: WorkflowTaskManager,
64
72
  /// Manages activity tasks for this worker/task queue
65
73
  at_task_mgr: Option<WorkerActivityTasks>,
74
+ /// Manages local activities
75
+ local_act_mgr: LocalActivityManager,
66
76
  /// Ensures we stay at or below this worker's maximum concurrent workflow limit
67
77
  workflows_semaphore: Semaphore,
68
78
  /// Used to wake blocked workflow task polling when there is some change to workflow activations
69
79
  /// that should cause us to restart the loop
70
- pending_activations_notification_receiver: Mutex<watch::Receiver<bool>>,
71
- /// Watched during shutdown to wait for all WFTs to complete
72
- wfts_drained: watch::Receiver<bool>,
73
- /// notifies when all WFTs have been drained after shutdown
74
- wfts_drained_sender: watch::Sender<bool>,
80
+ pending_activations_notify: Arc<Notify>,
81
+ /// Watched during shutdown to wait for all WFTs to complete. Should be notified any time
82
+ /// a WFT is completed.
83
+ wfts_drained_notify: Arc<Notify>,
75
84
  /// Has shutdown been called?
76
85
  shutdown_requested: watch::Receiver<bool>,
77
86
  shutdown_sender: watch::Sender<bool>,
87
+ /// Will be called at the end of each activation completion
88
+ post_activate_hook: Option<Box<dyn Fn(&Self) + Send + Sync>>,
78
89
 
79
90
  metrics: MetricsContext,
80
91
  }
@@ -83,7 +94,7 @@ impl Worker {
83
94
  pub(crate) fn new(
84
95
  config: WorkerConfig,
85
96
  sticky_queue_name: Option<String>,
86
- sg: Arc<GatewayRef>,
97
+ sg: Arc<dyn ServerGatewayApis + Send + Sync>,
87
98
  metrics: MetricsContext,
88
99
  ) -> Self {
89
100
  metrics.worker_registered();
@@ -96,7 +107,7 @@ impl Worker {
96
107
  let max_sticky_polls = config.max_sticky_polls();
97
108
  let wft_metrics = metrics.with_new_attrs([workflow_poller()]);
98
109
  let mut wf_task_poll_buffer = new_workflow_task_buffer(
99
- sg.gw.clone(),
110
+ sg.clone(),
100
111
  config.task_queue.clone(),
101
112
  false,
102
113
  max_nonsticky_polls,
@@ -106,7 +117,7 @@ impl Worker {
106
117
  let sticky_queue_poller = sticky_queue_name.as_ref().map(|sqn| {
107
118
  let sticky_metrics = metrics.with_new_attrs([workflow_sticky_poller()]);
108
119
  let mut sp = new_workflow_task_buffer(
109
- sg.gw.clone(),
120
+ sg.clone(),
110
121
  sqn.clone(),
111
122
  true,
112
123
  max_sticky_polls,
@@ -119,7 +130,7 @@ impl Worker {
119
130
  None
120
131
  } else {
121
132
  let mut ap = new_activity_task_buffer(
122
- sg.gw.clone(),
133
+ sg.clone(),
123
134
  config.task_queue.clone(),
124
135
  config.max_concurrent_at_polls,
125
136
  config.max_concurrent_at_polls * 2,
@@ -148,7 +159,7 @@ impl Worker {
148
159
  pub(crate) fn new_with_pollers(
149
160
  config: WorkerConfig,
150
161
  sticky_queue_name: Option<String>,
151
- sg: Arc<GatewayRef>,
162
+ sg: Arc<dyn ServerGatewayApis + Send + Sync>,
152
163
  wft_poller: BoxedWFPoller,
153
164
  act_poller: Option<BoxedActPoller>,
154
165
  metrics: MetricsContext,
@@ -160,63 +171,76 @@ impl Worker {
160
171
  max_cached_workflows: config.max_cached_workflows,
161
172
  }
162
173
  };
163
- let (pan_tx, pan_rx) = watch::channel(true);
164
- let (wftd_tx, wftd_rx) = watch::channel(false);
174
+ let pa_notif = Arc::new(Notify::new());
175
+ let wfts_drained_notify = Arc::new(Notify::new());
165
176
  let (shut_tx, shut_rx) = watch::channel(false);
166
177
  Self {
167
178
  server_gateway: sg.clone(),
168
179
  sticky_name: sticky_queue_name,
169
- wf_task_poll_buffer: wft_poller,
170
- wft_manager: WorkflowTaskManager::new(pan_tx, cache_policy, metrics.clone()),
180
+ wf_task_source: WFTSource::new(wft_poller),
181
+ wft_manager: WorkflowTaskManager::new(pa_notif.clone(), cache_policy, metrics.clone()),
171
182
  at_task_mgr: act_poller.map(|ap| {
172
183
  WorkerActivityTasks::new(
173
184
  config.max_outstanding_activities,
174
185
  ap,
175
- sg.gw.clone(),
186
+ sg.clone(),
176
187
  metrics.clone(),
177
188
  config.max_heartbeat_throttle_interval,
178
189
  config.default_heartbeat_throttle_interval,
179
190
  )
180
191
  }),
192
+ local_act_mgr: LocalActivityManager::new(
193
+ config.max_outstanding_local_activities,
194
+ sg.get_options().namespace.clone(),
195
+ ),
181
196
  workflows_semaphore: Semaphore::new(config.max_outstanding_workflow_tasks),
182
197
  config,
183
198
  shutdown_requested: shut_rx,
184
199
  shutdown_sender: shut_tx,
185
- wfts_drained: wftd_rx,
186
- wfts_drained_sender: wftd_tx,
187
- pending_activations_notification_receiver: Mutex::new(pan_rx),
200
+ post_activate_hook: None,
201
+ pending_activations_notify: pa_notif,
202
+ wfts_drained_notify,
188
203
  metrics,
189
204
  }
190
205
  }
191
206
 
192
- /// Will shutdown the worker. Does not resolve until all outstanding workflow tasks have been
193
- /// completed
194
- pub(crate) async fn shutdown(&self) {
207
+ /// Begins the shutdown process, tells pollers they should stop. Is idempotent.
208
+ pub(crate) fn initiate_shutdown(&self) {
195
209
  let _ = self.shutdown_sender.send(true);
210
+ // First, we want to stop polling of both activity and workflow tasks
196
211
  if let Some(atm) = self.at_task_mgr.as_ref() {
197
212
  atm.notify_shutdown();
198
213
  }
199
- self.wf_task_poll_buffer.notify_shutdown();
200
- // Notify in case shutdown was requested while there were no more outstanding WFTs.
201
- // This is required because the only other place where we notify wfts_drained is on
202
- // activation completion and activation polling checks for wfts_drained.
203
- self.maybe_notify_wtfs_drained();
204
- // wait until all outstanding workflow tasks have been completed before shutting down
205
- if !*self.wfts_drained.borrow() {
206
- self.wfts_drained
207
- .clone()
208
- .changed()
209
- .await
210
- .expect("wfts_drained should not be dropped");
214
+ self.wf_task_source.stop_pollers();
215
+ }
216
+
217
+ /// Will shutdown the worker. Does not resolve until all outstanding workflow tasks have been
218
+ /// completed
219
+ pub(crate) async fn shutdown(&self) {
220
+ self.initiate_shutdown();
221
+ // Next we need to wait for all local activities to finish so no more workflow task
222
+ // heartbeats will be generated
223
+ self.local_act_mgr.shutdown_and_wait_all_finished().await;
224
+ // Then we need to wait for any tasks generated as a result of completing WFTs, which
225
+ // heartbeating generates
226
+ self.wf_task_source
227
+ .wait_for_tasks_from_complete_to_drain()
228
+ .await;
229
+ // wait until all outstanding workflow tasks have been completed
230
+ self.all_wfts_drained().await;
231
+ // Wait for activities to finish
232
+ if let Some(acts) = self.at_task_mgr.as_ref() {
233
+ acts.wait_all_finished().await;
211
234
  }
212
235
  }
213
236
 
214
237
  /// Finish shutting down by consuming the background pollers and freeing all resources
215
238
  pub(crate) async fn finalize_shutdown(self) {
216
- self.wf_task_poll_buffer.shutdown_box().await;
217
- if let Some(b) = self.at_task_mgr {
218
- b.shutdown().await;
219
- }
239
+ tokio::join!(self.wf_task_source.shutdown(), async {
240
+ if let Some(b) = self.at_task_mgr {
241
+ b.shutdown().await;
242
+ }
243
+ });
220
244
  }
221
245
 
222
246
  pub(crate) fn outstanding_workflow_tasks(&self) -> usize {
@@ -228,25 +252,36 @@ impl Worker {
228
252
  self.workflows_semaphore.available_permits()
229
253
  }
230
254
 
231
- /// Wait until not at the outstanding activity limit, and then poll this worker's task queue for
232
- /// new activities.
255
+ /// Get new activity tasks (may be local or nonlocal). Local activities are returned first
256
+ /// before polling the server if there are any.
233
257
  ///
234
258
  /// Returns `Ok(None)` in the event of a poll timeout or if the polling loop should otherwise
235
259
  /// be restarted
236
260
  pub(crate) async fn activity_poll(&self) -> Result<Option<ActivityTask>, PollActivityError> {
237
- // No activity polling is allowed if this worker said it only handles local activities
238
- let act_mgr = self
239
- .at_task_mgr
240
- .as_ref()
241
- .ok_or_else(|| PollActivityError::NoWorkerForQueue(self.config.task_queue.clone()))?;
261
+ let act_mgr_poll = async {
262
+ if let Some(ref act_mgr) = self.at_task_mgr {
263
+ act_mgr.poll().await
264
+ } else {
265
+ let _ = self.shutdown_requested.clone().changed().await;
266
+ Err(PollActivityError::ShutDown)
267
+ }
268
+ };
242
269
 
243
270
  tokio::select! {
244
271
  biased;
245
272
 
246
- r = act_mgr.poll() => r,
247
- _ = self.shutdown_notifier() => {
248
- Err(PollActivityError::ShutDown)
249
- }
273
+ r = self.local_act_mgr.next_pending() => {
274
+ match r {
275
+ Some(DispatchOrTimeoutLA::Dispatch(r)) => Ok(Some(r)),
276
+ Some(DispatchOrTimeoutLA::Timeout { run_id, resolution, task }) => {
277
+ self.notify_local_result(
278
+ &run_id, LocalResolution::LocalActivity(resolution)).await;
279
+ Ok(task)
280
+ },
281
+ None => Ok(None)
282
+ }
283
+ },
284
+ r = act_mgr_poll => r,
250
285
  }
251
286
  }
252
287
 
@@ -263,10 +298,35 @@ impl Worker {
263
298
  pub(crate) async fn complete_activity(
264
299
  &self,
265
300
  task_token: TaskToken,
266
- status: activity_result::Status,
301
+ status: activity_execution_result::Status,
267
302
  ) -> Result<(), CompleteActivityError> {
303
+ if task_token.is_local_activity_task() {
304
+ let as_la_res: LocalActivityExecutionResult = status.try_into()?;
305
+ match self.local_act_mgr.complete(&task_token, &as_la_res) {
306
+ LACompleteAction::Report(info) => {
307
+ self.complete_local_act(as_la_res, info, None).await
308
+ }
309
+ LACompleteAction::LangDoesTimerBackoff(backoff, info) => {
310
+ // This la needs to write a failure marker, and then we will tell lang how
311
+ // long of a timer to schedule to back off for. We do this because there are
312
+ // no other situations where core generates "internal" commands so it is much
313
+ // simpler for lang to reply with the timer / next LA command than to do it
314
+ // internally. Plus, this backoff hack we'd like to eliminate eventually.
315
+ self.complete_local_act(as_la_res, info, Some(backoff))
316
+ .await
317
+ }
318
+ LACompleteAction::WillBeRetried => {
319
+ // Nothing to do here
320
+ }
321
+ LACompleteAction::Untracked => {
322
+ warn!("Tried to complete untracked local activity {}", task_token);
323
+ }
324
+ }
325
+ return Ok(());
326
+ }
327
+
268
328
  if let Some(atm) = &self.at_task_mgr {
269
- atm.complete(task_token, status, self.server_gateway.gw.as_ref())
329
+ atm.complete(task_token, status, self.server_gateway.as_ref())
270
330
  .await
271
331
  } else {
272
332
  error!(
@@ -276,8 +336,7 @@ impl Worker {
276
336
  Ok(())
277
337
  }
278
338
  }
279
-
280
- pub(crate) async fn next_workflow_activation(&self) -> Result<WfActivation, PollWfError> {
339
+ pub(crate) async fn next_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
281
340
  // The poll needs to be in a loop because we can't guarantee tail call optimization in Rust
282
341
  // (simply) and we really, really need that for long-poll retries.
283
342
  loop {
@@ -297,8 +356,6 @@ impl Worker {
297
356
  _ => continue,
298
357
  }
299
358
  }
300
- let mut pending_activations_notification =
301
- self.pending_activations_notification_receiver.lock().await;
302
359
 
303
360
  let selected_f = tokio::select! {
304
361
  biased;
@@ -307,7 +364,7 @@ impl Worker {
307
364
  // the loop right away to provide any potential new pending activation.
308
365
  // Continue here means that we unnecessarily add another permit to the poll buffer,
309
366
  // this will go away when polling is done in the background.
310
- _ = pending_activations_notification.changed() => continue,
367
+ _ = self.pending_activations_notify.notified() => continue,
311
368
  r = self.workflow_poll_or_wfts_drained() => r,
312
369
  }?;
313
370
 
@@ -326,32 +383,47 @@ impl Worker {
326
383
 
327
384
  pub(crate) async fn complete_workflow_activation(
328
385
  &self,
329
- completion: WfActivationCompletion,
386
+ completion: WorkflowActivationCompletion,
330
387
  ) -> Result<(), CompleteWfError> {
331
388
  let wfstatus = completion.status;
332
- let did_complete_wft = match wfstatus {
333
- Some(wf_activation_completion::Status::Successful(success)) => {
389
+ let report_outcome = match wfstatus {
390
+ Some(workflow_activation_completion::Status::Successful(success)) => {
334
391
  self.wf_activation_success(&completion.run_id, success)
335
392
  .await
336
393
  }
337
- Some(wf_activation_completion::Status::Failed(failure)) => {
338
- self.wf_activation_failed(&completion.run_id, failure).await
394
+
395
+ Some(workflow_activation_completion::Status::Failed(failure)) => {
396
+ self.wf_activation_failed(
397
+ &completion.run_id,
398
+ WorkflowTaskFailedCause::Unspecified,
399
+ EvictionReason::LangFail,
400
+ failure,
401
+ )
402
+ .await
403
+ }
404
+ None => {
405
+ return Err(CompleteWfError::MalformedWorkflowCompletion {
406
+ reason: "Workflow completion had empty status field".to_owned(),
407
+ completion: None,
408
+ })
339
409
  }
340
- None => Err(CompleteWfError::MalformedWorkflowCompletion {
341
- reason: "Workflow completion had empty status field".to_owned(),
342
- completion: None,
343
- }),
344
410
  }?;
345
- self.after_workflow_activation(&completion.run_id, did_complete_wft);
346
- Ok(())
347
- }
348
411
 
349
- fn maybe_notify_wtfs_drained(&self) {
350
- if *self.shutdown_requested.borrow() && self.outstanding_workflow_tasks() == 0 {
351
- self.wfts_drained_sender
352
- .send(true)
353
- .expect("wfts_drained sender shouldn't be dropped");
412
+ self.wft_manager
413
+ .after_wft_report(&completion.run_id, report_outcome.reported_to_server);
414
+ if report_outcome.reported_to_server || report_outcome.failed {
415
+ // If we failed the WFT but didn't report anything, we still want to release the WFT
416
+ // permit since the server will eventually time out the task and we've already evicted
417
+ // the run.
418
+ self.return_workflow_task_permit();
419
+ }
420
+ self.wfts_drained_notify.notify_waiters();
421
+
422
+ if let Some(h) = &self.post_activate_hook {
423
+ h(self);
354
424
  }
425
+
426
+ Ok(())
355
427
  }
356
428
 
357
429
  /// Tell the worker a workflow task has completed, for tracking max outstanding WFTs
@@ -359,37 +431,43 @@ impl Worker {
359
431
  self.workflows_semaphore.add_permits(1);
360
432
  }
361
433
 
362
- pub(crate) fn request_wf_eviction(&self, run_id: &str, reason: impl Into<String>) {
363
- self.wft_manager.request_eviction(run_id, reason);
434
+ pub(crate) fn request_wf_eviction(
435
+ &self,
436
+ run_id: &str,
437
+ message: impl Into<String>,
438
+ reason: EvictionReason,
439
+ ) {
440
+ self.wft_manager.request_eviction(run_id, message, reason);
441
+ }
442
+
443
+ /// Sets a function to be called at the end of each activation completion
444
+ pub(crate) fn set_post_activate_hook(
445
+ &mut self,
446
+ callback: impl Fn(&Self) + Send + Sync + 'static,
447
+ ) {
448
+ self.post_activate_hook = Some(Box::new(callback))
364
449
  }
365
450
 
366
451
  /// Resolves with WFT poll response or `PollWfError::ShutDown` if WFTs have been drained
367
452
  async fn workflow_poll_or_wfts_drained(
368
453
  &self,
369
454
  ) -> Result<Option<ValidPollWFTQResponse>, PollWfError> {
455
+ let mut shutdown_requested = self.shutdown_requested.clone();
370
456
  loop {
371
- if *self.wfts_drained.borrow() {
372
- debug!("Returning shutdown error");
373
- return Err(PollWfError::ShutDown);
374
- } else if *self.shutdown_requested.borrow() {
375
- self.wfts_drained
376
- .clone()
377
- .changed()
378
- .await
379
- .expect("wfts_drained should not be dropped");
380
- } else {
381
- let mut shutdown_requested = self.shutdown_requested.clone();
382
- tokio::select! {
383
- biased;
384
-
385
- r = self.workflow_poll()
386
- .map_err(Into::into) => match r {
387
- Err(PollWfError::ShutDown) => {},
388
- _ => return r,
389
- },
390
- _ = shutdown_requested.changed() => {},
391
- }
392
- };
457
+ tokio::select! {
458
+ biased;
459
+
460
+ r = self.workflow_poll().map_err(Into::into) => {
461
+ if matches!(r, Err(PollWfError::ShutDown)) {
462
+ // Don't actually return shutdown until workflow tasks are drained.
463
+ // Outstanding tasks being completed will generate new pending activations
464
+ // which will cause us to abort this function.
465
+ self.all_wfts_drained().await;
466
+ }
467
+ return r
468
+ },
469
+ _ = shutdown_requested.changed() => {},
470
+ }
393
471
  }
394
472
  }
395
473
 
@@ -399,6 +477,17 @@ impl Worker {
399
477
  /// Returns `Ok(None)` in the event of a poll timeout, or if there was some gRPC error that
400
478
  /// callers can't do anything about.
401
479
  async fn workflow_poll(&self) -> Result<Option<ValidPollWFTQResponse>, PollWfError> {
480
+ // We can't say we're shut down if there are outstanding LAs, as they could end up WFT
481
+ // heartbeating which is a "new" workflow task that we need to accept and process as long as
482
+ // the LA is outstanding. Similarly, if we already have such tasks (from a WFT completion),
483
+ // then we must fetch them from the source before we can say workflow polling is shutdown.
484
+ if *self.shutdown_requested.borrow()
485
+ && !self.wf_task_source.has_tasks_from_complete()
486
+ && self.local_act_mgr.num_outstanding() == 0
487
+ {
488
+ return Err(PollWfError::ShutDown);
489
+ }
490
+
402
491
  let sem = self
403
492
  .workflows_semaphore
404
493
  .acquire()
@@ -406,8 +495,8 @@ impl Worker {
406
495
  .expect("outstanding workflow tasks semaphore not dropped");
407
496
 
408
497
  let res = self
409
- .wf_task_poll_buffer
410
- .poll()
498
+ .wf_task_source
499
+ .next_wft()
411
500
  .await
412
501
  .ok_or(PollWfError::ShutDown)??;
413
502
 
@@ -444,12 +533,12 @@ impl Worker {
444
533
  async fn apply_server_work(
445
534
  &self,
446
535
  work: ValidPollWFTQResponse,
447
- ) -> Result<Option<WfActivation>, PollWfError> {
536
+ ) -> Result<Option<WorkflowActivation>, PollWfError> {
448
537
  let we = work.workflow_execution.clone();
449
538
  let tt = work.task_token.clone();
450
539
  let res = self
451
540
  .wft_manager
452
- .apply_new_poll_resp(work, &self.server_gateway)
541
+ .apply_new_poll_resp(work, self.server_gateway.clone())
453
542
  .await;
454
543
  Ok(match res {
455
544
  NewWfTaskOutcome::IssueActivation(a) => {
@@ -462,10 +551,10 @@ impl Worker {
462
551
  self.return_workflow_task_permit();
463
552
  None
464
553
  }
465
- NewWfTaskOutcome::Autocomplete => {
554
+ NewWfTaskOutcome::Autocomplete | NewWfTaskOutcome::LocalActsOutstanding => {
466
555
  debug!(workflow_execution=?we,
467
- "No work for lang to perform after polling server. Sending autocomplete.");
468
- self.complete_workflow_activation(WfActivationCompletion {
556
+ "No new work for lang to perform after polling server");
557
+ self.complete_workflow_activation(WorkflowActivationCompletion {
469
558
  task_queue: self.config.task_queue.clone(),
470
559
  run_id: we.run_id,
471
560
  status: Some(workflow_completion::Success::from_variants(vec![]).into()),
@@ -496,6 +585,7 @@ impl Worker {
496
585
  self.request_wf_eviction(
497
586
  &we.run_id,
498
587
  format!("Error while applying poll response to workflow: {:?}", e),
588
+ e.evict_reason(),
499
589
  );
500
590
  None
501
591
  }
@@ -509,7 +599,7 @@ impl Worker {
509
599
  &self,
510
600
  run_id: &str,
511
601
  success: workflow_completion::Success,
512
- ) -> Result<bool, CompleteWfError> {
602
+ ) -> Result<WFTReportOutcome, CompleteWfError> {
513
603
  // Convert to wf commands
514
604
  let cmds = success
515
605
  .commands
@@ -523,13 +613,18 @@ impl Worker {
523
613
  completion: None,
524
614
  })?;
525
615
 
526
- match self.wft_manager.successful_activation(run_id, cmds).await {
616
+ match self
617
+ .wft_manager
618
+ .successful_activation(run_id, cmds, |acts| self.local_act_mgr.enqueue(acts))
619
+ .await
620
+ {
527
621
  Ok(Some(ServerCommandsWithWorkflowInfo {
528
622
  task_token,
529
623
  action:
530
624
  ActivationAction::WftComplete {
531
625
  commands,
532
626
  query_responses,
627
+ force_new_wft,
533
628
  },
534
629
  })) => {
535
630
  debug!("Sending commands to server: {:?}", &commands);
@@ -541,19 +636,33 @@ impl Worker {
541
636
  commands,
542
637
  query_responses,
543
638
  sticky_attributes: None,
544
- return_new_workflow_task: false,
545
- force_create_new_workflow_task: false,
639
+ return_new_workflow_task: force_new_wft,
640
+ force_create_new_workflow_task: force_new_wft,
546
641
  };
547
642
  let sticky_attrs = self.get_sticky_attrs();
643
+ // Do not return new WFT if we would not cache, because returned new WFTs are always
644
+ // partial.
645
+ if sticky_attrs.is_none() {
646
+ completion.return_new_workflow_task = false;
647
+ }
548
648
  completion.sticky_attributes = sticky_attrs;
649
+
549
650
  self.handle_wft_reporting_errs(run_id, || async {
550
- self.server_gateway
651
+ let maybe_wft = self
652
+ .server_gateway
551
653
  .complete_workflow_task(completion)
552
654
  .instrument(span!(tracing::Level::DEBUG, "Complete WFT call"))
553
- .await
655
+ .await?;
656
+ if let Some(wft) = maybe_wft.workflow_task {
657
+ self.wf_task_source.add_wft_from_completion(wft);
658
+ }
659
+ Ok(())
554
660
  })
555
661
  .await?;
556
- Ok(true)
662
+ Ok(WFTReportOutcome {
663
+ reported_to_server: true,
664
+ failed: false,
665
+ })
557
666
  }
558
667
  Ok(Some(ServerCommandsWithWorkflowInfo {
559
668
  task_token,
@@ -563,9 +672,15 @@ impl Worker {
563
672
  self.server_gateway
564
673
  .respond_legacy_query(task_token, result)
565
674
  .await?;
566
- Ok(true)
675
+ Ok(WFTReportOutcome {
676
+ reported_to_server: true,
677
+ failed: false,
678
+ })
567
679
  }
568
- Ok(None) => Ok(false),
680
+ Ok(None) => Ok(WFTReportOutcome {
681
+ reported_to_server: false,
682
+ failed: false,
683
+ }),
569
684
  Err(update_err) => {
570
685
  // Automatically fail the workflow task in the event we couldn't update machines
571
686
  let fail_cause = if matches!(&update_err.source, WFMachinesError::Nondeterminism(_))
@@ -574,30 +689,14 @@ impl Worker {
574
689
  } else {
575
690
  WorkflowTaskFailedCause::Unspecified
576
691
  };
577
-
578
- warn!(run_id, error=?update_err, "Failing workflow task");
579
-
580
- if let Some(ref tt) = update_err.task_token {
581
- let wft_fail_str = format!("{:?}", update_err);
582
- self.handle_wft_reporting_errs(run_id, || async {
583
- self.server_gateway
584
- .fail_workflow_task(
585
- tt.clone(),
586
- fail_cause,
587
- Some(Failure::application_failure(wft_fail_str.clone(), false)),
588
- )
589
- .await
590
- })
591
- .await?;
592
- // We must evict the workflow since we've failed a WFT
593
- self.request_wf_eviction(
594
- run_id,
595
- format!("Workflow task failure: {}", wft_fail_str),
596
- );
597
- Ok(true)
598
- } else {
599
- Ok(false)
600
- }
692
+ let wft_fail_str = format!("{:?}", update_err);
693
+ self.wf_activation_failed(
694
+ run_id,
695
+ fail_cause,
696
+ update_err.evict_reason(),
697
+ Failure::application_failure(wft_fail_str.clone(), false).into(),
698
+ )
699
+ .await
601
700
  }
602
701
  }
603
702
  }
@@ -608,39 +707,45 @@ impl Worker {
608
707
  async fn wf_activation_failed(
609
708
  &self,
610
709
  run_id: &str,
710
+ cause: WorkflowTaskFailedCause,
711
+ reason: EvictionReason,
611
712
  failure: workflow_completion::Failure,
612
- ) -> Result<bool, CompleteWfError> {
613
- Ok(match self.wft_manager.failed_activation(run_id) {
614
- FailedActivationOutcome::Report(tt) => {
615
- self.handle_wft_reporting_errs(run_id, || async {
616
- self.server_gateway
617
- .fail_workflow_task(
618
- tt,
619
- WorkflowTaskFailedCause::Unspecified,
620
- failure.failure.map(Into::into),
621
- )
622
- .await
623
- })
624
- .await?;
625
- true
626
- }
627
- FailedActivationOutcome::ReportLegacyQueryFailure(task_token) => {
628
- self.server_gateway
629
- .respond_legacy_query(task_token, legacy_query_failure(failure))
713
+ ) -> Result<WFTReportOutcome, CompleteWfError> {
714
+ Ok(
715
+ match self.wft_manager.failed_activation(
716
+ run_id,
717
+ reason,
718
+ format!("Workflow activation completion failed: {:?}", failure),
719
+ ) {
720
+ FailedActivationOutcome::Report(tt) => {
721
+ warn!(run_id, failure=?failure, "Failing workflow activation");
722
+ self.handle_wft_reporting_errs(run_id, || async {
723
+ self.server_gateway
724
+ .fail_workflow_task(tt, cause, failure.failure.map(Into::into))
725
+ .await
726
+ })
630
727
  .await?;
631
- true
632
- }
633
- FailedActivationOutcome::NoReport => false,
634
- })
635
- }
636
-
637
- fn after_workflow_activation(&self, run_id: &str, did_complete_wft: bool) {
638
- self.wft_manager.after_wft_report(run_id);
639
- if did_complete_wft {
640
- self.return_workflow_task_permit();
641
- }
642
- self.wft_manager.on_activation_done(run_id);
643
- self.maybe_notify_wtfs_drained();
728
+ WFTReportOutcome {
729
+ reported_to_server: true,
730
+ failed: true,
731
+ }
732
+ }
733
+ FailedActivationOutcome::ReportLegacyQueryFailure(task_token) => {
734
+ warn!(run_id, failure=?failure, "Failing legacy query request");
735
+ self.server_gateway
736
+ .respond_legacy_query(task_token, legacy_query_failure(failure))
737
+ .await?;
738
+ WFTReportOutcome {
739
+ reported_to_server: true,
740
+ failed: true,
741
+ }
742
+ }
743
+ FailedActivationOutcome::NoReport => WFTReportOutcome {
744
+ reported_to_server: false,
745
+ failed: true,
746
+ },
747
+ },
748
+ )
644
749
  }
645
750
 
646
751
  /// Handle server errors from either completing or failing a workflow task. Returns any errors
@@ -653,20 +758,20 @@ impl Worker {
653
758
  where
654
759
  Fut: Future<Output = Result<T, tonic::Status>>,
655
760
  {
656
- let mut should_evict = false;
761
+ let mut should_evict = None;
657
762
  let res = match completer().await {
658
763
  Err(err) => {
659
764
  match err.code() {
660
765
  // Silence unhandled command errors since the lang SDK cannot do anything about
661
766
  // them besides poll again, which it will do anyway.
662
767
  tonic::Code::InvalidArgument if err.message() == "UnhandledCommand" => {
663
- warn!(error = %err, "Unhandled command response when completing");
664
- should_evict = true;
768
+ warn!(error = %err, run_id, "Unhandled command response when completing");
769
+ should_evict = Some(EvictionReason::UnhandledCommand);
665
770
  Ok(())
666
771
  }
667
772
  tonic::Code::NotFound => {
668
- warn!(error = %err, "Task not found when completing");
669
- should_evict = true;
773
+ warn!(error = %err, run_id, "Task not found when completing");
774
+ should_evict = Some(EvictionReason::TaskNotFound);
670
775
  Ok(())
671
776
  }
672
777
  _ => Err(err),
@@ -674,12 +779,46 @@ impl Worker {
674
779
  }
675
780
  _ => Ok(()),
676
781
  };
677
- if should_evict {
678
- self.request_wf_eviction(run_id, "Error reporting WFT to server");
782
+ if let Some(reason) = should_evict {
783
+ self.request_wf_eviction(run_id, "Error reporting WFT to server", reason);
679
784
  }
680
785
  res.map_err(Into::into)
681
786
  }
682
787
 
788
+ async fn complete_local_act(
789
+ &self,
790
+ la_res: LocalActivityExecutionResult,
791
+ info: LocalInFlightActInfo,
792
+ backoff: Option<prost_types::Duration>,
793
+ ) {
794
+ self.notify_local_result(
795
+ &info.la_info.workflow_exec_info.run_id,
796
+ LocalResolution::LocalActivity(LocalActivityResolution {
797
+ seq: info.la_info.schedule_cmd.seq,
798
+ result: la_res,
799
+ runtime: info.dispatch_time.elapsed(),
800
+ attempt: info.attempt,
801
+ backoff,
802
+ original_schedule_time: Some(info.la_info.schedule_time),
803
+ }),
804
+ )
805
+ .await
806
+ }
807
+
808
+ async fn notify_local_result(&self, run_id: &str, res: LocalResolution) {
809
+ if let Err(e) = self.wft_manager.notify_of_local_result(run_id, res).await {
810
+ error!(
811
+ "Problem with local resolution on run {}: {:?} -- will evict the workflow",
812
+ run_id, e
813
+ );
814
+ self.request_wf_eviction(
815
+ run_id,
816
+ "Issue while processing local resolution",
817
+ e.evict_reason(),
818
+ );
819
+ }
820
+ }
821
+
683
822
  /// Return the sticky execution attributes that should be used to complete workflow tasks
684
823
  /// for this worker (if any).
685
824
  fn get_sticky_attrs(&self) -> Option<StickyExecutionAttributes> {
@@ -696,102 +835,89 @@ impl Worker {
696
835
  })
697
836
  }
698
837
 
699
- /// A future that resolves to true the shutdown flag has been set to true, false is simply
700
- /// a signal that a poll loop should be restarted. Only meant to be called from polling funcs.
701
- async fn shutdown_notifier(&self) {
702
- if *self.shutdown_requested.borrow() {
703
- return;
838
+ /// Resolves when there are no more outstanding WFTs
839
+ async fn all_wfts_drained(&self) {
840
+ while self.outstanding_workflow_tasks() != 0 {
841
+ self.wfts_drained_notify.notified().await;
704
842
  }
705
- let _ = self.shutdown_requested.clone().changed().await;
706
843
  }
707
844
  }
708
845
 
709
- impl WorkerConfig {
710
- fn max_nonsticky_polls(&self) -> usize {
711
- ((self.max_concurrent_wft_polls as f32 * self.nonsticky_to_sticky_poll_ratio) as usize)
712
- .max(1)
713
- }
714
- fn max_sticky_polls(&self) -> usize {
715
- self.max_concurrent_wft_polls
716
- .saturating_sub(self.max_nonsticky_polls())
717
- .max(1)
718
- }
846
+ struct WFTReportOutcome {
847
+ reported_to_server: bool,
848
+ failed: bool,
719
849
  }
720
850
 
721
851
  #[cfg(test)]
722
852
  mod tests {
723
853
  use super::*;
724
- use crate::{pollers::MockServerGatewayApis, test_help::fake_sg_opts};
854
+ use temporal_client::mocks::mock_gateway;
725
855
  use temporal_sdk_core_protos::temporal::api::workflowservice::v1::PollActivityTaskQueueResponse;
726
856
 
727
857
  #[tokio::test]
728
858
  async fn activity_timeouts_dont_eat_permits() {
729
- let mut mock_gateway = MockServerGatewayApis::new();
859
+ let mut mock_gateway = mock_gateway();
730
860
  mock_gateway
731
861
  .expect_poll_activity_task()
732
862
  .returning(|_| Ok(PollActivityTaskQueueResponse::default()));
733
- let gwref = GatewayRef::new(Arc::new(mock_gateway), fake_sg_opts());
734
863
 
735
864
  let cfg = WorkerConfigBuilder::default()
736
865
  .task_queue("whatever")
737
866
  .max_outstanding_activities(5_usize)
738
867
  .build()
739
868
  .unwrap();
740
- let worker = Worker::new(cfg, None, Arc::new(gwref), Default::default());
869
+ let worker = Worker::new(cfg, None, Arc::new(mock_gateway), Default::default());
741
870
  assert_eq!(worker.activity_poll().await.unwrap(), None);
742
871
  assert_eq!(worker.at_task_mgr.unwrap().remaining_activity_capacity(), 5);
743
872
  }
744
873
 
745
874
  #[tokio::test]
746
875
  async fn workflow_timeouts_dont_eat_permits() {
747
- let mut mock_gateway = MockServerGatewayApis::new();
876
+ let mut mock_gateway = mock_gateway();
748
877
  mock_gateway
749
878
  .expect_poll_workflow_task()
750
879
  .returning(|_, _| Ok(PollWorkflowTaskQueueResponse::default()));
751
- let gwref = GatewayRef::new(Arc::new(mock_gateway), fake_sg_opts());
752
880
 
753
881
  let cfg = WorkerConfigBuilder::default()
754
882
  .task_queue("whatever")
755
883
  .max_outstanding_workflow_tasks(5_usize)
756
884
  .build()
757
885
  .unwrap();
758
- let worker = Worker::new(cfg, None, Arc::new(gwref), Default::default());
886
+ let worker = Worker::new(cfg, None, Arc::new(mock_gateway), Default::default());
759
887
  assert_eq!(worker.workflow_poll().await.unwrap(), None);
760
888
  assert_eq!(worker.workflows_semaphore.available_permits(), 5);
761
889
  }
762
890
 
763
891
  #[tokio::test]
764
892
  async fn activity_errs_dont_eat_permits() {
765
- let mut mock_gateway = MockServerGatewayApis::new();
893
+ let mut mock_gateway = mock_gateway();
766
894
  mock_gateway
767
895
  .expect_poll_activity_task()
768
896
  .returning(|_| Err(tonic::Status::internal("ahhh")));
769
- let gwref = GatewayRef::new(Arc::new(mock_gateway), fake_sg_opts());
770
897
 
771
898
  let cfg = WorkerConfigBuilder::default()
772
899
  .task_queue("whatever")
773
900
  .max_outstanding_activities(5_usize)
774
901
  .build()
775
902
  .unwrap();
776
- let worker = Worker::new(cfg, None, Arc::new(gwref), Default::default());
903
+ let worker = Worker::new(cfg, None, Arc::new(mock_gateway), Default::default());
777
904
  assert!(worker.activity_poll().await.is_err());
778
905
  assert_eq!(worker.at_task_mgr.unwrap().remaining_activity_capacity(), 5);
779
906
  }
780
907
 
781
908
  #[tokio::test]
782
909
  async fn workflow_errs_dont_eat_permits() {
783
- let mut mock_gateway = MockServerGatewayApis::new();
910
+ let mut mock_gateway = mock_gateway();
784
911
  mock_gateway
785
912
  .expect_poll_workflow_task()
786
913
  .returning(|_, _| Err(tonic::Status::internal("ahhh")));
787
- let gwref = GatewayRef::new(Arc::new(mock_gateway), fake_sg_opts());
788
914
 
789
915
  let cfg = WorkerConfigBuilder::default()
790
916
  .task_queue("whatever")
791
917
  .max_outstanding_workflow_tasks(5_usize)
792
918
  .build()
793
919
  .unwrap();
794
- let worker = Worker::new(cfg, None, Arc::new(gwref), Default::default());
920
+ let worker = Worker::new(cfg, None, Arc::new(mock_gateway), Default::default());
795
921
  assert!(worker.workflow_poll().await.is_err());
796
922
  assert_eq!(worker.workflows_semaphore.available_permits(), 5);
797
923
  }