@temporalio/core-bridge 0.16.4 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. package/Cargo.lock +339 -226
  2. package/Cargo.toml +7 -3
  3. package/common.js +50 -0
  4. package/index.d.ts +7 -0
  5. package/index.js +12 -0
  6. package/package.json +7 -4
  7. package/releases/aarch64-apple-darwin/index.node +0 -0
  8. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
  9. package/{index.node → releases/index.node} +0 -0
  10. package/releases/x86_64-apple-darwin/index.node +0 -0
  11. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  12. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  13. package/scripts/build.js +10 -50
  14. package/sdk-core/.buildkite/docker/Dockerfile +1 -1
  15. package/sdk-core/.buildkite/docker/docker-compose.yaml +2 -2
  16. package/sdk-core/.buildkite/pipeline.yml +2 -0
  17. package/sdk-core/Cargo.toml +1 -88
  18. package/sdk-core/README.md +30 -6
  19. package/sdk-core/bridge-ffi/Cargo.toml +24 -0
  20. package/sdk-core/bridge-ffi/LICENSE.txt +23 -0
  21. package/sdk-core/bridge-ffi/build.rs +25 -0
  22. package/sdk-core/bridge-ffi/include/sdk-core-bridge.h +216 -0
  23. package/sdk-core/bridge-ffi/src/lib.rs +829 -0
  24. package/sdk-core/bridge-ffi/src/wrappers.rs +193 -0
  25. package/sdk-core/client/Cargo.toml +32 -0
  26. package/sdk-core/{src/pollers/gateway.rs → client/src/lib.rs} +101 -195
  27. package/sdk-core/client/src/metrics.rs +89 -0
  28. package/sdk-core/client/src/mocks.rs +167 -0
  29. package/sdk-core/{src/pollers → client/src}/retry.rs +172 -14
  30. package/sdk-core/core/Cargo.toml +96 -0
  31. package/sdk-core/{src → core/src}/core_tests/activity_tasks.rs +193 -37
  32. package/sdk-core/{src → core/src}/core_tests/child_workflows.rs +14 -14
  33. package/sdk-core/{src → core/src}/core_tests/determinism.rs +8 -8
  34. package/sdk-core/core/src/core_tests/local_activities.rs +328 -0
  35. package/sdk-core/{src → core/src}/core_tests/mod.rs +6 -9
  36. package/sdk-core/{src → core/src}/core_tests/queries.rs +54 -54
  37. package/sdk-core/{src → core/src}/core_tests/replay_flag.rs +8 -12
  38. package/sdk-core/{src → core/src}/core_tests/workers.rs +120 -33
  39. package/sdk-core/{src → core/src}/core_tests/workflow_cancels.rs +16 -26
  40. package/sdk-core/{src → core/src}/core_tests/workflow_tasks.rs +280 -292
  41. package/sdk-core/core/src/lib.rs +374 -0
  42. package/sdk-core/{src → core/src}/log_export.rs +3 -27
  43. package/sdk-core/core/src/pending_activations.rs +162 -0
  44. package/sdk-core/{src → core/src}/pollers/mod.rs +4 -22
  45. package/sdk-core/{src → core/src}/pollers/poll_buffer.rs +1 -1
  46. package/sdk-core/core/src/protosext/mod.rs +396 -0
  47. package/sdk-core/core/src/replay/mod.rs +210 -0
  48. package/sdk-core/core/src/retry_logic.rs +144 -0
  49. package/sdk-core/{src → core/src}/telemetry/metrics.rs +3 -58
  50. package/sdk-core/{src → core/src}/telemetry/mod.rs +8 -8
  51. package/sdk-core/{src → core/src}/telemetry/prometheus_server.rs +0 -0
  52. package/sdk-core/{src → core/src}/test_help/mod.rs +35 -83
  53. package/sdk-core/{src → core/src}/worker/activities/activity_heartbeat_manager.rs +95 -42
  54. package/sdk-core/core/src/worker/activities/local_activities.rs +973 -0
  55. package/sdk-core/{src → core/src}/worker/activities.rs +52 -33
  56. package/sdk-core/{src → core/src}/worker/dispatcher.rs +8 -6
  57. package/sdk-core/{src → core/src}/worker/mod.rs +347 -221
  58. package/sdk-core/core/src/worker/wft_delivery.rs +81 -0
  59. package/sdk-core/{src → core/src}/workflow/bridge.rs +5 -2
  60. package/sdk-core/{src → core/src}/workflow/driven_workflow.rs +17 -7
  61. package/sdk-core/{src → core/src}/workflow/history_update.rs +33 -7
  62. package/sdk-core/{src → core/src/workflow}/machines/activity_state_machine.rs +26 -26
  63. package/sdk-core/{src → core/src/workflow}/machines/cancel_external_state_machine.rs +8 -11
  64. package/sdk-core/{src → core/src/workflow}/machines/cancel_workflow_state_machine.rs +19 -21
  65. package/sdk-core/{src → core/src/workflow}/machines/child_workflow_state_machine.rs +20 -31
  66. package/sdk-core/{src → core/src/workflow}/machines/complete_workflow_state_machine.rs +3 -5
  67. package/sdk-core/{src → core/src/workflow}/machines/continue_as_new_workflow_state_machine.rs +18 -18
  68. package/sdk-core/{src → core/src/workflow}/machines/fail_workflow_state_machine.rs +5 -6
  69. package/sdk-core/core/src/workflow/machines/local_activity_state_machine.rs +1451 -0
  70. package/sdk-core/{src → core/src/workflow}/machines/mod.rs +54 -107
  71. package/sdk-core/{src → core/src/workflow}/machines/mutable_side_effect_state_machine.rs +0 -0
  72. package/sdk-core/{src → core/src/workflow}/machines/patch_state_machine.rs +29 -30
  73. package/sdk-core/{src → core/src/workflow}/machines/side_effect_state_machine.rs +0 -0
  74. package/sdk-core/{src → core/src/workflow}/machines/signal_external_state_machine.rs +17 -19
  75. package/sdk-core/{src → core/src/workflow}/machines/timer_state_machine.rs +20 -21
  76. package/sdk-core/{src → core/src/workflow}/machines/transition_coverage.rs +5 -2
  77. package/sdk-core/{src → core/src/workflow}/machines/upsert_search_attributes_state_machine.rs +0 -0
  78. package/sdk-core/core/src/workflow/machines/workflow_machines/local_acts.rs +96 -0
  79. package/sdk-core/{src → core/src/workflow}/machines/workflow_machines.rs +357 -171
  80. package/sdk-core/{src → core/src/workflow}/machines/workflow_task_state_machine.rs +1 -1
  81. package/sdk-core/{src → core/src}/workflow/mod.rs +200 -39
  82. package/sdk-core/{src → core/src}/workflow/workflow_tasks/cache_manager.rs +0 -0
  83. package/sdk-core/{src → core/src}/workflow/workflow_tasks/concurrency_manager.rs +38 -5
  84. package/sdk-core/{src → core/src}/workflow/workflow_tasks/mod.rs +317 -103
  85. package/sdk-core/{test_utils → core-api}/Cargo.toml +10 -7
  86. package/sdk-core/{src → core-api/src}/errors.rs +42 -92
  87. package/sdk-core/core-api/src/lib.rs +158 -0
  88. package/sdk-core/{src/worker/config.rs → core-api/src/worker.rs} +18 -23
  89. package/sdk-core/etc/deps.svg +156 -0
  90. package/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +5 -5
  91. package/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +3 -5
  92. package/sdk-core/fsm/rustfsm_trait/src/lib.rs +7 -1
  93. package/sdk-core/histories/fail_wf_task.bin +0 -0
  94. package/sdk-core/histories/timer_workflow_history.bin +0 -0
  95. package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +44 -13
  96. package/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +19 -1
  97. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/common.proto +1 -1
  98. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +9 -0
  99. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +1 -0
  100. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +1 -0
  101. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +13 -0
  102. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +14 -7
  103. package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +176 -18
  104. package/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +6 -0
  105. package/sdk-core/protos/api_upstream/temporal/api/query/v1/message.proto +11 -0
  106. package/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +3 -0
  107. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +156 -7
  108. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +135 -104
  109. package/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +78 -0
  110. package/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +78 -0
  111. package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +205 -0
  112. package/sdk-core/protos/local/temporal/sdk/core/bridge/service.proto +61 -0
  113. package/sdk-core/protos/local/{child_workflow.proto → temporal/sdk/core/child_workflow/child_workflow.proto} +1 -1
  114. package/sdk-core/protos/local/{common.proto → temporal/sdk/core/common/common.proto} +5 -3
  115. package/sdk-core/protos/local/{core_interface.proto → temporal/sdk/core/core_interface.proto} +10 -10
  116. package/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +30 -0
  117. package/sdk-core/protos/local/{workflow_activation.proto → temporal/sdk/core/workflow_activation/workflow_activation.proto} +35 -11
  118. package/sdk-core/protos/local/{workflow_commands.proto → temporal/sdk/core/workflow_commands/workflow_commands.proto} +55 -4
  119. package/sdk-core/protos/local/{workflow_completion.proto → temporal/sdk/core/workflow_completion/workflow_completion.proto} +3 -3
  120. package/sdk-core/sdk/Cargo.toml +32 -0
  121. package/sdk-core/{src/prototype_rust_sdk → sdk/src}/conversions.rs +0 -0
  122. package/sdk-core/sdk/src/lib.rs +699 -0
  123. package/sdk-core/sdk/src/payload_converter.rs +11 -0
  124. package/sdk-core/sdk/src/workflow_context/options.rs +180 -0
  125. package/sdk-core/{src/prototype_rust_sdk → sdk/src}/workflow_context.rs +201 -124
  126. package/sdk-core/{src/prototype_rust_sdk → sdk/src}/workflow_future.rs +63 -30
  127. package/sdk-core/sdk-core-protos/Cargo.toml +10 -0
  128. package/sdk-core/sdk-core-protos/build.rs +28 -6
  129. package/sdk-core/sdk-core-protos/src/constants.rs +7 -0
  130. package/sdk-core/{src/test_help → sdk-core-protos/src}/history_builder.rs +134 -49
  131. package/sdk-core/sdk-core-protos/src/history_info.rs +216 -0
  132. package/sdk-core/sdk-core-protos/src/lib.rs +601 -168
  133. package/sdk-core/sdk-core-protos/src/task_token.rs +38 -0
  134. package/sdk-core/sdk-core-protos/src/utilities.rs +14 -0
  135. package/sdk-core/test-utils/Cargo.toml +32 -0
  136. package/sdk-core/{src/test_help → test-utils/src}/canned_histories.rs +59 -78
  137. package/sdk-core/test-utils/src/histfetch.rs +28 -0
  138. package/sdk-core/{test_utils → test-utils}/src/lib.rs +131 -68
  139. package/sdk-core/tests/integ_tests/client_tests.rs +1 -1
  140. package/sdk-core/tests/integ_tests/heartbeat_tests.rs +11 -7
  141. package/sdk-core/tests/integ_tests/polling_tests.rs +12 -11
  142. package/sdk-core/tests/integ_tests/queries_tests.rs +82 -78
  143. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +91 -71
  144. package/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +3 -4
  145. package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +2 -4
  146. package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +4 -6
  147. package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +4 -6
  148. package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +3 -4
  149. package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +496 -0
  150. package/sdk-core/tests/integ_tests/workflow_tests/patches.rs +5 -8
  151. package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +125 -0
  152. package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +7 -13
  153. package/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +33 -5
  154. package/sdk-core/tests/integ_tests/workflow_tests/timers.rs +12 -16
  155. package/sdk-core/tests/integ_tests/workflow_tests.rs +85 -82
  156. package/sdk-core/tests/load_tests.rs +6 -6
  157. package/sdk-core/tests/main.rs +2 -2
  158. package/src/conversions.rs +24 -21
  159. package/src/errors.rs +8 -0
  160. package/src/lib.rs +323 -211
  161. package/sdk-core/protos/local/activity_result.proto +0 -46
  162. package/sdk-core/protos/local/activity_task.proto +0 -66
  163. package/sdk-core/src/core_tests/retry.rs +0 -147
  164. package/sdk-core/src/lib.rs +0 -403
  165. package/sdk-core/src/machines/local_activity_state_machine.rs +0 -117
  166. package/sdk-core/src/pending_activations.rs +0 -249
  167. package/sdk-core/src/protosext/mod.rs +0 -160
  168. package/sdk-core/src/prototype_rust_sdk.rs +0 -412
  169. package/sdk-core/src/task_token.rs +0 -20
  170. package/sdk-core/src/test_help/history_info.rs +0 -157
@@ -4,33 +4,46 @@ mod cache_manager;
4
4
  mod concurrency_manager;
5
5
 
6
6
  use crate::{
7
- errors::{WorkflowMissingError, WorkflowUpdateError},
8
- machines::{ProtoCommand, WFCommand, WFMachinesError},
9
7
  pending_activations::PendingActivations,
10
- pollers::GatewayRef,
11
- protosext::{ValidPollWFTQResponse, WfActivationExt},
12
- task_token::TaskToken,
8
+ protosext::{ValidPollWFTQResponse, WorkflowActivationExt},
13
9
  telemetry::metrics::MetricsContext,
10
+ worker::{LocalActRequest, LocalActivityResolution},
14
11
  workflow::{
12
+ machines::WFMachinesError,
15
13
  workflow_tasks::{
16
14
  cache_manager::WorkflowCacheManager, concurrency_manager::WorkflowConcurrencyManager,
17
15
  },
18
- HistoryPaginator, HistoryUpdate, WorkflowCachingPolicy, WorkflowManager, LEGACY_QUERY_ID,
16
+ HistoryPaginator, HistoryUpdate, LocalResolution, WFCommand, WorkflowCachingPolicy,
17
+ WorkflowManager, LEGACY_QUERY_ID,
19
18
  },
20
19
  };
21
20
  use crossbeam::queue::SegQueue;
22
21
  use futures::FutureExt;
23
22
  use parking_lot::Mutex;
24
- use std::{fmt::Debug, time::Instant};
25
- use temporal_sdk_core_protos::coresdk::{
26
- workflow_activation::{
27
- create_evict_activation, create_query_activation, wf_activation_job, QueryWorkflow,
28
- WfActivation,
23
+ use std::{
24
+ fmt::Debug,
25
+ ops::Add,
26
+ sync::Arc,
27
+ time::{Duration, Instant},
28
+ };
29
+ use temporal_client::ServerGatewayApis;
30
+ use temporal_sdk_core_protos::{
31
+ coresdk::{
32
+ workflow_activation::{
33
+ create_query_activation, remove_from_cache::EvictionReason, workflow_activation_job,
34
+ QueryWorkflow, WorkflowActivation,
35
+ },
36
+ workflow_commands::QueryResult,
37
+ FromPayloadsExt,
29
38
  },
30
- workflow_commands::QueryResult,
31
- FromPayloadsExt,
39
+ temporal::api::command::v1::Command as ProtoCommand,
40
+ TaskToken,
32
41
  };
33
- use tokio::sync::watch;
42
+ use tokio::{sync::Notify, time::timeout_at};
43
+
44
+ /// What percentage of a WFT timeout we are willing to wait before sending a WFT heartbeat when
45
+ /// necessary.
46
+ const WFT_HEARTBEAT_TIMEOUT_FRACTION: f32 = 0.8;
34
47
 
35
48
  /// Centralizes concerns related to applying new workflow tasks and reporting the activations they
36
49
  /// produce.
@@ -43,10 +56,13 @@ pub struct WorkflowTaskManager {
43
56
  /// when cancelling an activity in try-cancel/abandon mode), or for other reasons such as a
44
57
  /// requested eviction. They queue here.
45
58
  pending_activations: PendingActivations,
59
+ /// Holds activations which are purely query activations needed to respond to legacy queries.
60
+ /// Activations may only be added here for runs which do not have other pending activations.
61
+ pending_legacy_queries: SegQueue<WorkflowActivation>,
46
62
  /// Holds poll wft responses from the server that need to be applied
47
63
  ready_buffered_wft: SegQueue<ValidPollWFTQResponse>,
48
64
  /// Used to wake blocked workflow task polling
49
- pending_activations_notifier: watch::Sender<bool>,
65
+ pending_activations_notifier: Arc<Notify>,
50
66
  /// Lock guarded cache manager, which is the authority for limit-based workflow machine eviction
51
67
  /// from the cache.
52
68
  // TODO: Also should be moved inside concurrency manager, but there is some complexity around
@@ -98,7 +114,7 @@ pub struct WorkflowTaskInfo {
98
114
  #[derive(Debug, derive_more::From)]
99
115
  pub(crate) enum NewWfTaskOutcome {
100
116
  /// A new activation for the workflow should be issued to lang
101
- IssueActivation(WfActivation),
117
+ IssueActivation(WorkflowActivation),
102
118
  /// The poll loop should be restarted, there is nothing to do
103
119
  TaskBuffered,
104
120
  /// The workflow task should be auto-completed with an empty command list, as it must be replied
@@ -108,6 +124,8 @@ pub(crate) enum NewWfTaskOutcome {
108
124
  CacheMiss,
109
125
  /// The workflow task ran into problems while being applied and we must now evict the workflow
110
126
  Evict(WorkflowUpdateError),
127
+ /// No action should be taken. Possibly we are waiting for local activities to complete
128
+ LocalActsOutstanding,
111
129
  }
112
130
 
113
131
  #[derive(Debug)]
@@ -118,24 +136,25 @@ pub enum FailedActivationOutcome {
118
136
  }
119
137
 
120
138
  #[derive(Debug)]
121
- pub struct ServerCommandsWithWorkflowInfo {
139
+ pub(crate) struct ServerCommandsWithWorkflowInfo {
122
140
  pub task_token: TaskToken,
123
141
  pub action: ActivationAction,
124
142
  }
125
143
 
126
144
  #[derive(Debug)]
127
- pub enum ActivationAction {
145
+ pub(crate) enum ActivationAction {
128
146
  /// We should respond that the workflow task is complete
129
147
  WftComplete {
130
148
  commands: Vec<ProtoCommand>,
131
149
  query_responses: Vec<QueryResult>,
150
+ force_new_wft: bool,
132
151
  },
133
152
  /// We should respond to a legacy query request
134
153
  RespondLegacyQuery { result: QueryResult },
135
154
  }
136
155
 
137
156
  macro_rules! machine_mut {
138
- ($myself:ident, $run_id:ident, $task_token:ident, $clos:expr) => {{
157
+ ($myself:ident, $run_id:ident, $clos:expr) => {{
139
158
  $myself
140
159
  .workflow_machines
141
160
  .access($run_id, $clos)
@@ -143,20 +162,20 @@ macro_rules! machine_mut {
143
162
  .map_err(|source| WorkflowUpdateError {
144
163
  source,
145
164
  run_id: $run_id.to_owned(),
146
- task_token: Some($task_token.clone()),
147
165
  })
148
166
  }};
149
167
  }
150
168
 
151
169
  impl WorkflowTaskManager {
152
170
  pub(crate) fn new(
153
- pending_activations_notifier: watch::Sender<bool>,
171
+ pending_activations_notifier: Arc<Notify>,
154
172
  eviction_policy: WorkflowCachingPolicy,
155
173
  metrics: MetricsContext,
156
174
  ) -> Self {
157
175
  Self {
158
176
  workflow_machines: WorkflowConcurrencyManager::new(),
159
177
  pending_activations: Default::default(),
178
+ pending_legacy_queries: Default::default(),
160
179
  ready_buffered_wft: Default::default(),
161
180
  pending_activations_notifier,
162
181
  cache_manager: Mutex::new(WorkflowCacheManager::new(eviction_policy, metrics.clone())),
@@ -164,7 +183,11 @@ impl WorkflowTaskManager {
164
183
  }
165
184
  }
166
185
 
167
- pub(crate) fn next_pending_activation(&self) -> Option<WfActivation> {
186
+ pub(crate) fn next_pending_activation(&self) -> Option<WorkflowActivation> {
187
+ // Dispatch pending legacy queries first
188
+ if let leg_q @ Some(_) = self.pending_legacy_queries.pop() {
189
+ return leg_q;
190
+ }
168
191
  // It is important that we do not issue pending activations for any workflows which already
169
192
  // have an outstanding activation. If we did, it can result in races where an in-progress
170
193
  // completion may appear to be the last in a task (no more pending activations) because
@@ -173,37 +196,69 @@ impl WorkflowTaskManager {
173
196
  let maybe_act = self
174
197
  .pending_activations
175
198
  .pop_first_matching(|rid| self.workflow_machines.get_activation(rid).is_none());
176
- if let Some(act) = maybe_act.as_ref() {
177
- if let Err(WorkflowMissingError { run_id }) = self.insert_outstanding_activation(act) {
178
- self.request_eviction(&run_id, "Pending activation present for missing run");
199
+ if let Some(pending_info) = maybe_act {
200
+ if let Ok(act) = self
201
+ .workflow_machines
202
+ .access_sync(&pending_info.run_id, |wfm| wfm.machines.get_wf_activation())
203
+ .and_then(|mut act| {
204
+ if let Some(reason) = pending_info.needs_eviction {
205
+ act.append_evict_job(reason);
206
+ }
207
+ self.insert_outstanding_activation(&act)?;
208
+ Ok(act)
209
+ })
210
+ {
211
+ self.cache_manager.lock().touch(&act.run_id);
212
+ Some(act)
213
+ } else {
214
+ self.request_eviction(
215
+ &pending_info.run_id,
216
+ "Tried to apply pending activation for missing run",
217
+ EvictionReason::Fatal,
218
+ );
179
219
  // Continue trying to return a valid pending activation
180
- return self.next_pending_activation();
220
+ self.next_pending_activation()
181
221
  }
182
- self.cache_manager.lock().touch(&act.run_id);
222
+ } else {
223
+ None
183
224
  }
184
- maybe_act
185
225
  }
186
226
 
187
- pub fn next_buffered_poll(&self) -> Option<ValidPollWFTQResponse> {
227
+ pub(crate) fn next_buffered_poll(&self) -> Option<ValidPollWFTQResponse> {
188
228
  self.ready_buffered_wft.pop()
189
229
  }
190
230
 
191
- pub fn outstanding_wft(&self) -> usize {
231
+ pub(crate) fn outstanding_wft(&self) -> usize {
192
232
  self.workflow_machines.outstanding_wft()
193
233
  }
194
234
 
235
+ /// Returns the event id of the most recently processed event for the provided run id.
236
+ pub(crate) fn most_recently_processed_event(
237
+ &self,
238
+ run_id: &str,
239
+ ) -> Result<i64, WorkflowMissingError> {
240
+ self.workflow_machines
241
+ .access_sync(run_id, |wfm| wfm.machines.last_processed_event)
242
+ }
243
+
195
244
  /// Request a workflow eviction. This will queue up an activation to evict the workflow from
196
245
  /// the lang side. Workflow will not *actually* be evicted until lang replies to that activation
197
246
  ///
198
247
  /// Returns, if found, the number of attempts on the current workflow task
199
- pub fn request_eviction(&self, run_id: &str, reason: impl Into<String>) -> Option<u32> {
248
+ pub(crate) fn request_eviction(
249
+ &self,
250
+ run_id: &str,
251
+ message: impl Into<String>,
252
+ reason: EvictionReason,
253
+ ) -> Option<u32> {
200
254
  if self.workflow_machines.exists(run_id) {
201
255
  if !self.activation_has_eviction(run_id) {
202
- debug!(%run_id, "Eviction requested");
256
+ let message = message.into();
257
+ debug!(%run_id, %message, "Eviction requested");
203
258
  // Queue up an eviction activation
204
259
  self.pending_activations
205
- .push(create_evict_activation(run_id.to_string(), reason.into()));
206
- let _ = self.pending_activations_notifier.send(true);
260
+ .notify_needs_eviction(run_id, message, reason);
261
+ self.pending_activations_notifier.notify_waiters();
207
262
  }
208
263
  self.workflow_machines
209
264
  .get_task(run_id)
@@ -236,15 +291,14 @@ impl WorkflowTaskManager {
236
291
  }
237
292
 
238
293
  /// Given a validated poll response from the server, prepare an activation (if there is one) to
239
- /// be sent to lang. If applying the response to the workflow's state does not produce a new
240
- /// activation, `None` is returned.
294
+ /// be sent to lang.
241
295
  ///
242
296
  /// The new activation is immediately considered to be an outstanding workflow task - so it is
243
297
  /// expected that new activations will be dispatched to lang right away.
244
298
  pub(crate) async fn apply_new_poll_resp(
245
299
  &self,
246
300
  work: ValidPollWFTQResponse,
247
- gateway: &GatewayRef,
301
+ gateway: Arc<dyn ServerGatewayApis + Send + Sync>,
248
302
  ) -> NewWfTaskOutcome {
249
303
  let mut work = if let Some(w) = self.workflow_machines.buffer_resp_if_outstanding_work(work)
250
304
  {
@@ -256,6 +310,7 @@ impl WorkflowTaskManager {
256
310
  debug!(
257
311
  task_token = %&work.task_token,
258
312
  history_length = %work.history.events.len(),
313
+ attempt = %work.attempt,
259
314
  "Applying new workflow task from server"
260
315
  );
261
316
  let task_start_time = Instant::now();
@@ -285,7 +340,7 @@ impl WorkflowTaskManager {
285
340
  debug!("Dispatching legacy query {:?}", &lq);
286
341
  next_activation
287
342
  .jobs
288
- .push(wf_activation_job::Variant::QueryWorkflow(lq).into());
343
+ .push(workflow_activation_job::Variant::QueryWorkflow(lq).into());
289
344
  }
290
345
  None
291
346
  } else {
@@ -304,7 +359,20 @@ impl WorkflowTaskManager {
304
359
  .expect("Workflow machines must exist, we just created/updated them");
305
360
 
306
361
  if next_activation.jobs.is_empty() {
307
- NewWfTaskOutcome::Autocomplete
362
+ let outstanding_las = self
363
+ .workflow_machines
364
+ .access_sync(&next_activation.run_id, |wfm| {
365
+ wfm.machines.outstanding_local_activity_count()
366
+ })
367
+ .expect("Workflow machines must exist, we just created/updated them");
368
+ if outstanding_las > 0 {
369
+ // If there are outstanding local activities, we don't want to autocomplete the
370
+ // workflow task. We want to give them a chance to complete. If they take longer
371
+ // than the WFT timeout, we will force a new WFT just before the timeout.
372
+ NewWfTaskOutcome::LocalActsOutstanding
373
+ } else {
374
+ NewWfTaskOutcome::Autocomplete
375
+ }
308
376
  } else {
309
377
  if let Err(wme) = self.insert_outstanding_activation(&next_activation) {
310
378
  return NewWfTaskOutcome::Evict(wme.into());
@@ -319,27 +387,33 @@ impl WorkflowTaskManager {
319
387
  &self,
320
388
  run_id: &str,
321
389
  mut commands: Vec<WFCommand>,
390
+ local_activity_request_sink: impl FnOnce(Vec<LocalActRequest>) -> Vec<LocalActivityResolution>,
322
391
  ) -> Result<Option<ServerCommandsWithWorkflowInfo>, WorkflowUpdateError> {
323
392
  // No-command replies to evictions can simply skip everything
324
393
  if commands.is_empty() && self.activation_has_eviction(run_id) {
325
394
  return Ok(None);
326
395
  }
327
396
 
328
- let task_token = if let Some(entry) = self.workflow_machines.get_task(run_id) {
329
- entry.info.task_token.clone()
330
- } else {
331
- if !self.activation_has_eviction(run_id) {
332
- // Don't bother warning if this was an eviction, since it's normal to issue
333
- // eviction activations without an associated workflow task in that case.
334
- warn!(
335
- run_id,
336
- "Attempted to complete activation for nonexistent run"
337
- );
338
- }
339
- return Ok(None);
340
- };
397
+ let (task_token, is_leg_query_task, start_time) =
398
+ if let Some(entry) = self.workflow_machines.get_task(run_id) {
399
+ (
400
+ entry.info.task_token.clone(),
401
+ entry.legacy_query.is_some(),
402
+ entry.start_time,
403
+ )
404
+ } else {
405
+ if !self.activation_has_eviction(run_id) {
406
+ // Don't bother warning if this was an eviction, since it's normal to issue
407
+ // eviction activations without an associated workflow task in that case.
408
+ warn!(
409
+ run_id,
410
+ "Attempted to complete activation for run without associated workflow task"
411
+ );
412
+ }
413
+ return Ok(None);
414
+ };
341
415
 
342
- // If the only command in the activation is a legacy query response, that means we need
416
+ // If the only command from the activation is a legacy query response, that means we need
343
417
  // to respond differently than a typical activation.
344
418
  let ret = if matches!(&commands.as_slice(),
345
419
  &[WFCommand::QueryResponse(qr)] if qr.query_id == LEGACY_QUERY_ID)
@@ -362,9 +436,12 @@ impl WorkflowTaskManager {
362
436
  if let WFCommand::QueryResponse(qr) = commands.remove(i) {
363
437
  if qr.query_id == LEGACY_QUERY_ID {
364
438
  return Err(WorkflowUpdateError {
365
- source: WFMachinesError::Fatal("Legacy query activation response included other commands, this is not allowed and constitutes an error in the lang SDK".to_string()),
439
+ source: WFMachinesError::Fatal(
440
+ "Legacy query activation response included other commands, \
441
+ this is not allowed and constitutes an error in the lang SDK"
442
+ .to_string(),
443
+ ),
366
444
  run_id: run_id.to_string(),
367
- task_token: Some(task_token)
368
445
  });
369
446
  }
370
447
  query_responses.push(qr);
@@ -374,34 +451,70 @@ impl WorkflowTaskManager {
374
451
  }
375
452
  }
376
453
 
377
- // Send commands from lang into the machines
378
- machine_mut!(self, run_id, task_token, |wfm: &mut WorkflowManager| {
379
- wfm.push_commands(commands).boxed()
380
- })?;
381
- // Check if the workflow run needs another activation and queue it up if there is one
382
- // by pushing it into the pending activations list
383
- let next_activation = machine_mut!(
454
+ let (are_pending, server_cmds, local_activities, wft_timeout) = machine_mut!(
384
455
  self,
385
456
  run_id,
386
- task_token,
387
- move |mgr: &mut WorkflowManager| mgr.get_next_activation().boxed()
457
+ |wfm: &mut WorkflowManager| {
458
+ async move {
459
+ // Send commands from lang into the machines then check if the workflow run
460
+ // needs another activation and mark it if so
461
+ wfm.push_commands(commands).await?;
462
+ let are_pending = wfm.apply_next_task_if_ready().await?;
463
+ // We want to fetch the outgoing commands only after a next WFT may have
464
+ // been applied, as outgoing server commands may be affected.
465
+ let outgoing_cmds = wfm.get_server_commands();
466
+ let new_local_acts = wfm.drain_queued_local_activities();
467
+
468
+ let wft_timeout: Duration = wfm
469
+ .machines
470
+ .started_attrs()
471
+ .and_then(|attrs| attrs.workflow_task_timeout.clone())
472
+ .and_then(|tt| tt.try_into().ok())
473
+ .ok_or_else(|| {
474
+ WFMachinesError::Fatal(
475
+ "Workflow's start attribs were missing a well formed task timeout"
476
+ .to_string(),
477
+ )
478
+ })?;
479
+
480
+ Ok((are_pending, outgoing_cmds, new_local_acts, wft_timeout))
481
+ }
482
+ .boxed()
483
+ }
388
484
  )?;
389
- if !next_activation.jobs.is_empty() {
390
- self.pending_activations.push(next_activation);
391
- let _ = self.pending_activations_notifier.send(true);
485
+
486
+ if are_pending {
487
+ self.needs_activation(run_id);
488
+ }
489
+ let immediate_resolutions = local_activity_request_sink(local_activities);
490
+ for resolution in immediate_resolutions {
491
+ self.notify_of_local_result(run_id, LocalResolution::LocalActivity(resolution))
492
+ .await?;
392
493
  }
393
- // We want to fetch the outgoing commands only after any new activation has been queued,
394
- // as doing so may have altered the outgoing commands.
395
- let server_cmds =
396
- machine_mut!(self, run_id, task_token, |wfm: &mut WorkflowManager| {
397
- async move { Ok(wfm.get_server_commands()) }.boxed()
398
- })?;
494
+
495
+ // The heartbeat deadline is 80% of the WFT timeout
496
+ let wft_heartbeat_deadline =
497
+ start_time.add(wft_timeout.mul_f32(WFT_HEARTBEAT_TIMEOUT_FRACTION));
498
+ // Wait on local activities to resolve if there are any, or for the WFT timeout to
499
+ // be about to expire, in which case we will need to send a WFT heartbeat.
500
+ let must_heartbeat = self
501
+ .wait_for_local_acts_or_heartbeat(run_id, wft_heartbeat_deadline)
502
+ .await;
503
+ let is_query_playback = is_leg_query_task && query_responses.is_empty();
504
+
399
505
  // We only actually want to send commands back to the server if there are no more
400
- // pending activations and we are caught up on replay.
401
- if !self.pending_activations.has_pending(run_id) && !server_cmds.replaying {
506
+ // pending activations and we are caught up on replay. We don't want to complete a wft
507
+ // if we already saw the final event in the workflow, or if we are playing back for the
508
+ // express purpose of fulfilling a query
509
+ if !self.pending_activations.has_pending(run_id)
510
+ && !server_cmds.replaying
511
+ && !is_query_playback
512
+ {
402
513
  Some(ServerCommandsWithWorkflowInfo {
403
514
  task_token,
404
515
  action: ActivationAction::WftComplete {
516
+ // TODO: Don't force if also sending complete execution cmd
517
+ force_new_wft: must_heartbeat,
405
518
  commands: server_cmds.commands,
406
519
  query_responses,
407
520
  },
@@ -414,6 +527,7 @@ impl WorkflowTaskManager {
414
527
  action: ActivationAction::WftComplete {
415
528
  commands: vec![],
416
529
  query_responses,
530
+ force_new_wft: false,
417
531
  },
418
532
  })
419
533
  }
@@ -423,7 +537,12 @@ impl WorkflowTaskManager {
423
537
 
424
538
  /// Record that an activation failed, returns enum that indicates if failure should be reported
425
539
  /// to the server
426
- pub(crate) fn failed_activation(&self, run_id: &str) -> FailedActivationOutcome {
540
+ pub(crate) fn failed_activation(
541
+ &self,
542
+ run_id: &str,
543
+ reason: EvictionReason,
544
+ failstr: String,
545
+ ) -> FailedActivationOutcome {
427
546
  let tt = if let Some(tt) = self
428
547
  .workflow_machines
429
548
  .get_task(run_id)
@@ -448,7 +567,7 @@ impl WorkflowTaskManager {
448
567
  } else {
449
568
  // Blow up any cached data associated with the workflow
450
569
  let should_report = self
451
- .request_eviction(run_id, "Activation failed by lang")
570
+ .request_eviction(run_id, failstr, reason)
452
571
  .map_or(true, |attempt| attempt <= 1);
453
572
  if should_report {
454
573
  FailedActivationOutcome::Report(tt)
@@ -465,8 +584,8 @@ impl WorkflowTaskManager {
465
584
  async fn instantiate_or_update_workflow(
466
585
  &self,
467
586
  poll_wf_resp: ValidPollWFTQResponse,
468
- gateway: &GatewayRef,
469
- ) -> Result<(WorkflowTaskInfo, WfActivation), WorkflowUpdateError> {
587
+ gateway: Arc<dyn ServerGatewayApis + Send + Sync>,
588
+ ) -> Result<(WorkflowTaskInfo, WorkflowActivation), WorkflowUpdateError> {
470
589
  let run_id = poll_wf_resp.workflow_execution.run_id.clone();
471
590
 
472
591
  let wft_info = WorkflowTaskInfo {
@@ -484,12 +603,12 @@ impl WorkflowTaskManager {
484
603
  poll_wf_resp.workflow_execution.workflow_id.clone(),
485
604
  poll_wf_resp.workflow_execution.run_id.clone(),
486
605
  poll_wf_resp.next_page_token,
487
- gateway.gw.clone(),
606
+ gateway.clone(),
488
607
  ),
489
608
  poll_wf_resp.previous_started_event_id,
490
609
  ),
491
610
  &poll_wf_resp.workflow_execution.workflow_id,
492
- &gateway.options.namespace,
611
+ &gateway.get_options().namespace,
493
612
  &poll_wf_resp.workflow_type,
494
613
  &self.metrics,
495
614
  )
@@ -501,27 +620,23 @@ impl WorkflowTaskManager {
501
620
  let query_jobs = poll_wf_resp
502
621
  .query_requests
503
622
  .into_iter()
504
- .map(|q| wf_activation_job::Variant::QueryWorkflow(q).into());
623
+ .map(|q| workflow_activation_job::Variant::QueryWorkflow(q).into());
505
624
  activation.jobs.extend(query_jobs);
506
625
  }
507
626
 
508
627
  Ok((wft_info, activation))
509
628
  }
510
- Err(source) => Err(WorkflowUpdateError {
511
- source,
512
- run_id,
513
- task_token: Some(wft_info.task_token),
514
- }),
629
+ Err(source) => Err(WorkflowUpdateError { source, run_id }),
515
630
  }
516
631
  }
517
632
 
518
- /// Called after every WFT completion or failure, updates outstanding task status & issues
519
- /// evictions if required. It is important this is called *after* reporting a successful WFT
520
- /// to server, as some replies (task not found) may require an eviction, which could be avoided
521
- /// if this is called too early.
633
+ /// Called after every workflow activation completion or failure, updates outstanding task
634
+ /// status & issues evictions if required. It is important this is called *after* potentially
635
+ /// reporting a successful WFT to server, as some replies (task not found) may require an
636
+ /// eviction, which could be avoided if this is called too early.
522
637
  ///
523
- /// Returns true if WFT is complete
524
- pub(crate) fn after_wft_report(&self, run_id: &str) -> bool {
638
+ /// Returns true if WFT was marked completed internally
639
+ pub(crate) fn after_wft_report(&self, run_id: &str, did_complete_wft: bool) -> bool {
525
640
  let mut just_evicted = false;
526
641
 
527
642
  if let Some(OutstandingActivation::Normal {
@@ -531,9 +646,10 @@ impl WorkflowTaskManager {
531
646
  self.evict_run(run_id);
532
647
  just_evicted = true;
533
648
  };
649
+
534
650
  // Workflows with no more pending activations (IE: They have completed a WFT) must be
535
651
  // removed from the outstanding tasks map
536
- if !self.pending_activations.has_pending(run_id) {
652
+ let retme = if !self.pending_activations.has_pending(run_id) {
537
653
  if !just_evicted {
538
654
  // Check if there was a legacy query which must be fulfilled, and if there is create
539
655
  // a new pending activation for it.
@@ -544,8 +660,8 @@ impl WorkflowTaskManager {
544
660
  {
545
661
  if let Some(query) = ot.legacy_query.take() {
546
662
  let na = create_query_activation(run_id.to_string(), [query]);
547
- self.pending_activations.push(na);
548
- let _ = self.pending_activations_notifier.send(true);
663
+ self.pending_legacy_queries.push(na);
664
+ self.pending_activations_notifier.notify_waiters();
549
665
  return false;
550
666
  }
551
667
  }
@@ -553,7 +669,11 @@ impl WorkflowTaskManager {
553
669
  // Evict run id if cache is full. Non-sticky will always evict.
554
670
  let maybe_evicted = self.cache_manager.lock().insert(run_id);
555
671
  if let Some(evicted_run_id) = maybe_evicted {
556
- self.request_eviction(&evicted_run_id, "Workflow cache full");
672
+ self.request_eviction(
673
+ &evicted_run_id,
674
+ "Workflow cache full",
675
+ EvictionReason::CacheFull,
676
+ );
557
677
  }
558
678
 
559
679
  // If there was a buffered poll response from the server, it is now ready to
@@ -565,9 +685,14 @@ impl WorkflowTaskManager {
565
685
 
566
686
  // The evict may or may not have already done this, but even when we aren't evicting
567
687
  // we want to clear the outstanding workflow task since it's now complete.
568
- return self.workflow_machines.complete_wft(run_id).is_some();
569
- }
570
- false
688
+ self.workflow_machines
689
+ .complete_wft(run_id, did_complete_wft)
690
+ .is_some()
691
+ } else {
692
+ false
693
+ };
694
+ self.on_activation_done(run_id);
695
+ retme
571
696
  }
572
697
 
573
698
  /// Must be called after *every* activation is replied to, regardless of whether or not we
@@ -575,20 +700,41 @@ impl WorkflowTaskManager {
575
700
  /// every activation we issue to lang has exactly one reply.
576
701
  ///
577
702
  /// Any subsequent action that needs to be taken will be created as a new activation
578
- pub(crate) fn on_activation_done(&self, run_id: &str) {
703
+ fn on_activation_done(&self, run_id: &str) {
579
704
  if self.workflow_machines.delete_activation(run_id).is_some() {
580
- let _ = self.pending_activations_notifier.send(true);
705
+ self.pending_activations_notifier.notify_waiters();
581
706
  }
582
707
  // It's possible the activation is already removed due to completing an eviction
583
708
  }
584
709
 
710
+ /// Let a workflow know that something we've been waiting locally on has resolved, like a local
711
+ /// activity or side effect
712
+ #[instrument(level = "debug", skip(self, resolved))]
713
+ pub(crate) async fn notify_of_local_result(
714
+ &self,
715
+ run_id: &str,
716
+ resolved: LocalResolution,
717
+ ) -> Result<(), WorkflowUpdateError> {
718
+ self.workflow_machines
719
+ .access_sync(run_id, |wfm: &mut WorkflowManager| {
720
+ wfm.notify_of_local_result(resolved)
721
+ })?
722
+ .map_err(|wfme| WorkflowUpdateError {
723
+ source: wfme,
724
+ run_id: run_id.to_string(),
725
+ })?;
726
+
727
+ self.needs_activation(run_id);
728
+ Ok(())
729
+ }
730
+
585
731
  fn make_buffered_poll_ready(&self, buffd: ValidPollWFTQResponse) {
586
732
  self.ready_buffered_wft.push(buffd);
587
733
  }
588
734
 
589
735
  fn insert_outstanding_activation(
590
736
  &self,
591
- act: &WfActivation,
737
+ act: &WorkflowActivation,
592
738
  ) -> Result<(), WorkflowMissingError> {
593
739
  let act_type = if act.is_legacy_query() {
594
740
  OutstandingActivation::LegacyQuery
@@ -621,4 +767,72 @@ impl WorkflowTaskManager {
621
767
  .map(OutstandingActivation::has_eviction)
622
768
  .unwrap_or_default()
623
769
  }
770
+
771
+ fn needs_activation(&self, run_id: &str) {
772
+ self.pending_activations.notify_needs_activation(run_id);
773
+ self.pending_activations_notifier.notify_waiters();
774
+ }
775
+
776
+ /// Wait for either all local activities to resolve, or for 80% of the WFT timeout, in which
777
+ /// case we will "heartbeat" by completing the WFT, even if there are no commands to send.
778
+ ///
779
+ /// Returns true if we must heartbeat
780
+ async fn wait_for_local_acts_or_heartbeat(
781
+ &self,
782
+ run_id: &str,
783
+ wft_heartbeat_deadline: Instant,
784
+ ) -> bool {
785
+ loop {
786
+ let la_count = self
787
+ .workflow_machines
788
+ .access_sync(run_id, |wfm| {
789
+ wfm.machines.outstanding_local_activity_count()
790
+ })
791
+ .expect("Workflow cannot go missing while we are waiting on LAs");
792
+ if la_count == 0 {
793
+ return false;
794
+ } else if Instant::now() >= wft_heartbeat_deadline {
795
+ // We must heartbeat b/c there are still pending local activities
796
+ return true;
797
+ }
798
+ // Since an LA resolution always results in a new pending activation, we can wait on
799
+ // notifications of that to re-check if they're all resolved.
800
+ let _ = timeout_at(
801
+ wft_heartbeat_deadline.into(),
802
+ self.pending_activations_notifier.notified(),
803
+ )
804
+ .await;
805
+ }
806
+ }
807
+ }
808
+
809
+ #[derive(Debug)]
810
+ pub(crate) struct WorkflowUpdateError {
811
+ /// Underlying workflow error
812
+ pub source: WFMachinesError,
813
+ /// The run id of the erring workflow
814
+ #[allow(dead_code)] // Useful in debug output
815
+ pub run_id: String,
816
+ }
817
+
818
+ impl WorkflowUpdateError {
819
+ pub fn evict_reason(&self) -> EvictionReason {
820
+ self.source.evict_reason()
821
+ }
822
+ }
823
+
824
+ impl From<WorkflowMissingError> for WorkflowUpdateError {
825
+ fn from(wme: WorkflowMissingError) -> Self {
826
+ Self {
827
+ source: WFMachinesError::Fatal("Workflow machines missing".to_string()),
828
+ run_id: wme.run_id,
829
+ }
830
+ }
831
+ }
832
+
833
/// Raised when the workflow machines for a run were expected to be cached but could not be
/// found.
#[derive(Debug)]
pub(crate) struct WorkflowMissingError {
    /// Run id of the workflow whose machines were missing
    pub run_id: String,
}