@temporalio/core-bridge 0.23.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. package/Cargo.lock +118 -15
  2. package/Cargo.toml +2 -1
  3. package/LICENSE.md +1 -1
  4. package/README.md +1 -1
  5. package/index.d.ts +47 -18
  6. package/package.json +7 -7
  7. package/releases/aarch64-apple-darwin/index.node +0 -0
  8. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
  9. package/releases/x86_64-apple-darwin/index.node +0 -0
  10. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  11. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  12. package/sdk-core/.buildkite/docker/docker-compose.yaml +4 -2
  13. package/sdk-core/ARCHITECTURE.md +9 -7
  14. package/sdk-core/README.md +5 -1
  15. package/sdk-core/arch_docs/diagrams/workflow_internals.svg +1 -0
  16. package/sdk-core/bridge-ffi/src/wrappers.rs +0 -3
  17. package/sdk-core/client/src/lib.rs +26 -8
  18. package/sdk-core/client/src/raw.rs +166 -54
  19. package/sdk-core/client/src/retry.rs +9 -4
  20. package/sdk-core/client/src/workflow_handle/mod.rs +4 -2
  21. package/sdk-core/core/Cargo.toml +2 -0
  22. package/sdk-core/core/src/abstractions.rs +137 -16
  23. package/sdk-core/core/src/core_tests/activity_tasks.rs +258 -63
  24. package/sdk-core/core/src/core_tests/child_workflows.rs +1 -2
  25. package/sdk-core/core/src/core_tests/determinism.rs +2 -2
  26. package/sdk-core/core/src/core_tests/local_activities.rs +8 -7
  27. package/sdk-core/core/src/core_tests/queries.rs +146 -60
  28. package/sdk-core/core/src/core_tests/replay_flag.rs +1 -1
  29. package/sdk-core/core/src/core_tests/workers.rs +39 -23
  30. package/sdk-core/core/src/core_tests/workflow_cancels.rs +1 -1
  31. package/sdk-core/core/src/core_tests/workflow_tasks.rs +387 -280
  32. package/sdk-core/core/src/lib.rs +6 -4
  33. package/sdk-core/core/src/pollers/poll_buffer.rs +16 -10
  34. package/sdk-core/core/src/protosext/mod.rs +6 -6
  35. package/sdk-core/core/src/retry_logic.rs +1 -1
  36. package/sdk-core/core/src/telemetry/metrics.rs +21 -7
  37. package/sdk-core/core/src/telemetry/mod.rs +18 -4
  38. package/sdk-core/core/src/test_help/mod.rs +341 -109
  39. package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +18 -9
  40. package/sdk-core/core/src/worker/activities/local_activities.rs +19 -16
  41. package/sdk-core/core/src/worker/activities.rs +156 -29
  42. package/sdk-core/core/src/worker/client.rs +1 -0
  43. package/sdk-core/core/src/worker/mod.rs +132 -659
  44. package/sdk-core/core/src/{workflow → worker/workflow}/bridge.rs +1 -1
  45. package/sdk-core/core/src/{workflow → worker/workflow}/driven_workflow.rs +1 -1
  46. package/sdk-core/core/src/{workflow → worker/workflow}/history_update.rs +16 -2
  47. package/sdk-core/core/src/{workflow → worker/workflow}/machines/activity_state_machine.rs +39 -4
  48. package/sdk-core/core/src/{workflow → worker/workflow}/machines/cancel_external_state_machine.rs +5 -2
  49. package/sdk-core/core/src/{workflow → worker/workflow}/machines/cancel_workflow_state_machine.rs +1 -1
  50. package/sdk-core/core/src/{workflow → worker/workflow}/machines/child_workflow_state_machine.rs +2 -4
  51. package/sdk-core/core/src/{workflow → worker/workflow}/machines/complete_workflow_state_machine.rs +0 -0
  52. package/sdk-core/core/src/{workflow → worker/workflow}/machines/continue_as_new_workflow_state_machine.rs +1 -1
  53. package/sdk-core/core/src/{workflow → worker/workflow}/machines/fail_workflow_state_machine.rs +0 -0
  54. package/sdk-core/core/src/{workflow → worker/workflow}/machines/local_activity_state_machine.rs +2 -5
  55. package/sdk-core/core/src/{workflow → worker/workflow}/machines/mod.rs +1 -1
  56. package/sdk-core/core/src/{workflow → worker/workflow}/machines/mutable_side_effect_state_machine.rs +0 -0
  57. package/sdk-core/core/src/{workflow → worker/workflow}/machines/patch_state_machine.rs +1 -1
  58. package/sdk-core/core/src/{workflow → worker/workflow}/machines/side_effect_state_machine.rs +0 -0
  59. package/sdk-core/core/src/{workflow → worker/workflow}/machines/signal_external_state_machine.rs +4 -2
  60. package/sdk-core/core/src/{workflow → worker/workflow}/machines/timer_state_machine.rs +1 -2
  61. package/sdk-core/core/src/{workflow → worker/workflow}/machines/transition_coverage.rs +1 -1
  62. package/sdk-core/core/src/{workflow → worker/workflow}/machines/upsert_search_attributes_state_machine.rs +5 -7
  63. package/sdk-core/core/src/{workflow → worker/workflow}/machines/workflow_machines/local_acts.rs +2 -2
  64. package/sdk-core/core/src/{workflow → worker/workflow}/machines/workflow_machines.rs +40 -16
  65. package/sdk-core/core/src/{workflow → worker/workflow}/machines/workflow_task_state_machine.rs +0 -0
  66. package/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +198 -0
  67. package/sdk-core/core/src/worker/workflow/managed_run.rs +627 -0
  68. package/sdk-core/core/src/worker/workflow/mod.rs +1115 -0
  69. package/sdk-core/core/src/worker/workflow/run_cache.rs +143 -0
  70. package/sdk-core/core/src/worker/workflow/wft_poller.rs +88 -0
  71. package/sdk-core/core/src/worker/workflow/workflow_stream.rs +936 -0
  72. package/sdk-core/core-api/src/errors.rs +3 -10
  73. package/sdk-core/core-api/src/lib.rs +2 -1
  74. package/sdk-core/core-api/src/worker.rs +26 -2
  75. package/sdk-core/etc/dynamic-config.yaml +2 -0
  76. package/sdk-core/integ-with-otel.sh +1 -1
  77. package/sdk-core/protos/api_upstream/Makefile +4 -4
  78. package/sdk-core/protos/api_upstream/api-linter.yaml +2 -0
  79. package/sdk-core/protos/api_upstream/buf.yaml +8 -9
  80. package/sdk-core/protos/api_upstream/temporal/api/cluster/v1/message.proto +83 -0
  81. package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +7 -1
  82. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/cluster.proto +40 -0
  83. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +3 -0
  84. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +3 -1
  85. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +60 -0
  86. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +3 -0
  87. package/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +32 -4
  88. package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +69 -19
  89. package/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +13 -0
  90. package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +163 -0
  91. package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +97 -0
  92. package/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +300 -0
  93. package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +25 -0
  94. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +180 -3
  95. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +53 -3
  96. package/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +2 -2
  97. package/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +6 -5
  98. package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +0 -1
  99. package/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +2 -1
  100. package/sdk-core/protos/local/temporal/sdk/core/common/common.proto +0 -64
  101. package/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +2 -1
  102. package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +11 -8
  103. package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +30 -25
  104. package/sdk-core/sdk/src/activity_context.rs +12 -5
  105. package/sdk-core/sdk/src/app_data.rs +37 -0
  106. package/sdk-core/sdk/src/lib.rs +76 -43
  107. package/sdk-core/sdk/src/workflow_context/options.rs +8 -6
  108. package/sdk-core/sdk/src/workflow_context.rs +14 -19
  109. package/sdk-core/sdk/src/workflow_future.rs +11 -6
  110. package/sdk-core/sdk-core-protos/src/history_builder.rs +19 -5
  111. package/sdk-core/sdk-core-protos/src/history_info.rs +11 -6
  112. package/sdk-core/sdk-core-protos/src/lib.rs +74 -176
  113. package/sdk-core/test-utils/src/lib.rs +85 -72
  114. package/sdk-core/tests/integ_tests/heartbeat_tests.rs +11 -9
  115. package/sdk-core/tests/integ_tests/polling_tests.rs +12 -0
  116. package/sdk-core/tests/integ_tests/queries_tests.rs +39 -22
  117. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +49 -4
  118. package/sdk-core/tests/integ_tests/workflow_tests/appdata_propagation.rs +61 -0
  119. package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +1 -1
  120. package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +74 -13
  121. package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +19 -0
  122. package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -1
  123. package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +6 -3
  124. package/sdk-core/tests/integ_tests/workflow_tests.rs +10 -23
  125. package/sdk-core/tests/load_tests.rs +8 -3
  126. package/sdk-core/tests/main.rs +2 -1
  127. package/src/conversions.rs +47 -39
  128. package/src/errors.rs +10 -21
  129. package/src/lib.rs +342 -325
  130. package/sdk-core/core/src/pending_activations.rs +0 -173
  131. package/sdk-core/core/src/worker/wft_delivery.rs +0 -81
  132. package/sdk-core/core/src/workflow/mod.rs +0 -478
  133. package/sdk-core/core/src/workflow/workflow_tasks/cache_manager.rs +0 -194
  134. package/sdk-core/core/src/workflow/workflow_tasks/concurrency_manager.rs +0 -418
  135. package/sdk-core/core/src/workflow/workflow_tasks/mod.rs +0 -989
@@ -0,0 +1,627 @@
1
+ #[cfg(test)]
2
+ mod managed_wf_test;
3
+
4
+ use crate::{
5
+ worker::{
6
+ workflow::{
7
+ machines::WorkflowMachines, ActivationAction, ActivationCompleteOutcome, HistoryUpdate,
8
+ LocalResolution, NewIncomingWFT, OutgoingServerCommands, RequestEvictMsg, RunActions,
9
+ RunActivationCompletion, RunUpdateResponse, ServerCommandsWithWorkflowInfo, WFCommand,
10
+ WorkflowBridge,
11
+ },
12
+ LocalActRequest,
13
+ },
14
+ MetricsContext,
15
+ };
16
+ use futures::{stream, StreamExt};
17
+ use std::{
18
+ ops::Add,
19
+ sync::mpsc::Sender,
20
+ time::{Duration, Instant},
21
+ };
22
+ use temporal_sdk_core_api::errors::WFMachinesError;
23
+ use temporal_sdk_core_protos::coresdk::{
24
+ workflow_activation::{RemoveFromCache, WorkflowActivation},
25
+ workflow_commands::QueryResult,
26
+ };
27
+ use tokio::{
28
+ sync::{
29
+ mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender},
30
+ oneshot,
31
+ },
32
+ task,
33
+ task::JoinHandle,
34
+ };
35
+ use tokio_stream::wrappers::UnboundedReceiverStream;
36
+ use tracing::Span;
37
+ use tracing_futures::Instrument;
38
+
39
+ use crate::worker::workflow::{
40
+ ActivationCompleteResult, ActivationOrAuto, FailRunUpdate, FulfillableActivationComplete,
41
+ GoodRunUpdate, LocalActivityRequestSink, RunAction, RunUpdateResponseKind,
42
+ };
43
+ use temporal_sdk_core_protos::TaskToken;
44
+
45
+ use crate::abstractions::dbg_panic;
46
+ #[cfg(test)]
47
+ pub(crate) use managed_wf_test::ManagedWFFunc;
48
+
49
+ type Result<T, E = WFMachinesError> = std::result::Result<T, E>;
50
/// Fraction of a workflow task (WFT) timeout that we allow to elapse before a WFT heartbeat is
/// sent, when a heartbeat is necessary. The heartbeat deadline is computed by multiplying the
/// WFT timeout by this value.
const WFT_HEARTBEAT_TIMEOUT_FRACTION: f32 = 0.8_f32;
53
+
54
+ pub(super) struct ManagedRun {
55
+ wfm: WorkflowManager,
56
+ update_tx: UnboundedSender<RunUpdateResponse>,
57
+ local_activity_request_sink: LocalActivityRequestSink,
58
+ waiting_on_la: Option<WaitingOnLAs>,
59
+ // Is set to true if the machines encounter an error and the only subsequent thing we should
60
+ // do is be evicted.
61
+ am_broken: bool,
62
+ }
63
+
64
+ /// If an activation completion needed to wait on LA completions (or heartbeat timeout) we use
65
+ /// this struct to store the data we need to finish the completion once that has happened
66
+ struct WaitingOnLAs {
67
+ wft_timeout: Duration,
68
+ /// If set, we are waiting for LAs to complete as part of a just-finished workflow activation.
69
+ /// If unset, we already had a heartbeat timeout and got a new WFT without any new work while
70
+ /// there are still incomplete LAs.
71
+ completion_dat: Option<(
72
+ CompletionDataForWFT,
73
+ oneshot::Sender<ActivationCompleteResult>,
74
+ )>,
75
+ hb_chan: UnboundedSender<Span>,
76
+ heartbeat_timeout_task: JoinHandle<()>,
77
+ }
78
+
79
+ #[derive(Debug)]
80
+ struct CompletionDataForWFT {
81
+ task_token: TaskToken,
82
+ query_responses: Vec<QueryResult>,
83
+ has_pending_query: bool,
84
+ activation_was_only_eviction: bool,
85
+ }
86
+
87
+ impl ManagedRun {
88
+ pub(super) fn new(
89
+ wfm: WorkflowManager,
90
+ update_tx: UnboundedSender<RunUpdateResponse>,
91
+ local_activity_request_sink: LocalActivityRequestSink,
92
+ ) -> Self {
93
+ Self {
94
+ wfm,
95
+ update_tx,
96
+ local_activity_request_sink,
97
+ waiting_on_la: None,
98
+ am_broken: false,
99
+ }
100
+ }
101
+
102
+ pub(super) async fn run(self, run_actions_rx: UnboundedReceiver<RunAction>) {
103
+ let (heartbeat_tx, heartbeat_rx) = unbounded_channel();
104
+ stream::select(
105
+ UnboundedReceiverStream::new(run_actions_rx),
106
+ UnboundedReceiverStream::new(heartbeat_rx).map(|trace_span| RunAction {
107
+ action: RunActions::HeartbeatTimeout,
108
+ trace_span,
109
+ }),
110
+ )
111
+ .fold((self, heartbeat_tx), |(mut me, heartbeat_tx), action| {
112
+ let span = action.trace_span;
113
+ let action = action.action;
114
+ async move {
115
+ let res = match action {
116
+ RunActions::NewIncomingWFT(wft) => me
117
+ .incoming_wft(wft)
118
+ .await
119
+ .map(RunActionOutcome::AfterNewWFT),
120
+ RunActions::ActivationCompletion(completion) => me
121
+ .completion(completion, &heartbeat_tx)
122
+ .await
123
+ .map(RunActionOutcome::AfterCompletion),
124
+ RunActions::CheckMoreWork {
125
+ want_to_evict,
126
+ has_pending_queries,
127
+ } => me
128
+ .check_more_work(want_to_evict, has_pending_queries)
129
+ .await
130
+ .map(RunActionOutcome::AfterCheckWork),
131
+ RunActions::LocalResolution(r) => me
132
+ .local_resolution(r)
133
+ .await
134
+ .map(RunActionOutcome::AfterLocalResolution),
135
+ RunActions::HeartbeatTimeout => {
136
+ let maybe_act = if me.heartbeat_timeout() {
137
+ Some(ActivationOrAuto::Autocomplete {
138
+ run_id: me.wfm.machines.run_id.clone(),
139
+ })
140
+ } else {
141
+ None
142
+ };
143
+ Ok(RunActionOutcome::AfterHeartbeatTimeout(maybe_act))
144
+ }
145
+ };
146
+ match res {
147
+ Ok(outcome) => {
148
+ me.send_update_response(outcome);
149
+ }
150
+ Err(e) => {
151
+ error!(error=?e, "Error in run machines");
152
+ me.am_broken = true;
153
+ me.update_tx
154
+ .send(RunUpdateResponse {
155
+ kind: RunUpdateResponseKind::Fail(FailRunUpdate {
156
+ run_id: me.wfm.machines.run_id.clone(),
157
+ err: e.source,
158
+ completion_resp: e.complete_resp_chan,
159
+ }),
160
+ span: Span::current(),
161
+ })
162
+ .expect("Machine can send update");
163
+ }
164
+ }
165
+ (me, heartbeat_tx)
166
+ }
167
+ .instrument(span)
168
+ })
169
+ .await;
170
+ }
171
+
172
+ async fn incoming_wft(
173
+ &mut self,
174
+ wft: NewIncomingWFT,
175
+ ) -> Result<Option<ActivationOrAuto>, RunUpdateErr> {
176
+ let activation = if let Some(h) = wft.history_update {
177
+ self.wfm.feed_history_from_server(h).await?
178
+ } else {
179
+ let r = self.wfm.get_next_activation().await?;
180
+ if r.jobs.is_empty() {
181
+ return Err(RunUpdateErr {
182
+ source: WFMachinesError::Fatal(format!(
183
+ "Machines created for {} with no jobs",
184
+ self.wfm.machines.run_id
185
+ )),
186
+ complete_resp_chan: None,
187
+ });
188
+ }
189
+ r
190
+ };
191
+
192
+ if activation.jobs.is_empty() {
193
+ if self.wfm.machines.outstanding_local_activity_count() > 0 {
194
+ // If the activation has no jobs but there are outstanding LAs, we need to restart the
195
+ // WFT heartbeat.
196
+ if let Some(ref mut lawait) = self.waiting_on_la {
197
+ if lawait.completion_dat.is_some() {
198
+ panic!("Should not have completion dat when getting new wft & empty jobs")
199
+ }
200
+ lawait.heartbeat_timeout_task.abort();
201
+ lawait.heartbeat_timeout_task = start_heartbeat_timeout_task(
202
+ lawait.hb_chan.clone(),
203
+ wft.start_time,
204
+ lawait.wft_timeout,
205
+ );
206
+ // No activation needs to be sent to lang. We just need to wait for another
207
+ // heartbeat timeout or LAs to resolve
208
+ return Ok(None);
209
+ } else {
210
+ panic!(
211
+ "Got a new WFT while there are outstanding local activities, but there \
212
+ was no waiting on LA info."
213
+ )
214
+ }
215
+ } else {
216
+ return Ok(Some(ActivationOrAuto::Autocomplete {
217
+ run_id: self.wfm.machines.run_id.clone(),
218
+ }));
219
+ }
220
+ }
221
+
222
+ Ok(Some(ActivationOrAuto::LangActivation(activation)))
223
+ }
224
+
225
+ async fn completion(
226
+ &mut self,
227
+ mut completion: RunActivationCompletion,
228
+ heartbeat_tx: &UnboundedSender<Span>,
229
+ ) -> Result<Option<FulfillableActivationComplete>, RunUpdateErr> {
230
+ let resp_chan = completion
231
+ .resp_chan
232
+ .take()
233
+ .expect("Completion response channel must be populated");
234
+
235
+ let outcome = async move {
236
+ // Send commands from lang into the machines then check if the workflow run
237
+ // needs another activation and mark it if so
238
+ self.wfm.push_commands(completion.commands).await?;
239
+ // Don't bother applying the next task if we're evicting at the end of
240
+ // this activation
241
+ if !completion.activation_was_eviction {
242
+ self.wfm.apply_next_task_if_ready().await?;
243
+ }
244
+ let new_local_acts = self.wfm.drain_queued_local_activities();
245
+
246
+ let immediate_resolutions = (self.local_activity_request_sink)(new_local_acts);
247
+ for resolution in immediate_resolutions {
248
+ self.wfm
249
+ .notify_of_local_result(LocalResolution::LocalActivity(resolution))?;
250
+ }
251
+
252
+ let data = CompletionDataForWFT {
253
+ task_token: completion.task_token,
254
+ query_responses: completion.query_responses,
255
+ has_pending_query: completion.has_pending_query,
256
+ activation_was_only_eviction: completion.activation_was_only_eviction,
257
+ };
258
+ if self.wfm.machines.outstanding_local_activity_count() == 0 {
259
+ Ok((None, data, self))
260
+ } else {
261
+ let wft_timeout: Duration = self
262
+ .wfm
263
+ .machines
264
+ .get_started_info()
265
+ .and_then(|attrs| attrs.workflow_task_timeout)
266
+ .ok_or_else(|| {
267
+ WFMachinesError::Fatal(
268
+ "Workflow's start attribs were missing a well formed task timeout"
269
+ .to_string(),
270
+ )
271
+ })?;
272
+ let heartbeat_tx = heartbeat_tx.clone();
273
+ Ok((
274
+ Some((heartbeat_tx, completion.start_time, wft_timeout)),
275
+ data,
276
+ self,
277
+ ))
278
+ }
279
+ }
280
+ .await;
281
+
282
+ match outcome {
283
+ Ok((None, data, me)) => Ok(Some(me.prepare_complete_resp(resp_chan, data, false))),
284
+ Ok((Some((chan, start_t, wft_timeout)), data, me)) => {
285
+ if let Some(wola) = me.waiting_on_la.as_mut() {
286
+ wola.heartbeat_timeout_task.abort();
287
+ }
288
+ me.waiting_on_la = Some(WaitingOnLAs {
289
+ wft_timeout,
290
+ completion_dat: Some((data, resp_chan)),
291
+ hb_chan: chan.clone(),
292
+ heartbeat_timeout_task: start_heartbeat_timeout_task(
293
+ chan,
294
+ start_t,
295
+ wft_timeout,
296
+ ),
297
+ });
298
+ Ok(None)
299
+ }
300
+ Err(e) => Err(RunUpdateErr {
301
+ source: e,
302
+ complete_resp_chan: Some(resp_chan),
303
+ }),
304
+ }
305
+ }
306
+
307
+ async fn check_more_work(
308
+ &mut self,
309
+ want_to_evict: Option<RequestEvictMsg>,
310
+ has_pending_queries: bool,
311
+ ) -> Result<Option<ActivationOrAuto>, RunUpdateErr> {
312
+ if self.wfm.machines.has_pending_jobs() && !self.am_broken {
313
+ Ok(Some(ActivationOrAuto::LangActivation(
314
+ self.wfm.get_next_activation().await?,
315
+ )))
316
+ } else {
317
+ if has_pending_queries && !self.am_broken {
318
+ return Ok(Some(ActivationOrAuto::ReadyForQueries(
319
+ self.wfm.machines.get_wf_activation(),
320
+ )));
321
+ }
322
+ if let Some(wte) = want_to_evict {
323
+ let mut act = self.wfm.machines.get_wf_activation();
324
+ // No other jobs make any sense to send if we encountered an error.
325
+ if self.am_broken {
326
+ act.jobs = vec![];
327
+ }
328
+ act.append_evict_job(RemoveFromCache {
329
+ message: wte.message,
330
+ reason: wte.reason as i32,
331
+ });
332
+ Ok(Some(ActivationOrAuto::LangActivation(act)))
333
+ } else {
334
+ Ok(None)
335
+ }
336
+ }
337
+ }
338
+
339
+ fn prepare_complete_resp(
340
+ &mut self,
341
+ resp_chan: oneshot::Sender<ActivationCompleteResult>,
342
+ data: CompletionDataForWFT,
343
+ due_to_heartbeat_timeout: bool,
344
+ ) -> FulfillableActivationComplete {
345
+ let outgoing_cmds = self.wfm.get_server_commands();
346
+ let query_responses = data.query_responses;
347
+ let has_query_responses = !query_responses.is_empty();
348
+ let is_query_playback = data.has_pending_query && !has_query_responses;
349
+
350
+ // We only actually want to send commands back to the server if there are no more
351
+ // pending activations and we are caught up on replay. We don't want to complete a wft
352
+ // if we already saw the final event in the workflow, or if we are playing back for the
353
+ // express purpose of fulfilling a query. If the activation we sent was *only* an
354
+ // eviction, and there were no commands produced during iteration, don't send that
355
+ // either.
356
+ let no_commands_and_evicting =
357
+ outgoing_cmds.commands.is_empty() && data.activation_was_only_eviction;
358
+ let to_be_sent = ServerCommandsWithWorkflowInfo {
359
+ task_token: data.task_token,
360
+ action: ActivationAction::WftComplete {
361
+ force_new_wft: due_to_heartbeat_timeout,
362
+ commands: outgoing_cmds.commands,
363
+ query_responses,
364
+ },
365
+ };
366
+
367
+ let should_respond = !(self.wfm.machines.has_pending_jobs()
368
+ || outgoing_cmds.replaying
369
+ || is_query_playback
370
+ || no_commands_and_evicting);
371
+ let outcome = if should_respond || has_query_responses {
372
+ ActivationCompleteOutcome::ReportWFTSuccess(to_be_sent)
373
+ } else {
374
+ ActivationCompleteOutcome::DoNothing
375
+ };
376
+ FulfillableActivationComplete {
377
+ result: ActivationCompleteResult {
378
+ most_recently_processed_event: self.wfm.machines.last_processed_event as usize,
379
+ outcome,
380
+ },
381
+ resp_chan,
382
+ }
383
+ }
384
+
385
+ async fn local_resolution(
386
+ &mut self,
387
+ res: LocalResolution,
388
+ ) -> Result<Option<FulfillableActivationComplete>, RunUpdateErr> {
389
+ debug!(resolution=?res, "Applying local resolution");
390
+ self.wfm.notify_of_local_result(res)?;
391
+ if self.wfm.machines.outstanding_local_activity_count() == 0 {
392
+ if let Some(mut wait_dat) = self.waiting_on_la.take() {
393
+ // Cancel the heartbeat timeout
394
+ wait_dat.heartbeat_timeout_task.abort();
395
+ if let Some((completion_dat, resp_chan)) = wait_dat.completion_dat.take() {
396
+ return Ok(Some(self.prepare_complete_resp(
397
+ resp_chan,
398
+ completion_dat,
399
+ false,
400
+ )));
401
+ }
402
+ }
403
+ }
404
+ Ok(None)
405
+ }
406
+
407
+ /// Returns `true` if autocompletion should be issued, which will actually cause us to end up
408
+ /// in [completion] again, at which point we'll start a new heartbeat timeout, which will
409
+ /// immediately trigger and thus finish the completion, forcing a new task as it should.
410
+ fn heartbeat_timeout(&mut self) -> bool {
411
+ if let Some(ref mut wait_dat) = self.waiting_on_la {
412
+ // Cancel the heartbeat timeout
413
+ wait_dat.heartbeat_timeout_task.abort();
414
+ if let Some((completion_dat, resp_chan)) = wait_dat.completion_dat.take() {
415
+ let compl = self.prepare_complete_resp(resp_chan, completion_dat, true);
416
+ // Immediately fulfill the completion since the run update will already have
417
+ // been replied to
418
+ compl.fulfill();
419
+ } else {
420
+ // Auto-reply WFT complete
421
+ return true;
422
+ }
423
+ } else {
424
+ // If a heartbeat timeout happened, we should always have been waiting on LAs
425
+ dbg_panic!("WFT heartbeat timeout fired but we were not waiting on any LAs");
426
+ }
427
+ false
428
+ }
429
+
430
+ fn send_update_response(&self, outcome: RunActionOutcome) {
431
+ let mut in_response_to_wft = false;
432
+ let (outgoing_activation, fulfillable_complete) = match outcome {
433
+ RunActionOutcome::AfterNewWFT(a) => {
434
+ in_response_to_wft = true;
435
+ (a, None)
436
+ }
437
+ RunActionOutcome::AfterCheckWork(a) => (a, None),
438
+ RunActionOutcome::AfterLocalResolution(f) => (None, f),
439
+ RunActionOutcome::AfterCompletion(f) => (None, f),
440
+ RunActionOutcome::AfterHeartbeatTimeout(a) => (a, None),
441
+ };
442
+
443
+ self.update_tx
444
+ .send(RunUpdateResponse {
445
+ kind: RunUpdateResponseKind::Good(GoodRunUpdate {
446
+ run_id: self.wfm.machines.run_id.clone(),
447
+ outgoing_activation,
448
+ fulfillable_complete,
449
+ have_seen_terminal_event: self.wfm.machines.have_seen_terminal_event,
450
+ more_pending_work: self.wfm.machines.has_pending_jobs(),
451
+ most_recently_processed_event_number: self.wfm.machines.last_processed_event
452
+ as usize,
453
+ in_response_to_wft,
454
+ }),
455
+ span: Span::current(),
456
+ })
457
+ .expect("Machine can send update");
458
+ }
459
+ }
460
+
461
+ fn start_heartbeat_timeout_task(
462
+ chan: UnboundedSender<Span>,
463
+ wft_start_time: Instant,
464
+ wft_timeout: Duration,
465
+ ) -> JoinHandle<()> {
466
+ // The heartbeat deadline is 80% of the WFT timeout
467
+ let wft_heartbeat_deadline =
468
+ wft_start_time.add(wft_timeout.mul_f32(WFT_HEARTBEAT_TIMEOUT_FRACTION));
469
+ task::spawn(async move {
470
+ tokio::time::sleep_until(wft_heartbeat_deadline.into()).await;
471
+ let _ = chan.send(Span::current());
472
+ })
473
+ }
474
+
475
+ enum RunActionOutcome {
476
+ AfterNewWFT(Option<ActivationOrAuto>),
477
+ AfterCheckWork(Option<ActivationOrAuto>),
478
+ AfterLocalResolution(Option<FulfillableActivationComplete>),
479
+ AfterCompletion(Option<FulfillableActivationComplete>),
480
+ AfterHeartbeatTimeout(Option<ActivationOrAuto>),
481
+ }
482
+
483
+ #[derive(Debug)]
484
+ struct RunUpdateErr {
485
+ source: WFMachinesError,
486
+ complete_resp_chan: Option<oneshot::Sender<ActivationCompleteResult>>,
487
+ }
488
+
489
+ impl From<WFMachinesError> for RunUpdateErr {
490
+ fn from(e: WFMachinesError) -> Self {
491
+ RunUpdateErr {
492
+ source: e,
493
+ complete_resp_chan: None,
494
+ }
495
+ }
496
+ }
497
+
498
+ /// Manages an instance of a [WorkflowMachines], which is not thread-safe, as well as other data
499
+ /// associated with that specific workflow run.
500
+ pub(crate) struct WorkflowManager {
501
+ machines: WorkflowMachines,
502
+ /// Is always `Some` in normal operation. Optional to allow for unit testing with the test
503
+ /// workflow driver, which does not need to complete activations the normal way.
504
+ command_sink: Option<Sender<Vec<WFCommand>>>,
505
+ }
506
+
507
+ impl WorkflowManager {
508
+ /// Create a new workflow manager given workflow history and execution info as would be found
509
+ /// in [PollWorkflowTaskQueueResponse]
510
+ pub fn new(
511
+ history: HistoryUpdate,
512
+ namespace: String,
513
+ workflow_id: String,
514
+ workflow_type: String,
515
+ run_id: String,
516
+ metrics: MetricsContext,
517
+ ) -> Self {
518
+ let (wfb, cmd_sink) = WorkflowBridge::new();
519
+ let state_machines = WorkflowMachines::new(
520
+ namespace,
521
+ workflow_id,
522
+ workflow_type,
523
+ run_id,
524
+ history,
525
+ Box::new(wfb).into(),
526
+ metrics,
527
+ );
528
+ Self {
529
+ machines: state_machines,
530
+ command_sink: Some(cmd_sink),
531
+ }
532
+ }
533
+
534
+ #[cfg(test)]
535
+ pub const fn new_from_machines(workflow_machines: WorkflowMachines) -> Self {
536
+ Self {
537
+ machines: workflow_machines,
538
+ command_sink: None,
539
+ }
540
+ }
541
+
542
+ /// Given history that was just obtained from the server, pipe it into this workflow's machines.
543
+ ///
544
+ /// Should only be called when a workflow has caught up on replay (or is just beginning). It
545
+ /// will return a workflow activation if one is needed.
546
+ async fn feed_history_from_server(
547
+ &mut self,
548
+ update: HistoryUpdate,
549
+ ) -> Result<WorkflowActivation> {
550
+ self.machines.new_history_from_server(update).await?;
551
+ self.get_next_activation().await
552
+ }
553
+
554
+ /// Let this workflow know that something we've been waiting locally on has resolved, like a
555
+ /// local activity or side effect
556
+ ///
557
+ /// Returns true if the resolution did anything. EX: If the activity is already canceled and
558
+ /// used the TryCancel or Abandon modes, the resolution is uninteresting.
559
+ fn notify_of_local_result(&mut self, resolved: LocalResolution) -> Result<bool> {
560
+ self.machines.local_resolution(resolved)
561
+ }
562
+
563
+ /// Fetch the next workflow activation for this workflow if one is required. Doing so will apply
564
+ /// the next unapplied workflow task if such a sequence exists in history we already know about.
565
+ ///
566
+ /// Callers may also need to call [get_server_commands] after this to issue any pending commands
567
+ /// to the server.
568
+ async fn get_next_activation(&mut self) -> Result<WorkflowActivation> {
569
+ // First check if there are already some pending jobs, which can be a result of replay.
570
+ let activation = self.machines.get_wf_activation();
571
+ if !activation.jobs.is_empty() {
572
+ return Ok(activation);
573
+ }
574
+
575
+ self.machines.apply_next_wft_from_history().await?;
576
+ Ok(self.machines.get_wf_activation())
577
+ }
578
+
579
+ /// If there are no pending jobs for the workflow, apply the next workflow task and check
580
+ /// again if there are any jobs. Importantly, does not *drain* jobs.
581
+ ///
582
+ /// Returns true if there are jobs (before or after applying the next WFT).
583
+ async fn apply_next_task_if_ready(&mut self) -> Result<bool> {
584
+ if self.machines.has_pending_jobs() {
585
+ return Ok(true);
586
+ }
587
+ loop {
588
+ let consumed_events = self.machines.apply_next_wft_from_history().await?;
589
+
590
+ if consumed_events == 0 || !self.machines.replaying || self.machines.has_pending_jobs()
591
+ {
592
+ // Keep applying tasks while there are events, we are still replaying, and there are
593
+ // no jobs
594
+ break;
595
+ }
596
+ }
597
+ Ok(self.machines.has_pending_jobs())
598
+ }
599
+
600
+ /// Typically called after [get_next_activation], use this to retrieve commands to be sent to
601
+ /// the server which have been generated by the machines. Does *not* drain those commands.
602
+ /// See [WorkflowMachines::get_commands].
603
+ fn get_server_commands(&self) -> OutgoingServerCommands {
604
+ OutgoingServerCommands {
605
+ commands: self.machines.get_commands(),
606
+ replaying: self.machines.replaying,
607
+ }
608
+ }
609
+
610
+ /// Remove and return all queued local activities. Once this is called, they need to be
611
+ /// dispatched for execution.
612
+ fn drain_queued_local_activities(&mut self) -> Vec<LocalActRequest> {
613
+ self.machines.drain_queued_local_activities()
614
+ }
615
+
616
+ /// Feed the workflow machines new commands issued by the executing workflow code, and iterate
617
+ /// the machines.
618
+ async fn push_commands(&mut self, cmds: Vec<WFCommand>) -> Result<()> {
619
+ if let Some(cs) = self.command_sink.as_mut() {
620
+ cs.send(cmds).map_err(|_| {
621
+ WFMachinesError::Fatal("Internal error buffering workflow commands".to_string())
622
+ })?;
623
+ }
624
+ self.machines.iterate_machines().await?;
625
+ Ok(())
626
+ }
627
+ }