@temporalio/core-bridge 1.5.2 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. package/Cargo.lock +255 -48
  2. package/package.json +4 -4
  3. package/releases/aarch64-apple-darwin/index.node +0 -0
  4. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
  5. package/releases/x86_64-apple-darwin/index.node +0 -0
  6. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  7. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  8. package/sdk-core/.buildkite/pipeline.yml +1 -3
  9. package/sdk-core/.cargo/config.toml +5 -2
  10. package/sdk-core/.github/workflows/heavy.yml +28 -0
  11. package/sdk-core/Cargo.toml +1 -1
  12. package/sdk-core/README.md +9 -5
  13. package/sdk-core/client/src/lib.rs +211 -36
  14. package/sdk-core/client/src/raw.rs +1 -1
  15. package/sdk-core/client/src/retry.rs +32 -20
  16. package/sdk-core/core/Cargo.toml +23 -9
  17. package/sdk-core/core/src/abstractions.rs +11 -0
  18. package/sdk-core/core/src/core_tests/activity_tasks.rs +6 -5
  19. package/sdk-core/core/src/core_tests/local_activities.rs +263 -22
  20. package/sdk-core/core/src/core_tests/queries.rs +2 -2
  21. package/sdk-core/core/src/core_tests/workflow_tasks.rs +249 -5
  22. package/sdk-core/core/src/ephemeral_server/mod.rs +5 -6
  23. package/sdk-core/core/src/lib.rs +2 -0
  24. package/sdk-core/core/src/protosext/mod.rs +1 -1
  25. package/sdk-core/core/src/telemetry/log_export.rs +1 -1
  26. package/sdk-core/core/src/telemetry/mod.rs +23 -8
  27. package/sdk-core/core/src/test_help/mod.rs +8 -1
  28. package/sdk-core/core/src/worker/activities/local_activities.rs +259 -125
  29. package/sdk-core/core/src/worker/activities.rs +3 -2
  30. package/sdk-core/core/src/worker/mod.rs +53 -26
  31. package/sdk-core/core/src/worker/workflow/bridge.rs +1 -3
  32. package/sdk-core/core/src/worker/workflow/driven_workflow.rs +3 -5
  33. package/sdk-core/core/src/worker/workflow/history_update.rs +835 -277
  34. package/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +9 -17
  35. package/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +3 -5
  36. package/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +1 -2
  37. package/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +3 -5
  38. package/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +1 -2
  39. package/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +1 -2
  40. package/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +1 -2
  41. package/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +73 -51
  42. package/sdk-core/core/src/worker/workflow/machines/mod.rs +3 -3
  43. package/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +4 -4
  44. package/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +1 -2
  45. package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +3 -5
  46. package/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +6 -7
  47. package/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +2 -2
  48. package/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +4 -4
  49. package/sdk-core/core/src/worker/workflow/machines/workflow_machines/local_acts.rs +6 -17
  50. package/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +89 -58
  51. package/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +4 -7
  52. package/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +21 -9
  53. package/sdk-core/core/src/worker/workflow/managed_run.rs +1021 -360
  54. package/sdk-core/core/src/worker/workflow/mod.rs +306 -346
  55. package/sdk-core/core/src/worker/workflow/run_cache.rs +29 -53
  56. package/sdk-core/core/src/worker/workflow/wft_extraction.rs +125 -0
  57. package/sdk-core/core/src/worker/workflow/wft_poller.rs +1 -4
  58. package/sdk-core/core/src/worker/workflow/workflow_stream/saved_wf_inputs.rs +115 -0
  59. package/sdk-core/core/src/worker/workflow/workflow_stream/tonic_status_serde.rs +24 -0
  60. package/sdk-core/core/src/worker/workflow/workflow_stream.rs +444 -714
  61. package/sdk-core/core-api/Cargo.toml +2 -0
  62. package/sdk-core/core-api/src/errors.rs +1 -34
  63. package/sdk-core/core-api/src/lib.rs +6 -2
  64. package/sdk-core/core-api/src/worker.rs +14 -1
  65. package/sdk-core/etc/deps.svg +115 -140
  66. package/sdk-core/etc/regen-depgraph.sh +5 -0
  67. package/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +6 -6
  68. package/sdk-core/fsm/rustfsm_trait/src/lib.rs +7 -3
  69. package/sdk-core/histories/evict_while_la_running_no_interference-16_history.bin +0 -0
  70. package/sdk-core/protos/api_upstream/Makefile +5 -5
  71. package/sdk-core/protos/api_upstream/build/go.mod +7 -0
  72. package/sdk-core/protos/api_upstream/build/go.sum +5 -0
  73. package/sdk-core/protos/api_upstream/build/tools.go +29 -0
  74. package/sdk-core/protos/api_upstream/go.mod +6 -0
  75. package/sdk-core/protos/api_upstream/temporal/api/batch/v1/message.proto +9 -2
  76. package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +12 -19
  77. package/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +2 -2
  78. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/batch_operation.proto +3 -2
  79. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +3 -2
  80. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/common.proto +3 -2
  81. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +3 -3
  82. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +20 -2
  83. package/sdk-core/protos/api_upstream/temporal/api/{update/v1/message.proto → enums/v1/interaction_type.proto} +11 -18
  84. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +2 -2
  85. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/query.proto +2 -2
  86. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +2 -2
  87. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +2 -2
  88. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +2 -2
  89. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +2 -13
  90. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +2 -2
  91. package/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +2 -2
  92. package/sdk-core/protos/api_upstream/temporal/api/failure/v1/message.proto +2 -2
  93. package/sdk-core/protos/api_upstream/temporal/api/filter/v1/message.proto +2 -2
  94. package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +13 -19
  95. package/sdk-core/protos/api_upstream/temporal/api/interaction/v1/message.proto +87 -0
  96. package/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +2 -2
  97. package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +2 -2
  98. package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +2 -2
  99. package/sdk-core/protos/api_upstream/temporal/api/query/v1/message.proto +2 -2
  100. package/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +2 -2
  101. package/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +2 -2
  102. package/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +2 -2
  103. package/sdk-core/protos/api_upstream/temporal/api/version/v1/message.proto +2 -2
  104. package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +2 -2
  105. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +13 -8
  106. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +2 -2
  107. package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +2 -0
  108. package/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/request_response.proto +2 -2
  109. package/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/service.proto +2 -2
  110. package/sdk-core/sdk/Cargo.toml +4 -3
  111. package/sdk-core/sdk/src/lib.rs +87 -21
  112. package/sdk-core/sdk/src/workflow_future.rs +7 -12
  113. package/sdk-core/sdk-core-protos/Cargo.toml +5 -2
  114. package/sdk-core/sdk-core-protos/build.rs +36 -2
  115. package/sdk-core/sdk-core-protos/src/history_builder.rs +26 -19
  116. package/sdk-core/sdk-core-protos/src/history_info.rs +4 -0
  117. package/sdk-core/sdk-core-protos/src/lib.rs +78 -34
  118. package/sdk-core/sdk-core-protos/src/task_token.rs +12 -2
  119. package/sdk-core/test-utils/Cargo.toml +3 -1
  120. package/sdk-core/test-utils/src/histfetch.rs +1 -1
  121. package/sdk-core/test-utils/src/lib.rs +50 -18
  122. package/sdk-core/test-utils/src/wf_input_saver.rs +50 -0
  123. package/sdk-core/test-utils/src/workflows.rs +29 -0
  124. package/sdk-core/tests/fuzzy_workflow.rs +130 -0
  125. package/sdk-core/tests/{load_tests.rs → heavy_tests.rs} +114 -7
  126. package/sdk-core/tests/integ_tests/heartbeat_tests.rs +5 -2
  127. package/sdk-core/tests/integ_tests/metrics_tests.rs +1 -1
  128. package/sdk-core/tests/integ_tests/polling_tests.rs +1 -39
  129. package/sdk-core/tests/integ_tests/queries_tests.rs +2 -127
  130. package/sdk-core/tests/integ_tests/visibility_tests.rs +52 -5
  131. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +74 -1
  132. package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +5 -13
  133. package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +1 -1
  134. package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +2 -10
  135. package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +69 -197
  136. package/sdk-core/tests/integ_tests/workflow_tests/patches.rs +4 -28
  137. package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +12 -7
  138. package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +14 -14
  139. package/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +3 -19
  140. package/sdk-core/tests/integ_tests/workflow_tests/timers.rs +3 -19
  141. package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +1 -1
  142. package/sdk-core/tests/integ_tests/workflow_tests.rs +5 -6
  143. package/sdk-core/tests/main.rs +2 -12
  144. package/sdk-core/tests/runner.rs +71 -34
  145. package/sdk-core/tests/wf_input_replay.rs +32 -0
  146. package/sdk-core/bridge-ffi/Cargo.toml +0 -24
  147. package/sdk-core/bridge-ffi/LICENSE.txt +0 -23
  148. package/sdk-core/bridge-ffi/build.rs +0 -25
  149. package/sdk-core/bridge-ffi/include/sdk-core-bridge.h +0 -224
  150. package/sdk-core/bridge-ffi/src/lib.rs +0 -746
  151. package/sdk-core/bridge-ffi/src/wrappers.rs +0 -221
  152. package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +0 -210
  153. package/sdk-core/sdk/src/conversions.rs +0 -8
@@ -8,36 +8,43 @@ mod history_update;
8
8
  mod machines;
9
9
  mod managed_run;
10
10
  mod run_cache;
11
+ mod wft_extraction;
11
12
  pub(crate) mod wft_poller;
12
13
  mod workflow_stream;
13
14
 
15
+ #[cfg(feature = "save_wf_inputs")]
16
+ pub use workflow_stream::replay_wf_state_inputs;
17
+
14
18
  pub(crate) use bridge::WorkflowBridge;
15
19
  pub(crate) use driven_workflow::{DrivenWorkflow, WorkflowFetcher};
16
- pub(crate) use history_update::{HistoryPaginator, HistoryUpdate};
17
- pub(crate) use machines::WFMachinesError;
20
+ pub(crate) use history_update::HistoryUpdate;
18
21
  #[cfg(test)]
19
22
  pub(crate) use managed_run::ManagedWFFunc;
20
23
 
21
24
  use crate::{
22
- abstractions::OwnedMeteredSemPermit,
23
- protosext::{legacy_query_failure, ValidPollWFTQResponse, WorkflowActivationExt},
24
- telemetry::VecDisplayer,
25
+ abstractions::{stream_when_allowed, MeteredSemaphore, OwnedMeteredSemPermit},
26
+ protosext::{legacy_query_failure, ValidPollWFTQResponse},
27
+ telemetry::{metrics::workflow_worker_type, VecDisplayer},
25
28
  worker::{
26
- activities::{ActivitiesFromWFTsHandle, PermittedTqResp},
29
+ activities::{ActivitiesFromWFTsHandle, LocalActivityManager, PermittedTqResp},
27
30
  client::{WorkerClient, WorkflowTaskCompletion},
28
31
  workflow::{
29
- managed_run::{ManagedRun, WorkflowManager},
32
+ history_update::HistoryPaginator,
33
+ managed_run::RunUpdateAct,
34
+ wft_extraction::{HistoryFetchReq, WFTExtractor},
30
35
  wft_poller::validate_wft,
31
36
  workflow_stream::{LocalInput, LocalInputs, WFStream},
32
37
  },
33
- LocalActRequest, LocalActivityResolution,
38
+ LocalActRequest, LocalActivityExecutionResult, LocalActivityResolution,
34
39
  },
35
40
  MetricsContext,
36
41
  };
37
42
  use futures::{stream::BoxStream, Stream, StreamExt};
43
+ use futures_util::stream;
44
+ use prost_types::TimestampError;
38
45
  use std::{
39
- collections::HashSet,
40
- fmt::{Debug, Display, Formatter},
46
+ collections::VecDeque,
47
+ fmt::Debug,
41
48
  future::Future,
42
49
  ops::DerefMut,
43
50
  result,
@@ -59,8 +66,9 @@ use temporal_sdk_core_protos::{
59
66
  },
60
67
  temporal::api::{
61
68
  command::v1::{command::Attributes, Command as ProtoCommand, Command},
62
- common::v1::{Memo, RetryPolicy, SearchAttributes},
69
+ common::v1::{Memo, RetryPolicy, SearchAttributes, WorkflowExecution},
63
70
  enums::v1::WorkflowTaskFailedCause,
71
+ query::v1::WorkflowQuery,
64
72
  taskqueue::v1::StickyExecutionAttributes,
65
73
  workflowservice::v1::PollActivityTaskQueueResponse,
66
74
  },
@@ -68,7 +76,7 @@ use temporal_sdk_core_protos::{
68
76
  };
69
77
  use tokio::{
70
78
  sync::{
71
- mpsc::{unbounded_channel, UnboundedSender},
79
+ mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender},
72
80
  oneshot,
73
81
  },
74
82
  task,
@@ -79,6 +87,9 @@ use tokio_util::sync::CancellationToken;
79
87
  use tracing::Span;
80
88
 
81
89
  pub(crate) const LEGACY_QUERY_ID: &str = "legacy_query";
90
+ /// What fraction of a WFT timeout we are willing to wait before sending a WFT heartbeat when
91
+ /// necessary.
92
+ const WFT_HEARTBEAT_TIMEOUT_FRACTION: f32 = 0.8;
82
93
  const MAX_EAGER_ACTIVITY_RESERVATIONS_PER_WORKFLOW_TASK: usize = 3;
83
94
 
84
95
  type Result<T, E = WFMachinesError> = result::Result<T, E>;
@@ -100,9 +111,11 @@ pub(crate) struct Workflows {
100
111
  sticky_attrs: Option<StickyExecutionAttributes>,
101
112
  /// If set, can be used to reserve activity task slots for eager-return of new activity tasks.
102
113
  activity_tasks_handle: Option<ActivitiesFromWFTsHandle>,
114
+ /// Ensures we stay at or below this worker's maximum concurrent workflow task limit
115
+ wft_semaphore: MeteredSemaphore,
103
116
  }
104
117
 
105
- pub(super) struct WorkflowBasics {
118
+ pub(crate) struct WorkflowBasics {
106
119
  pub max_cached_workflows: usize,
107
120
  pub max_outstanding_wfts: usize,
108
121
  pub shutdown_token: CancellationToken,
@@ -110,6 +123,9 @@ pub(super) struct WorkflowBasics {
110
123
  pub namespace: String,
111
124
  pub task_queue: String,
112
125
  pub ignore_evicts_on_shutdown: bool,
126
+ pub fetching_concurrency: usize,
127
+ #[cfg(feature = "save_wf_inputs")]
128
+ pub wf_state_inputs: Option<UnboundedSender<Vec<u8>>>,
113
129
  }
114
130
 
115
131
  impl Workflows {
@@ -118,20 +134,38 @@ impl Workflows {
118
134
  sticky_attrs: Option<StickyExecutionAttributes>,
119
135
  client: Arc<dyn WorkerClient>,
120
136
  wft_stream: impl Stream<Item = Result<ValidPollWFTQResponse, tonic::Status>> + Send + 'static,
121
- local_activity_request_sink: impl Fn(Vec<LocalActRequest>) -> Vec<LocalActivityResolution>
122
- + Send
123
- + Sync
124
- + 'static,
137
+ local_activity_request_sink: impl LocalActivityRequestSink,
138
+ heartbeat_timeout_rx: UnboundedReceiver<HeartbeatTimeoutMsg>,
125
139
  activity_tasks_handle: Option<ActivitiesFromWFTsHandle>,
126
140
  ) -> Self {
127
141
  let (local_tx, local_rx) = unbounded_channel();
142
+ let (fetch_tx, fetch_rx) = unbounded_channel();
128
143
  let shutdown_tok = basics.shutdown_token.clone();
129
144
  let task_queue = basics.task_queue.clone();
130
- let mut stream = WFStream::build(
131
- basics,
145
+ let wft_semaphore = MeteredSemaphore::new(
146
+ basics.max_outstanding_wfts,
147
+ basics.metrics.with_new_attrs([workflow_worker_type()]),
148
+ MetricsContext::available_task_slots,
149
+ );
150
+ // Only allow polling of the new WFT stream if there are available task slots
151
+ let proceeder = stream::unfold(wft_semaphore.clone(), |sem| async move {
152
+ Some((sem.acquire_owned().await.unwrap(), sem))
153
+ });
154
+ let wft_stream = stream_when_allowed(wft_stream, proceeder);
155
+ let extracted_wft_stream = WFTExtractor::build(
156
+ client.clone(),
157
+ basics.fetching_concurrency,
132
158
  wft_stream,
159
+ UnboundedReceiverStream::new(fetch_rx),
160
+ );
161
+ let locals_stream = stream::select(
133
162
  UnboundedReceiverStream::new(local_rx),
134
- client.clone(),
163
+ UnboundedReceiverStream::new(heartbeat_timeout_rx).map(Into::into),
164
+ );
165
+ let mut stream = WFStream::build(
166
+ basics,
167
+ extracted_wft_stream,
168
+ locals_stream,
135
169
  local_activity_request_sink,
136
170
  );
137
171
  let (activation_tx, activation_rx) = unbounded_channel();
@@ -152,10 +186,24 @@ impl Workflows {
152
186
  if !do_poll {
153
187
  return;
154
188
  }
155
- while let Some(act) = stream.next().await {
156
- activation_tx
157
- .send(act)
158
- .expect("Activation processor channel not dropped");
189
+ while let Some(output) = stream.next().await {
190
+ match output {
191
+ Ok(o) => {
192
+ for fetchreq in o.fetch_histories {
193
+ fetch_tx
194
+ .send(fetchreq)
195
+ .expect("Fetch channel must not be dropped");
196
+ }
197
+ for act in o.activations {
198
+ activation_tx
199
+ .send(Ok(act))
200
+ .expect("Activation processor channel not dropped");
201
+ }
202
+ }
203
+ Err(e) => activation_tx
204
+ .send(Err(e))
205
+ .expect("Activation processor channel not dropped"),
206
+ }
159
207
  }
160
208
  });
161
209
  Self {
@@ -169,12 +217,13 @@ impl Workflows {
169
217
  client,
170
218
  sticky_attrs,
171
219
  activity_tasks_handle,
220
+ wft_semaphore,
172
221
  }
173
222
  }
174
223
 
175
224
  pub async fn next_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
176
225
  loop {
177
- let r = {
226
+ let al = {
178
227
  let mut lock = self.activation_stream.lock().await;
179
228
  let (ref mut stream, ref mut beginner) = lock.deref_mut();
180
229
  if let Some(beginner) = beginner.take() {
@@ -182,8 +231,8 @@ impl Workflows {
182
231
  }
183
232
  stream.next().await.unwrap_or(Err(PollWfError::ShutDown))?
184
233
  };
185
- Span::current().record("run_id", r.run_id());
186
- match r {
234
+ Span::current().record("run_id", al.run_id());
235
+ match al {
187
236
  ActivationOrAuto::LangActivation(act) | ActivationOrAuto::ReadyForQueries(act) => {
188
237
  debug!(activation=%act, "Sending activation to lang");
189
238
  break Ok(act);
@@ -202,7 +251,7 @@ impl Workflows {
202
251
  /// Queue an activation completion for processing, returning a future that will resolve with
203
252
  /// the outcome of that completion. See [ActivationCompletedOutcome].
204
253
  ///
205
- /// Returns the most-recently-processed event number for the run
254
+ /// Returns the most-recently-processed event number for the run.
206
255
  pub async fn activation_completed(
207
256
  &self,
208
257
  completion: WorkflowActivationCompletion,
@@ -213,7 +262,7 @@ impl Workflows {
213
262
  let (tx, rx) = oneshot::channel();
214
263
  let was_sent = self.send_local(WFActCompleteMsg {
215
264
  completion,
216
- response_tx: tx,
265
+ response_tx: Some(tx),
217
266
  });
218
267
  if !was_sent {
219
268
  if is_empty_completion {
@@ -230,7 +279,7 @@ impl Workflows {
230
279
  .await
231
280
  .expect("Send half of activation complete response not dropped");
232
281
  let mut wft_from_complete = None;
233
- let reported_wft_to_server = match completion_outcome.outcome {
282
+ let wft_report_status = match completion_outcome.outcome {
234
283
  ActivationCompleteOutcome::ReportWFTSuccess(report) => match report {
235
284
  ServerCommandsWithWorkflowInfo {
236
285
  task_token,
@@ -273,14 +322,14 @@ impl Workflows {
273
322
  Ok(())
274
323
  })
275
324
  .await;
276
- true
325
+ WFTReportStatus::Reported
277
326
  }
278
327
  ServerCommandsWithWorkflowInfo {
279
328
  task_token,
280
329
  action: ActivationAction::RespondLegacyQuery { result },
281
330
  } => {
282
331
  self.respond_legacy_query(task_token, *result).await;
283
- true
332
+ WFTReportStatus::Reported
284
333
  }
285
334
  },
286
335
  ActivationCompleteOutcome::ReportWFTFail(outcome) => match outcome {
@@ -292,22 +341,39 @@ impl Workflows {
292
341
  .await
293
342
  })
294
343
  .await;
295
- true
344
+ WFTReportStatus::Reported
296
345
  }
297
346
  FailedActivationWFTReport::ReportLegacyQueryFailure(task_token, failure) => {
298
347
  warn!(run_id=%run_id, failure=?failure, "Failing legacy query request");
299
348
  self.respond_legacy_query(task_token, legacy_query_failure(failure))
300
349
  .await;
301
- true
350
+ WFTReportStatus::Reported
302
351
  }
303
352
  },
304
- ActivationCompleteOutcome::DoNothing => false,
353
+ ActivationCompleteOutcome::WFTFailedDontReport => WFTReportStatus::DropWft,
354
+ ActivationCompleteOutcome::DoNothing => WFTReportStatus::NotReported,
355
+ };
356
+
357
+ let maybe_pwft = if let Some(wft) = wft_from_complete {
358
+ match HistoryPaginator::from_poll(wft, self.client.clone()).await {
359
+ Ok((paginator, pwft)) => Some((pwft, paginator)),
360
+ Err(e) => {
361
+ self.request_eviction(
362
+ &run_id,
363
+ format!("Failed to paginate workflow task from completion: {e:?}"),
364
+ EvictionReason::Fatal,
365
+ );
366
+ None
367
+ }
368
+ }
369
+ } else {
370
+ None
305
371
  };
306
372
 
307
373
  self.post_activation(PostActivationMsg {
308
374
  run_id,
309
- reported_wft_to_server,
310
- wft_from_complete,
375
+ wft_report_status,
376
+ wft_from_complete: maybe_pwft,
311
377
  });
312
378
 
313
379
  Ok(completion_outcome.most_recently_processed_event)
@@ -342,12 +408,16 @@ impl Workflows {
342
408
  async move { rx.await.ok() }
343
409
  }
344
410
 
411
+ pub fn available_wft_permits(&self) -> usize {
412
+ self.wft_semaphore.available_permits()
413
+ }
414
+
345
415
  pub async fn shutdown(&self) -> Result<(), JoinError> {
346
416
  let maybe_jh = self.processing_task.lock().await.take();
347
417
  if let Some(jh) = maybe_jh {
348
418
  // This acts as a final wake up in case the stream is still alive and wouldn't otherwise
349
419
  // receive another message. It allows it to shut itself down.
350
- let _ = self.get_state_info();
420
+ let _ = self.get_state_info().await;
351
421
  jh.await
352
422
  } else {
353
423
  Ok(())
@@ -393,7 +463,11 @@ impl Workflows {
393
463
  /// successfully.
394
464
  fn send_local(&self, msg: impl Into<LocalInputs>) -> bool {
395
465
  let msg = msg.into();
396
- let print_err = !matches!(msg, LocalInputs::GetStateInfo(_));
466
+ let print_err = match &msg {
467
+ LocalInputs::GetStateInfo(_) => false,
468
+ LocalInputs::LocalResolution(lr) if lr.res.is_la_cancel_confirmation() => false,
469
+ _ => true,
470
+ };
397
471
  if let Err(e) = self.local_tx.send(LocalInput {
398
472
  input: msg,
399
473
  span: Span::current(),
@@ -509,186 +583,30 @@ impl Workflows {
509
583
  }
510
584
  }
511
585
 
512
- /// Manages access to a specific workflow run, and contains various bookkeeping information that the
513
- /// [WFStream] may need to access quickly.
514
- #[derive(derive_more::DebugCustom)]
515
- #[debug(
516
- fmt = "ManagedRunHandle {{ wft: {:?}, activation: {:?}, buffered_resp: {:?} \
517
- have_seen_terminal_event: {}, most_recently_processed_event: {}, more_pending_work: {}, \
518
- trying_to_evict: {}, last_action_acked: {} }}",
519
- wft,
520
- activation,
521
- buffered_resp,
522
- have_seen_terminal_event,
523
- most_recently_processed_event_number,
524
- more_pending_work,
525
- "trying_to_evict.is_some()",
526
- last_action_acked
586
+ /// Returned when a cache miss happens and we need to fetch history from the beginning to
587
+ /// replay a run
588
+ #[derive(Debug, derive_more::Display)]
589
+ #[display(
590
+ fmt = "CacheMissFetchReq(run_id: {})",
591
+ "original_wft.work.execution.run_id"
527
592
  )]
528
- struct ManagedRunHandle {
529
- /// If set, the WFT this run is currently/will be processing.
530
- wft: Option<OutstandingTask>,
531
- /// An outstanding activation to lang
532
- activation: Option<OutstandingActivation>,
533
- /// If set, it indicates there is a buffered poll response from the server that applies to this
534
- /// run. This can happen when lang takes too long to complete a task and the task times out, for
535
- /// example. Upon next completion, the buffered response will be removed and can be made ready
536
- /// to be returned from polling
537
- buffered_resp: Option<PermittedWFT>,
538
- /// True if this machine has seen an event which ends the execution
539
- have_seen_terminal_event: bool,
540
- /// The most recently processed event id this machine has seen. 0 means it has seen nothing.
541
- most_recently_processed_event_number: usize,
542
- /// Is set true when the machines indicate that there is additional known work to be processed
543
- more_pending_work: bool,
544
- /// Is set if an eviction has been requested for this run
545
- trying_to_evict: Option<RequestEvictMsg>,
546
- /// Set to true if the last action we tried to take to this run has been processed (ie: the
547
- /// [RunUpdateResponse] for it has been seen.
548
- last_action_acked: bool,
549
- /// For sending work to the machines
550
- run_actions_tx: UnboundedSender<RunAction>,
551
- /// Handle to the task where the actual machines live
552
- handle: JoinHandle<()>,
553
-
554
- /// We track if we have recorded useful debugging values onto a certain span yet, to overcome
555
- /// duplicating field values. Remove this once https://github.com/tokio-rs/tracing/issues/2334
556
- /// is fixed.
557
- recorded_span_ids: HashSet<tracing::Id>,
558
- metrics: MetricsContext,
593
+ #[must_use]
594
+ struct CacheMissFetchReq {
595
+ original_wft: PermittedWFT,
596
+ }
597
+ /// Bubbled up from inside workflow state if we're trying to apply the next workflow task but it
598
+ /// isn't in memory
599
+ #[derive(Debug)]
600
+ #[must_use]
601
+ struct NextPageReq {
602
+ paginator: HistoryPaginator,
603
+ span: Span,
559
604
  }
560
- impl ManagedRunHandle {
561
- fn new(
562
- wfm: WorkflowManager,
563
- activations_tx: UnboundedSender<RunUpdateResponse>,
564
- local_activity_request_sink: LocalActivityRequestSink,
565
- metrics: MetricsContext,
566
- ) -> Self {
567
- let (run_actions_tx, run_actions_rx) = unbounded_channel();
568
- let managed = ManagedRun::new(wfm, activations_tx, local_activity_request_sink);
569
- let handle = tokio::task::spawn(managed.run(run_actions_rx));
570
- Self {
571
- wft: None,
572
- activation: None,
573
- buffered_resp: None,
574
- have_seen_terminal_event: false,
575
- most_recently_processed_event_number: 0,
576
- more_pending_work: false,
577
- trying_to_evict: None,
578
- last_action_acked: true,
579
- run_actions_tx,
580
- handle,
581
- recorded_span_ids: Default::default(),
582
- metrics,
583
- }
584
- }
585
-
586
- fn incoming_wft(&mut self, wft: NewIncomingWFT) {
587
- if self.wft.is_some() {
588
- error!("Trying to send a new WFT for a run which already has one!");
589
- }
590
- self.send_run_action(RunActions::NewIncomingWFT(wft));
591
- }
592
- fn check_more_activations(&mut self) {
593
- // No point in checking for more activations if we have not acked the last update, or
594
- // if there's already an outstanding activation.
595
- if self.last_action_acked && self.activation.is_none() {
596
- self.send_run_action(RunActions::CheckMoreWork {
597
- want_to_evict: self.trying_to_evict.clone(),
598
- has_pending_queries: self
599
- .wft
600
- .as_ref()
601
- .map(|wft| !wft.pending_queries.is_empty())
602
- .unwrap_or_default(),
603
- has_wft: self.wft.is_some(),
604
- });
605
- }
606
- }
607
- fn send_completion(&mut self, c: RunActivationCompletion) {
608
- self.send_run_action(RunActions::ActivationCompletion(c));
609
- }
610
- fn send_local_resolution(&mut self, r: LocalResolution) {
611
- self.send_run_action(RunActions::LocalResolution(r));
612
- }
613
-
614
- fn insert_outstanding_activation(&mut self, act: &ActivationOrAuto) {
615
- let act_type = match &act {
616
- ActivationOrAuto::LangActivation(act) | ActivationOrAuto::ReadyForQueries(act) => {
617
- if act.is_legacy_query() {
618
- OutstandingActivation::LegacyQuery
619
- } else {
620
- OutstandingActivation::Normal {
621
- contains_eviction: act.eviction_index().is_some(),
622
- num_jobs: act.jobs.len(),
623
- }
624
- }
625
- }
626
- ActivationOrAuto::Autocomplete { .. } => OutstandingActivation::Autocomplete,
627
- };
628
- if let Some(old_act) = self.activation {
629
- // This is a panic because we have screwed up core logic if this is violated. It must be
630
- // upheld.
631
- panic!(
632
- "Attempted to insert a new outstanding activation {:?}, but there already was \
633
- one outstanding: {:?}",
634
- act, old_act
635
- );
636
- }
637
- self.activation = Some(act_type);
638
- }
639
-
640
- fn send_run_action(&mut self, action: RunActions) {
641
- self.last_action_acked = false;
642
- self.run_actions_tx
643
- .send(RunAction {
644
- action,
645
- trace_span: Span::current(),
646
- })
647
- .expect("Receive half of run actions not dropped");
648
- }
649
-
650
- /// Returns true if the managed run has any form of pending work
651
- /// If `ignore_evicts` is true, pending evictions do not count as pending work.
652
- /// If `ignore_buffered` is true, buffered workflow tasks do not count as pending work.
653
- fn has_any_pending_work(&self, ignore_evicts: bool, ignore_buffered: bool) -> bool {
654
- let evict_work = if ignore_evicts {
655
- false
656
- } else {
657
- self.trying_to_evict.is_some()
658
- };
659
- let act_work = if ignore_evicts {
660
- if let Some(ref act) = self.activation {
661
- !act.has_only_eviction()
662
- } else {
663
- false
664
- }
665
- } else {
666
- self.activation.is_some()
667
- };
668
- let buffered = if ignore_buffered {
669
- false
670
- } else {
671
- self.buffered_resp.is_some()
672
- };
673
- self.wft.is_some()
674
- || buffered
675
- || !self.last_action_acked
676
- || self.more_pending_work
677
- || act_work
678
- || evict_work
679
- }
680
605
 
681
- /// Returns true if the handle is currently processing a WFT which contains a legacy query.
682
- fn pending_work_is_legacy_query(&self) -> bool {
683
- // Either we know because there is a pending legacy query, or it's already been drained and
684
- // sent as an activation.
685
- matches!(self.activation, Some(OutstandingActivation::LegacyQuery))
686
- || self
687
- .wft
688
- .as_ref()
689
- .map(|t| t.has_pending_legacy_query())
690
- .unwrap_or_default()
691
- }
606
+ #[derive(Debug)]
607
+ struct WFStreamOutput {
608
+ activations: VecDeque<ActivationOrAuto>,
609
+ fetch_histories: VecDeque<HistoryFetchReq>,
692
610
  }
693
611
 
694
612
  #[derive(Debug, derive_more::Display)]
@@ -697,6 +615,7 @@ enum ActivationOrAuto {
697
615
  /// This type should only be filled with an empty activation which is ready to have queries
698
616
  /// inserted into the joblist
699
617
  ReadyForQueries(WorkflowActivation),
618
+ #[display(fmt = "Autocomplete(run_id={run_id})")]
700
619
  Autocomplete {
701
620
  run_id: String,
702
621
  },
@@ -711,11 +630,48 @@ impl ActivationOrAuto {
711
630
  }
712
631
  }
713
632
 
633
+ /// A processed WFT which has been validated and had a history update extracted from it
714
634
  #[derive(derive_more::DebugCustom)]
715
- #[debug(fmt = "PermittedWft {{ {:?} }}", wft)]
635
+ #[cfg_attr(
636
+ feature = "save_wf_inputs",
637
+ derive(serde::Serialize, serde::Deserialize)
638
+ )]
639
+ #[debug(fmt = "PermittedWft({work:?})")]
716
640
  pub(crate) struct PermittedWFT {
717
- wft: ValidPollWFTQResponse,
641
+ work: PreparedWFT,
642
+ #[cfg_attr(
643
+ feature = "save_wf_inputs",
644
+ serde(skip, default = "OwnedMeteredSemPermit::fake_deserialized")
645
+ )]
718
646
  permit: OwnedMeteredSemPermit,
647
+ #[cfg_attr(
648
+ feature = "save_wf_inputs",
649
+ serde(skip, default = "HistoryPaginator::fake_deserialized")
650
+ )]
651
+ paginator: HistoryPaginator,
652
+ }
653
+ #[derive(Debug)]
654
+ #[cfg_attr(
655
+ feature = "save_wf_inputs",
656
+ derive(serde::Serialize, serde::Deserialize)
657
+ )]
658
+ struct PreparedWFT {
659
+ task_token: TaskToken,
660
+ attempt: u32,
661
+ execution: WorkflowExecution,
662
+ workflow_type: String,
663
+ legacy_query: Option<WorkflowQuery>,
664
+ query_requests: Vec<QueryWorkflow>,
665
+ update: HistoryUpdate,
666
+ }
667
+ impl PreparedWFT {
668
+ /// Returns true if the contained history update is incremental (IE: expects to hit a cached
669
+ /// workflow)
670
+ pub fn is_incremental(&self) -> bool {
671
+ let start_event_id = self.update.first_event_id();
672
+ let poll_resp_is_incremental = start_event_id.map(|eid| eid > 1).unwrap_or_default();
673
+ poll_resp_is_incremental || start_event_id.is_none()
674
+ }
719
675
  }
720
676
 
721
677
  #[derive(Debug)]
@@ -811,44 +767,74 @@ pub(crate) enum ActivationAction {
811
767
  RespondLegacyQuery { result: Box<QueryResult> },
812
768
  }
813
769
 
814
- #[derive(Debug, Eq, PartialEq, Hash)]
815
- pub(crate) enum EvictionRequestResult {
816
- EvictionRequested(Option<u32>),
770
+ #[derive(Debug)]
771
+ enum EvictionRequestResult {
772
+ EvictionRequested(Option<u32>, RunUpdateAct),
817
773
  NotFound,
818
774
  EvictionAlreadyRequested(Option<u32>),
819
775
  }
776
+ impl EvictionRequestResult {
777
+ fn into_run_update_resp(self) -> RunUpdateAct {
778
+ match self {
779
+ EvictionRequestResult::EvictionRequested(_, resp) => resp,
780
+ EvictionRequestResult::NotFound
781
+ | EvictionRequestResult::EvictionAlreadyRequested(_) => None,
782
+ }
783
+ }
784
+ }
820
785
 
821
786
  #[derive(Debug)]
822
787
  #[allow(dead_code)] // Not always used in non-test
823
788
  pub(crate) struct WorkflowStateInfo {
824
789
  pub cached_workflows: usize,
825
790
  pub outstanding_wft: usize,
826
- pub available_wft_permits: usize,
827
791
  }
828
792
 
829
793
  #[derive(Debug)]
794
+ #[cfg_attr(
795
+ feature = "save_wf_inputs",
796
+ derive(serde::Serialize, serde::Deserialize)
797
+ )]
830
798
  struct WFActCompleteMsg {
831
799
  completion: ValidatedCompletion,
832
- response_tx: oneshot::Sender<ActivationCompleteResult>,
800
+ #[cfg_attr(feature = "save_wf_inputs", serde(skip))]
801
+ response_tx: Option<oneshot::Sender<ActivationCompleteResult>>,
833
802
  }
834
803
  #[derive(Debug)]
804
+ #[cfg_attr(
805
+ feature = "save_wf_inputs",
806
+ derive(serde::Serialize, serde::Deserialize)
807
+ )]
835
808
  struct LocalResolutionMsg {
836
809
  run_id: String,
837
810
  res: LocalResolution,
838
811
  }
839
812
  #[derive(Debug)]
813
+ #[cfg_attr(
814
+ feature = "save_wf_inputs",
815
+ derive(serde::Serialize, serde::Deserialize)
816
+ )]
840
817
  struct PostActivationMsg {
841
818
  run_id: String,
842
- reported_wft_to_server: bool,
843
- wft_from_complete: Option<ValidPollWFTQResponse>,
819
+ wft_report_status: WFTReportStatus,
820
+ wft_from_complete: Option<(PreparedWFT, HistoryPaginator)>,
844
821
  }
845
822
  #[derive(Debug, Clone)]
823
+ #[cfg_attr(
824
+ feature = "save_wf_inputs",
825
+ derive(serde::Serialize, serde::Deserialize)
826
+ )]
846
827
  struct RequestEvictMsg {
847
828
  run_id: String,
848
829
  message: String,
849
830
  reason: EvictionReason,
850
831
  }
851
832
  #[derive(Debug)]
833
+ pub(crate) struct HeartbeatTimeoutMsg {
834
+ pub(crate) run_id: String,
835
+ pub(crate) span: Span,
836
+ }
837
+ #[derive(Debug)]
852
838
  struct GetStateInfoMsg {
853
839
  response_tx: oneshot::Sender<WorkflowStateInfo>,
854
840
  }
@@ -869,16 +855,24 @@ enum ActivationCompleteOutcome {
869
855
  ReportWFTFail(FailedActivationWFTReport),
870
856
  /// There's nothing to do right now. EX: The workflow needs to keep replaying.
871
857
  DoNothing,
858
+ /// The workflow task failed, but we shouldn't report it. EX: We have failed 2 or more attempts
859
+ /// in a row.
860
+ WFTFailedDontReport,
872
861
  }
873
- #[derive(Debug)]
874
- struct FulfillableActivationComplete {
875
- result: ActivationCompleteResult,
876
- resp_chan: oneshot::Sender<ActivationCompleteResult>,
877
- }
878
- impl FulfillableActivationComplete {
879
- fn fulfill(self) {
880
- let _ = self.resp_chan.send(self.result);
881
- }
862
+ /// Did we report, or not, completion of a WFT to server?
863
+ #[derive(Debug, Copy, Clone)]
864
+ #[cfg_attr(
865
+ feature = "save_wf_inputs",
866
+ derive(serde::Serialize, serde::Deserialize)
867
+ )]
868
+ enum WFTReportStatus {
869
+ Reported,
870
+ /// The WFT completion was not reported when finishing the activation, because there's still
871
+ /// work to be done. EX: Running LAs.
872
+ NotReported,
873
+ /// We didn't report, but we want to clear the outstanding workflow task anyway. See
874
+ /// [ActivationCompleteOutcome::WFTFailedDontReport]
875
+ DropWft,
882
876
  }
883
877
 
884
878
  fn validate_completion(
@@ -908,8 +902,7 @@ fn validate_completion(
908
902
  reason: format!(
909
903
  "Workflow completion had a legacy query response along with other \
910
904
  commands. This is not allowed and constitutes an error in the \
911
- lang SDK. Commands: {:?}",
912
- commands
905
+ lang SDK. Commands: {commands:?}"
913
906
  ),
914
907
  run_id: completion.run_id,
915
908
  });
@@ -934,6 +927,10 @@ fn validate_completion(
934
927
  }
935
928
 
936
929
  #[derive(Debug)]
930
+ #[cfg_attr(
931
+ feature = "save_wf_inputs",
932
+ derive(serde::Serialize, serde::Deserialize)
933
+ )]
937
934
  #[allow(clippy::large_enum_variant)]
938
935
  enum ValidatedCompletion {
939
936
  Success {
@@ -955,112 +952,6 @@ impl ValidatedCompletion {
955
952
  }
956
953
  }
957
954
 
958
- /// Input to run tasks, sent to [ManagedRun]s via [ManagedRunHandle]s
959
- #[derive(Debug)]
960
- struct RunAction {
961
- action: RunActions,
962
- trace_span: Span,
963
- }
964
- #[derive(Debug)]
965
- #[allow(clippy::large_enum_variant)]
966
- enum RunActions {
967
- NewIncomingWFT(NewIncomingWFT),
968
- ActivationCompletion(RunActivationCompletion),
969
- CheckMoreWork {
970
- want_to_evict: Option<RequestEvictMsg>,
971
- has_pending_queries: bool,
972
- has_wft: bool,
973
- },
974
- LocalResolution(LocalResolution),
975
- HeartbeatTimeout,
976
- }
977
- #[derive(Debug)]
978
- struct NewIncomingWFT {
979
- /// This field is only populated if the machines already exist. Otherwise the machines
980
- /// are instantiated with the workflow history.
981
- history_update: Option<HistoryUpdate>,
982
- /// Wft start time
983
- start_time: Instant,
984
- }
985
- #[derive(Debug)]
986
- struct RunActivationCompletion {
987
- task_token: TaskToken,
988
- start_time: Instant,
989
- commands: Vec<WFCommand>,
990
- activation_was_eviction: bool,
991
- activation_was_only_eviction: bool,
992
- has_pending_query: bool,
993
- query_responses: Vec<QueryResult>,
994
- /// Used to notify the worker when the completion is done processing and the completion can
995
- /// unblock. Must always be `Some` when initialized.
996
- resp_chan: Option<oneshot::Sender<ActivationCompleteResult>>,
997
- }
998
-
999
- /// A response from a [ManagedRun] held by a [ManagedRunHandle]
1000
- #[derive(Debug)]
1001
- struct RunUpdateResponse {
1002
- kind: RunUpdateResponseKind,
1003
- span: Span,
1004
- }
1005
- #[derive(Debug, derive_more::Display)]
1006
- #[allow(clippy::large_enum_variant)]
1007
- enum RunUpdateResponseKind {
1008
- Good(GoodRunUpdate),
1009
- Fail(FailRunUpdate),
1010
- }
1011
- impl RunUpdateResponseKind {
1012
- pub(crate) fn run_id(&self) -> &str {
1013
- match self {
1014
- RunUpdateResponseKind::Good(g) => &g.run_id,
1015
- RunUpdateResponseKind::Fail(f) => &f.run_id,
1016
- }
1017
- }
1018
- }
1019
-
1020
- #[derive(Debug)]
1021
- struct GoodRunUpdate {
1022
- run_id: String,
1023
- outgoing_activation: Option<ActivationOrAuto>,
1024
- fulfillable_complete: Option<FulfillableActivationComplete>,
1025
- have_seen_terminal_event: bool,
1026
- /// Is true if there are more jobs that need to be sent to lang
1027
- more_pending_work: bool,
1028
- most_recently_processed_event_number: usize,
1029
- /// Is true if this update was in response to a new WFT
1030
- in_response_to_wft: bool,
1031
- }
1032
- impl Display for GoodRunUpdate {
1033
- fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1034
- write!(
1035
- f,
1036
- "GoodRunUpdate(run_id: {}, outgoing_activation: {}, more_pending_work: {})",
1037
- self.run_id,
1038
- if let Some(og) = self.outgoing_activation.as_ref() {
1039
- format!("{}", og)
1040
- } else {
1041
- "None".to_string()
1042
- },
1043
- self.more_pending_work
1044
- )
1045
- }
1046
- }
1047
- #[derive(Debug)]
1048
- pub(crate) struct FailRunUpdate {
1049
- run_id: String,
1050
- err: WFMachinesError,
1051
- /// This is populated if the run update failed while processing a completion - and thus we
1052
- /// must respond down it when handling the failure.
1053
- completion_resp: Option<oneshot::Sender<ActivationCompleteResult>>,
1054
- }
1055
- impl Display for FailRunUpdate {
1056
- fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1057
- write!(
1058
- f,
1059
- "FailRunUpdate(run_id: {}, error: {:?})",
1060
- self.run_id, self.err
1061
- )
1062
- }
1063
- }
1064
955
  #[derive(Debug)]
1065
956
  pub struct OutgoingServerCommands {
1066
957
  pub commands: Vec<ProtoCommand>,
@@ -1068,9 +959,22 @@ pub struct OutgoingServerCommands {
1068
959
  }
1069
960
 
1070
961
  #[derive(Debug)]
962
+ #[cfg_attr(
963
+ feature = "save_wf_inputs",
964
+ derive(serde::Serialize, serde::Deserialize)
965
+ )]
1071
966
  pub(crate) enum LocalResolution {
1072
967
  LocalActivity(LocalActivityResolution),
1073
968
  }
969
+ impl LocalResolution {
970
+ pub fn is_la_cancel_confirmation(&self) -> bool {
971
+ match self {
972
+ LocalResolution::LocalActivity(lar) => {
973
+ matches!(lar.result, LocalActivityExecutionResult::Cancelled(_))
974
+ }
975
+ }
976
+ }
977
+ }
1074
978
 
1075
979
  #[derive(thiserror::Error, Debug, derive_more::From)]
1076
980
  #[error("Lang provided workflow command with empty variant")]
@@ -1079,6 +983,10 @@ pub struct EmptyWorkflowCommandErr;
1079
983
  /// [DrivenWorkflow]s respond with these when called, to indicate what they want to do next.
1080
984
  /// EX: Create a new timer, complete the workflow, etc.
1081
985
  #[derive(Debug, derive_more::From, derive_more::Display)]
986
+ #[cfg_attr(
987
+ feature = "save_wf_inputs",
988
+ derive(serde::Serialize, serde::Deserialize)
989
+ )]
1082
990
  #[allow(clippy::large_enum_variant)]
1083
991
  pub enum WFCommand {
1084
992
  /// Returned when we need to wait for the lang sdk to send us something
@@ -1171,12 +1079,9 @@ pub struct WorkflowStartedInfo {
1171
1079
  retry_policy: Option<RetryPolicy>,
1172
1080
  }
1173
1081
 
1174
- type LocalActivityRequestSink =
1175
- Arc<dyn Fn(Vec<LocalActRequest>) -> Vec<LocalActivityResolution> + Send + Sync>;
1176
-
1177
1082
  /// Wraps outgoing activation job protos with some internal details core might care about
1178
1083
  #[derive(Debug, derive_more::Display)]
1179
- #[display(fmt = "{}", variant)]
1084
+ #[display(fmt = "{variant}")]
1180
1085
  struct OutgoingJob {
1181
1086
  variant: workflow_activation_job::Variant,
1182
1087
  /// Since LA resolutions are not distinguished from non-LA resolutions as far as lang is
@@ -1198,3 +1103,58 @@ impl From<OutgoingJob> for WorkflowActivationJob {
1198
1103
  }
1199
1104
  }
1200
1105
  }
1106
+
1107
+ /// Errors thrown inside of workflow machines
1108
+ #[derive(thiserror::Error, Debug)]
1109
+ pub(crate) enum WFMachinesError {
1110
+ #[error("Nondeterminism error: {0}")]
1111
+ Nondeterminism(String),
1112
+ #[error("Fatal error in workflow machines: {0}")]
1113
+ Fatal(String),
1114
+ }
1115
+
1116
+ impl WFMachinesError {
1117
+ pub fn evict_reason(&self) -> EvictionReason {
1118
+ match self {
1119
+ WFMachinesError::Nondeterminism(_) => EvictionReason::Nondeterminism,
1120
+ WFMachinesError::Fatal(_) => EvictionReason::Fatal,
1121
+ }
1122
+ }
1123
+ }
1124
+
1125
+ impl From<TimestampError> for WFMachinesError {
1126
+ fn from(_: TimestampError) -> Self {
1127
+ Self::Fatal("Could not decode timestamp".to_string())
1128
+ }
1129
+ }
1130
+
1131
+ pub(crate) trait LocalActivityRequestSink: Send + Sync + 'static {
1132
+ fn sink_reqs(&self, reqs: Vec<LocalActRequest>) -> Vec<LocalActivityResolution>;
1133
+ }
1134
+
1135
+ #[derive(derive_more::Constructor)]
1136
+ pub(super) struct LAReqSink {
1137
+ lam: Arc<LocalActivityManager>,
1138
+ /// If we're recording WF inputs, we also need to store immediate resolutions so they're
1139
+ /// available on replay.
1140
+ #[allow(dead_code)] // sometimes appears unused due to feature flagging
1141
+ recorder: Option<UnboundedSender<Vec<u8>>>,
1142
+ }
1143
+
1144
+ impl LocalActivityRequestSink for LAReqSink {
1145
+ fn sink_reqs(&self, reqs: Vec<LocalActRequest>) -> Vec<LocalActivityResolution> {
1146
+ if reqs.is_empty() {
1147
+ return vec![];
1148
+ }
1149
+
1150
+ #[allow(clippy::let_and_return)] // When feature is off clippy doesn't like this
1151
+ let res = self.lam.enqueue(reqs);
1152
+
1153
+ // We always save when there are any reqs, even if the response might be empty, so that
1154
+ // calls/responses are 1:1
1155
+ #[cfg(feature = "save_wf_inputs")]
1156
+ self.write_req(&res);
1157
+
1158
+ res
1159
+ }
1160
+ }