@temporalio/core-bridge 1.6.0 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. package/Cargo.lock +520 -456
  2. package/lib/index.d.ts +8 -6
  3. package/lib/index.js.map +1 -1
  4. package/package.json +8 -3
  5. package/releases/aarch64-apple-darwin/index.node +0 -0
  6. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
  7. package/releases/x86_64-apple-darwin/index.node +0 -0
  8. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  9. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  10. package/sdk-core/.buildkite/docker/Dockerfile +2 -2
  11. package/sdk-core/.buildkite/docker/docker-compose.yaml +1 -1
  12. package/sdk-core/.buildkite/pipeline.yml +1 -1
  13. package/sdk-core/.github/workflows/heavy.yml +1 -0
  14. package/sdk-core/README.md +13 -7
  15. package/sdk-core/client/src/lib.rs +27 -9
  16. package/sdk-core/client/src/metrics.rs +17 -8
  17. package/sdk-core/client/src/raw.rs +3 -3
  18. package/sdk-core/core/Cargo.toml +3 -4
  19. package/sdk-core/core/src/abstractions/take_cell.rs +28 -0
  20. package/sdk-core/core/src/abstractions.rs +197 -18
  21. package/sdk-core/core/src/core_tests/activity_tasks.rs +137 -45
  22. package/sdk-core/core/src/core_tests/child_workflows.rs +6 -5
  23. package/sdk-core/core/src/core_tests/determinism.rs +212 -2
  24. package/sdk-core/core/src/core_tests/local_activities.rs +183 -36
  25. package/sdk-core/core/src/core_tests/queries.rs +32 -14
  26. package/sdk-core/core/src/core_tests/workers.rs +8 -5
  27. package/sdk-core/core/src/core_tests/workflow_tasks.rs +340 -51
  28. package/sdk-core/core/src/ephemeral_server/mod.rs +110 -8
  29. package/sdk-core/core/src/internal_flags.rs +141 -0
  30. package/sdk-core/core/src/lib.rs +14 -9
  31. package/sdk-core/core/src/replay/mod.rs +16 -27
  32. package/sdk-core/core/src/telemetry/metrics.rs +69 -35
  33. package/sdk-core/core/src/telemetry/mod.rs +38 -14
  34. package/sdk-core/core/src/telemetry/prometheus_server.rs +19 -13
  35. package/sdk-core/core/src/test_help/mod.rs +65 -13
  36. package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +119 -160
  37. package/sdk-core/core/src/worker/activities/activity_task_poller_stream.rs +89 -0
  38. package/sdk-core/core/src/worker/activities/local_activities.rs +122 -6
  39. package/sdk-core/core/src/worker/activities.rs +347 -173
  40. package/sdk-core/core/src/worker/client/mocks.rs +22 -2
  41. package/sdk-core/core/src/worker/client.rs +18 -2
  42. package/sdk-core/core/src/worker/mod.rs +137 -44
  43. package/sdk-core/core/src/worker/workflow/history_update.rs +132 -51
  44. package/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +207 -166
  45. package/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +6 -7
  46. package/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +6 -7
  47. package/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +157 -82
  48. package/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +12 -12
  49. package/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +6 -7
  50. package/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +13 -15
  51. package/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +170 -60
  52. package/sdk-core/core/src/worker/workflow/machines/mod.rs +24 -16
  53. package/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +6 -8
  54. package/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +320 -204
  55. package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +10 -13
  56. package/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +15 -23
  57. package/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +187 -46
  58. package/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +237 -111
  59. package/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +13 -13
  60. package/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +10 -6
  61. package/sdk-core/core/src/worker/workflow/managed_run.rs +81 -62
  62. package/sdk-core/core/src/worker/workflow/mod.rs +341 -79
  63. package/sdk-core/core/src/worker/workflow/run_cache.rs +18 -11
  64. package/sdk-core/core/src/worker/workflow/wft_extraction.rs +15 -3
  65. package/sdk-core/core/src/worker/workflow/workflow_stream/saved_wf_inputs.rs +2 -0
  66. package/sdk-core/core/src/worker/workflow/workflow_stream.rs +75 -52
  67. package/sdk-core/core-api/Cargo.toml +0 -1
  68. package/sdk-core/core-api/src/lib.rs +13 -7
  69. package/sdk-core/core-api/src/telemetry.rs +4 -6
  70. package/sdk-core/core-api/src/worker.rs +5 -0
  71. package/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +80 -55
  72. package/sdk-core/fsm/rustfsm_trait/src/lib.rs +22 -68
  73. package/sdk-core/histories/ends_empty_wft_complete.bin +0 -0
  74. package/sdk-core/histories/old_change_marker_format.bin +0 -0
  75. package/sdk-core/protos/api_upstream/.github/CODEOWNERS +2 -1
  76. package/sdk-core/protos/api_upstream/Makefile +1 -1
  77. package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +5 -17
  78. package/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +11 -0
  79. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +1 -6
  80. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +6 -6
  81. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +5 -0
  82. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +22 -6
  83. package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +48 -19
  84. package/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +2 -0
  85. package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +3 -0
  86. package/sdk-core/protos/api_upstream/temporal/api/{enums/v1/interaction_type.proto → protocol/v1/message.proto} +29 -11
  87. package/sdk-core/protos/api_upstream/temporal/api/sdk/v1/task_complete_metadata.proto +63 -0
  88. package/sdk-core/protos/api_upstream/temporal/api/update/v1/message.proto +111 -0
  89. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +59 -28
  90. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +2 -2
  91. package/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +7 -8
  92. package/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +10 -7
  93. package/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +19 -30
  94. package/sdk-core/protos/local/temporal/sdk/core/common/common.proto +1 -0
  95. package/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +1 -0
  96. package/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +8 -0
  97. package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +65 -60
  98. package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +85 -84
  99. package/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +9 -3
  100. package/sdk-core/sdk/Cargo.toml +1 -1
  101. package/sdk-core/sdk/src/lib.rs +21 -5
  102. package/sdk-core/sdk/src/workflow_context/options.rs +7 -1
  103. package/sdk-core/sdk/src/workflow_context.rs +24 -17
  104. package/sdk-core/sdk/src/workflow_future.rs +9 -3
  105. package/sdk-core/sdk-core-protos/src/history_builder.rs +114 -89
  106. package/sdk-core/sdk-core-protos/src/history_info.rs +6 -1
  107. package/sdk-core/sdk-core-protos/src/lib.rs +205 -64
  108. package/sdk-core/test-utils/src/canned_histories.rs +106 -296
  109. package/sdk-core/test-utils/src/lib.rs +32 -5
  110. package/sdk-core/tests/heavy_tests.rs +10 -43
  111. package/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +25 -3
  112. package/sdk-core/tests/integ_tests/heartbeat_tests.rs +5 -3
  113. package/sdk-core/tests/integ_tests/metrics_tests.rs +218 -16
  114. package/sdk-core/tests/integ_tests/polling_tests.rs +3 -8
  115. package/sdk-core/tests/integ_tests/queries_tests.rs +4 -2
  116. package/sdk-core/tests/integ_tests/visibility_tests.rs +34 -23
  117. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +97 -81
  118. package/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +1 -0
  119. package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +1 -0
  120. package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +80 -3
  121. package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +5 -1
  122. package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +1 -0
  123. package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +25 -3
  124. package/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +2 -4
  125. package/sdk-core/tests/integ_tests/workflow_tests/patches.rs +30 -0
  126. package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +64 -0
  127. package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -0
  128. package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +4 -0
  129. package/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +3 -1
  130. package/sdk-core/tests/integ_tests/workflow_tests/timers.rs +7 -2
  131. package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +6 -7
  132. package/sdk-core/tests/integ_tests/workflow_tests.rs +8 -8
  133. package/sdk-core/tests/main.rs +16 -25
  134. package/sdk-core/tests/runner.rs +11 -9
  135. package/src/conversions.rs +14 -8
  136. package/src/runtime.rs +9 -8
  137. package/ts/index.ts +8 -6
  138. package/sdk-core/protos/api_upstream/temporal/api/interaction/v1/message.proto +0 -87
@@ -22,11 +22,18 @@ pub(crate) use history_update::HistoryUpdate;
22
22
  pub(crate) use managed_run::ManagedWFFunc;
23
23
 
24
24
  use crate::{
25
- abstractions::{stream_when_allowed, MeteredSemaphore, OwnedMeteredSemPermit},
25
+ abstractions::{
26
+ dbg_panic, stream_when_allowed, take_cell::TakeCell, MeteredSemaphore,
27
+ TrackedOwnedMeteredSemPermit, UsedMeteredSemPermit,
28
+ },
29
+ internal_flags::InternalFlags,
26
30
  protosext::{legacy_query_failure, ValidPollWFTQResponse},
27
- telemetry::{metrics::workflow_worker_type, VecDisplayer},
31
+ telemetry::{
32
+ metrics::workflow_worker_type, set_trace_subscriber_for_current_thread, TelemetryInstance,
33
+ VecDisplayer,
34
+ },
28
35
  worker::{
29
- activities::{ActivitiesFromWFTsHandle, LocalActivityManager, PermittedTqResp},
36
+ activities::{ActivitiesFromWFTsHandle, LocalActivityManager, TrackedPermittedTqResp},
30
37
  client::{WorkerClient, WorkflowTaskCompletion},
31
38
  workflow::{
32
39
  history_update::HistoryPaginator,
@@ -36,19 +43,26 @@ use crate::{
36
43
  workflow_stream::{LocalInput, LocalInputs, WFStream},
37
44
  },
38
45
  LocalActRequest, LocalActivityExecutionResult, LocalActivityResolution,
46
+ PostActivateHookData,
39
47
  },
40
48
  MetricsContext,
41
49
  };
50
+ use anyhow::anyhow;
42
51
  use futures::{stream::BoxStream, Stream, StreamExt};
43
- use futures_util::stream;
52
+ use futures_util::{future::abortable, stream};
44
53
  use prost_types::TimestampError;
45
54
  use std::{
55
+ cell::RefCell,
56
+ cmp::Ordering,
46
57
  collections::VecDeque,
47
58
  fmt::Debug,
48
59
  future::Future,
60
+ mem::discriminant,
49
61
  ops::DerefMut,
62
+ rc::Rc,
50
63
  result,
51
- sync::Arc,
64
+ sync::{atomic, atomic::AtomicBool, Arc},
65
+ thread,
52
66
  time::{Duration, Instant},
53
67
  };
54
68
  use temporal_sdk_core_api::errors::{CompleteWfError, PollWfError};
@@ -66,11 +80,12 @@ use temporal_sdk_core_protos::{
66
80
  },
67
81
  temporal::api::{
68
82
  command::v1::{command::Attributes, Command as ProtoCommand, Command},
69
- common::v1::{Memo, RetryPolicy, SearchAttributes, WorkflowExecution},
83
+ common::v1::{Memo, MeteringMetadata, RetryPolicy, SearchAttributes, WorkflowExecution},
70
84
  enums::v1::WorkflowTaskFailedCause,
71
85
  query::v1::WorkflowQuery,
86
+ sdk::v1::WorkflowTaskCompletedMetadata,
72
87
  taskqueue::v1::StickyExecutionAttributes,
73
- workflowservice::v1::PollActivityTaskQueueResponse,
88
+ workflowservice::v1::{get_system_info_response, PollActivityTaskQueueResponse},
74
89
  },
75
90
  TaskToken,
76
91
  };
@@ -79,8 +94,7 @@ use tokio::{
79
94
  mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender},
80
95
  oneshot,
81
96
  },
82
- task,
83
- task::{JoinError, JoinHandle},
97
+ task::{spawn_blocking, LocalSet},
84
98
  };
85
99
  use tokio_stream::wrappers::UnboundedReceiverStream;
86
100
  use tokio_util::sync::CancellationToken;
@@ -94,12 +108,13 @@ const MAX_EAGER_ACTIVITY_RESERVATIONS_PER_WORKFLOW_TASK: usize = 3;
94
108
 
95
109
  type Result<T, E = WFMachinesError> = result::Result<T, E>;
96
110
  type BoxedActivationStream = BoxStream<'static, Result<ActivationOrAuto, PollWfError>>;
111
+ type InternalFlagsRef = Rc<RefCell<InternalFlags>>;
97
112
 
98
113
  /// Centralizes all state related to workflows and workflow tasks
99
114
  pub(crate) struct Workflows {
100
115
  task_queue: String,
101
116
  local_tx: UnboundedSender<LocalInput>,
102
- processing_task: tokio::sync::Mutex<Option<JoinHandle<()>>>,
117
+ processing_task: TakeCell<thread::JoinHandle<()>>,
103
118
  activation_stream: tokio::sync::Mutex<(
104
119
  BoxedActivationStream,
105
120
  // Used to indicate polling may begin
@@ -113,6 +128,8 @@ pub(crate) struct Workflows {
113
128
  activity_tasks_handle: Option<ActivitiesFromWFTsHandle>,
114
129
  /// Ensures we stay at or below this worker's maximum concurrent workflow task limit
115
130
  wft_semaphore: MeteredSemaphore,
131
+ local_act_mgr: Arc<LocalActivityManager>,
132
+ ever_polled: AtomicBool,
116
133
  }
117
134
 
118
135
  pub(crate) struct WorkflowBasics {
@@ -124,19 +141,33 @@ pub(crate) struct WorkflowBasics {
124
141
  pub task_queue: String,
125
142
  pub ignore_evicts_on_shutdown: bool,
126
143
  pub fetching_concurrency: usize,
144
+ pub server_capabilities: get_system_info_response::Capabilities,
127
145
  #[cfg(feature = "save_wf_inputs")]
128
146
  pub wf_state_inputs: Option<UnboundedSender<Vec<u8>>>,
129
147
  }
130
148
 
149
+ pub(crate) struct RunBasics<'a> {
150
+ pub namespace: String,
151
+ pub workflow_id: String,
152
+ pub workflow_type: String,
153
+ pub run_id: String,
154
+ pub history: HistoryUpdate,
155
+ pub metrics: MetricsContext,
156
+ pub capabilities: &'a get_system_info_response::Capabilities,
157
+ }
158
+
131
159
  impl Workflows {
160
+ #[allow(clippy::too_many_arguments)] // Not much worth combining here
132
161
  pub(super) fn new(
133
162
  basics: WorkflowBasics,
134
163
  sticky_attrs: Option<StickyExecutionAttributes>,
135
164
  client: Arc<dyn WorkerClient>,
136
165
  wft_stream: impl Stream<Item = Result<ValidPollWFTQResponse, tonic::Status>> + Send + 'static,
137
166
  local_activity_request_sink: impl LocalActivityRequestSink,
167
+ local_act_mgr: Arc<LocalActivityManager>,
138
168
  heartbeat_timeout_rx: UnboundedReceiver<HeartbeatTimeoutMsg>,
139
169
  activity_tasks_handle: Option<ActivitiesFromWFTsHandle>,
170
+ telem_instance: Option<&TelemetryInstance>,
140
171
  ) -> Self {
141
172
  let (local_tx, local_rx) = unbounded_channel();
142
173
  let (fetch_tx, fetch_rx) = unbounded_channel();
@@ -162,54 +193,67 @@ impl Workflows {
162
193
  UnboundedReceiverStream::new(local_rx),
163
194
  UnboundedReceiverStream::new(heartbeat_timeout_rx).map(Into::into),
164
195
  );
165
- let mut stream = WFStream::build(
166
- basics,
167
- extracted_wft_stream,
168
- locals_stream,
169
- local_activity_request_sink,
170
- );
171
196
  let (activation_tx, activation_rx) = unbounded_channel();
172
197
  let (start_polling_tx, start_polling_rx) = oneshot::channel();
173
198
  // We must spawn a task to constantly poll the activation stream, because otherwise
174
199
  // activation completions would not cause anything to happen until the next poll.
175
- let processing_task = task::spawn(async move {
176
- // However, we want to avoid plowing ahead until we've been asked to poll at least once.
177
- // This supports activity-only workers.
178
- let do_poll = tokio::select! {
179
- sp = start_polling_rx => {
180
- sp.is_ok()
181
- }
182
- _ = shutdown_tok.cancelled() => {
183
- false
184
- }
185
- };
186
- if !do_poll {
187
- return;
200
+ let tracing_sub = telem_instance.map(|ti| ti.trace_subscriber());
201
+ let processing_task = thread::spawn(move || {
202
+ if let Some(ts) = tracing_sub {
203
+ set_trace_subscriber_for_current_thread(ts);
188
204
  }
189
- while let Some(output) = stream.next().await {
190
- match output {
191
- Ok(o) => {
192
- for fetchreq in o.fetch_histories {
193
- fetch_tx
194
- .send(fetchreq)
195
- .expect("Fetch channel must not be dropped");
196
- }
197
- for act in o.activations {
198
- activation_tx
199
- .send(Ok(act))
200
- .expect("Activation processor channel not dropped");
205
+ let rt = tokio::runtime::Builder::new_current_thread()
206
+ .enable_all()
207
+ .thread_name("workflow-processing")
208
+ .build()
209
+ .unwrap();
210
+ let local = LocalSet::new();
211
+ local.block_on(&rt, async move {
212
+ let mut stream = WFStream::build(
213
+ basics,
214
+ extracted_wft_stream,
215
+ locals_stream,
216
+ local_activity_request_sink,
217
+ );
218
+
219
+ // However, we want to avoid plowing ahead until we've been asked to poll at least
220
+ // once. This supports activity-only workers.
221
+ let do_poll = tokio::select! {
222
+ sp = start_polling_rx => {
223
+ sp.is_ok()
224
+ }
225
+ _ = shutdown_tok.cancelled() => {
226
+ false
227
+ }
228
+ };
229
+ if !do_poll {
230
+ return;
231
+ }
232
+ while let Some(output) = stream.next().await {
233
+ match output {
234
+ Ok(o) => {
235
+ for fetchreq in o.fetch_histories {
236
+ fetch_tx
237
+ .send(fetchreq)
238
+ .expect("Fetch channel must not be dropped");
239
+ }
240
+ for act in o.activations {
241
+ activation_tx
242
+ .send(Ok(act))
243
+ .expect("Activation processor channel not dropped");
244
+ }
201
245
  }
246
+ Err(e) => activation_tx
247
+ .send(Err(e))
248
+ .expect("Activation processor channel not dropped"),
202
249
  }
203
- Err(e) => activation_tx
204
- .send(Err(e))
205
- .expect("Activation processor channel not dropped"),
206
250
  }
207
- }
251
+ });
208
252
  });
209
253
  Self {
210
254
  task_queue,
211
255
  local_tx,
212
- processing_task: tokio::sync::Mutex::new(Some(processing_task)),
256
+ processing_task: TakeCell::new(processing_task),
213
257
  activation_stream: tokio::sync::Mutex::new((
214
258
  UnboundedReceiverStream::new(activation_rx).boxed(),
215
259
  Some(start_polling_tx),
@@ -218,10 +262,13 @@ impl Workflows {
218
262
  sticky_attrs,
219
263
  activity_tasks_handle,
220
264
  wft_semaphore,
265
+ local_act_mgr,
266
+ ever_polled: AtomicBool::new(false),
221
267
  }
222
268
  }
223
269
 
224
- pub async fn next_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
270
+ pub(super) async fn next_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
271
+ self.ever_polled.store(true, atomic::Ordering::Release);
225
272
  loop {
226
273
  let al = {
227
274
  let mut lock = self.activation_stream.lock().await;
@@ -233,15 +280,39 @@ impl Workflows {
233
280
  };
234
281
  Span::current().record("run_id", al.run_id());
235
282
  match al {
236
- ActivationOrAuto::LangActivation(act) | ActivationOrAuto::ReadyForQueries(act) => {
283
+ ActivationOrAuto::LangActivation(mut act)
284
+ | ActivationOrAuto::ReadyForQueries(mut act) => {
285
+ sort_act_jobs(&mut act);
237
286
  debug!(activation=%act, "Sending activation to lang");
238
287
  break Ok(act);
239
288
  }
240
289
  ActivationOrAuto::Autocomplete { run_id } => {
241
- self.activation_completed(WorkflowActivationCompletion {
242
- run_id,
243
- status: Some(workflow_completion::Success::from_variants(vec![]).into()),
244
- })
290
+ self.activation_completed(
291
+ WorkflowActivationCompletion {
292
+ run_id,
293
+ status: Some(
294
+ workflow_completion::Success::from_variants(vec![]).into(),
295
+ ),
296
+ },
297
+ true,
298
+ // We need to say a type, but the type is irrelevant, so imagine some
299
+ // boxed function we'll never call.
300
+ Option::<Box<dyn Fn(PostActivateHookData) + Send>>::None,
301
+ )
302
+ .await?;
303
+ }
304
+ ActivationOrAuto::AutoFail {
305
+ run_id,
306
+ machines_err,
307
+ } => {
308
+ self.activation_completed(
309
+ WorkflowActivationCompletion {
310
+ run_id,
311
+ status: Some(auto_fail_to_complete_status(machines_err)),
312
+ },
313
+ true,
314
+ Option::<Box<dyn Fn(PostActivateHookData) + Send>>::None,
315
+ )
245
316
  .await?;
246
317
  }
247
318
  }
@@ -252,10 +323,12 @@ impl Workflows {
252
323
  /// the outcome of that completion. See [ActivationCompletedOutcome].
253
324
  ///
254
325
  /// Returns the most-recently-processed event number for the run.
255
- pub async fn activation_completed(
326
+ pub(super) async fn activation_completed(
256
327
  &self,
257
328
  completion: WorkflowActivationCompletion,
258
- ) -> Result<usize, CompleteWfError> {
329
+ is_autocomplete: bool,
330
+ post_activate_hook: Option<impl Fn(PostActivateHookData)>,
331
+ ) -> Result<(), CompleteWfError> {
259
332
  let is_empty_completion = completion.is_empty();
260
333
  let completion = validate_completion(completion)?;
261
334
  let run_id = completion.run_id().to_string();
@@ -267,7 +340,7 @@ impl Workflows {
267
340
  if !was_sent {
268
341
  if is_empty_completion {
269
342
  // Empty complete which is likely an evict reply, we can just ignore.
270
- return Ok(0);
343
+ return Ok(());
271
344
  }
272
345
  panic!(
273
346
  "A non-empty completion was not processed. Workflow processing may have \
@@ -275,9 +348,18 @@ impl Workflows {
275
348
  );
276
349
  }
277
350
 
278
- let completion_outcome = rx
279
- .await
280
- .expect("Send half of activation complete response not dropped");
351
+ let completion_outcome = if let Ok(c) = rx.await {
352
+ c
353
+ } else {
354
+ dbg_panic!("Send half of activation complete response channel went missing");
355
+ self.request_eviction(
356
+ run_id,
357
+ "Send half of activation complete response channel went missing",
358
+ EvictionReason::Fatal,
359
+ );
360
+ return Ok(());
361
+ };
362
+
281
363
  let mut wft_from_complete = None;
282
364
  let wft_report_status = match completion_outcome.outcome {
283
365
  ActivationCompleteOutcome::ReportWFTSuccess(report) => match report {
@@ -288,6 +370,7 @@ impl Workflows {
288
370
  mut commands,
289
371
  query_responses,
290
372
  force_new_wft,
373
+ sdk_metadata,
291
374
  },
292
375
  } => {
293
376
  let reserved_act_permits =
@@ -301,6 +384,13 @@ impl Workflows {
301
384
  sticky_attributes: None,
302
385
  return_new_workflow_task: true,
303
386
  force_create_new_workflow_task: force_new_wft,
387
+ sdk_metadata,
388
+ metering_metadata: MeteringMetadata {
389
+ nonfirst_local_activity_execution_attempts: self
390
+ .local_act_mgr
391
+ .get_nonfirst_attempt_count(&run_id)
392
+ as u32,
393
+ },
304
394
  };
305
395
  let sticky_attrs = self.sticky_attrs.clone();
306
396
  // Do not return new WFT if we would not cache, because returned new WFTs are
@@ -370,17 +460,30 @@ impl Workflows {
370
460
  None
371
461
  };
372
462
 
463
+ if let Some(h) = post_activate_hook {
464
+ h(PostActivateHookData {
465
+ run_id: &run_id,
466
+ most_recent_event: completion_outcome.most_recently_processed_event,
467
+ replaying: completion_outcome.replaying,
468
+ });
469
+ }
470
+
373
471
  self.post_activation(PostActivationMsg {
374
472
  run_id,
375
473
  wft_report_status,
376
474
  wft_from_complete: maybe_pwft,
475
+ is_autocomplete,
377
476
  });
378
477
 
379
- Ok(completion_outcome.most_recently_processed_event)
478
+ Ok(())
380
479
  }
381
480
 
382
481
  /// Tell workflow that a local activity has finished with the provided result
383
- pub fn notify_of_local_result(&self, run_id: impl Into<String>, resolved: LocalResolution) {
482
+ pub(super) fn notify_of_local_result(
483
+ &self,
484
+ run_id: impl Into<String>,
485
+ resolved: LocalResolution,
486
+ ) {
384
487
  self.send_local(LocalResolutionMsg {
385
488
  run_id: run_id.into(),
386
489
  res: resolved,
@@ -388,7 +491,7 @@ impl Workflows {
388
491
  }
389
492
 
390
493
  /// Request eviction of a workflow
391
- pub fn request_eviction(
494
+ pub(super) fn request_eviction(
392
495
  &self,
393
496
  run_id: impl Into<String>,
394
497
  message: impl Into<String>,
@@ -398,30 +501,47 @@ impl Workflows {
398
501
  run_id: run_id.into(),
399
502
  message: message.into(),
400
503
  reason,
504
+ auto_reply_fail_tt: None,
401
505
  });
402
506
  }
403
507
 
404
508
  /// Query the state of workflow management. Can return `None` if workflow state is shut down.
405
- pub fn get_state_info(&self) -> impl Future<Output = Option<WorkflowStateInfo>> {
509
+ pub(super) fn get_state_info(&self) -> impl Future<Output = Option<WorkflowStateInfo>> {
406
510
  let (tx, rx) = oneshot::channel();
407
511
  self.send_local(GetStateInfoMsg { response_tx: tx });
408
512
  async move { rx.await.ok() }
409
513
  }
410
514
 
411
- pub fn available_wft_permits(&self) -> usize {
515
+ pub(super) fn available_wft_permits(&self) -> usize {
412
516
  self.wft_semaphore.available_permits()
413
517
  }
414
518
 
415
- pub async fn shutdown(&self) -> Result<(), JoinError> {
416
- let maybe_jh = self.processing_task.lock().await.take();
417
- if let Some(jh) = maybe_jh {
418
- // This acts as a final wake up in case the stream is still alive and wouldn't otherwise
419
- // receive another message. It allows it to shut itself down.
420
- let _ = self.get_state_info().await;
421
- jh.await
422
- } else {
423
- Ok(())
519
+ pub(super) async fn shutdown(&self) -> Result<(), anyhow::Error> {
520
+ if let Some(jh) = self.processing_task.take_once() {
521
+ // This serves to drive the stream if it is still alive and wouldn't otherwise receive
522
+ // another message. It allows it to shut itself down.
523
+ let (waker, stop_waker) = abortable(async {
524
+ let mut interval = tokio::time::interval(Duration::from_millis(10));
525
+ loop {
526
+ interval.tick().await;
527
+ let _ = self.get_state_info().await;
528
+ }
529
+ });
530
+ let (_, jh_res) = tokio::join!(
531
+ waker,
532
+ spawn_blocking(move || {
533
+ let r = jh.join();
534
+ stop_waker.abort();
535
+ r
536
+ })
537
+ );
538
+ jh_res?.map_err(|e| anyhow!("Error joining workflow processing thread: {e:?}"))?;
424
539
  }
540
+ Ok(())
541
+ }
542
+
543
+ pub(super) fn ever_polled(&self) -> bool {
544
+ self.ever_polled.load(atomic::Ordering::Acquire)
425
545
  }
426
546
 
427
547
  /// Must be called after every activation completion has finished
@@ -488,7 +608,7 @@ impl Workflows {
488
608
  /// Process eagerly returned activities from WFT completion
489
609
  fn handle_eager_activities(
490
610
  &self,
491
- reserved_act_permits: Vec<OwnedMeteredSemPermit>,
611
+ reserved_act_permits: Vec<TrackedOwnedMeteredSemPermit>,
492
612
  eager_acts: Vec<PollActivityTaskQueueResponse>,
493
613
  ) {
494
614
  if let Some(at_handle) = self.activity_tasks_handle.as_ref() {
@@ -509,7 +629,7 @@ impl Workflows {
509
629
  let with_permits = reserved_act_permits
510
630
  .into_iter()
511
631
  .zip(eager_acts.into_iter())
512
- .map(|(permit, resp)| PermittedTqResp { permit, resp });
632
+ .map(|(permit, resp)| TrackedPermittedTqResp { permit, resp });
513
633
  if with_permits.len() > 0 {
514
634
  debug!(
515
635
  "Adding {} activity tasks received from WFT complete",
@@ -532,7 +652,7 @@ impl Workflows {
532
652
  fn reserve_activity_slots_for_outgoing_commands(
533
653
  &self,
534
654
  commands: &mut [Command],
535
- ) -> Vec<OwnedMeteredSemPermit> {
655
+ ) -> Vec<TrackedOwnedMeteredSemPermit> {
536
656
  let mut reserved = vec![];
537
657
  for cmd in commands {
538
658
  if let Some(Attributes::ScheduleActivityTaskCommandAttributes(attrs)) =
@@ -619,6 +739,11 @@ enum ActivationOrAuto {
619
739
  Autocomplete {
620
740
  run_id: String,
621
741
  },
742
+ #[display(fmt = "AutoFail(run_id={run_id})")]
743
+ AutoFail {
744
+ run_id: String,
745
+ machines_err: WFMachinesError,
746
+ },
622
747
  }
623
748
  impl ActivationOrAuto {
624
749
  pub fn run_id(&self) -> &str {
@@ -626,6 +751,7 @@ impl ActivationOrAuto {
626
751
  ActivationOrAuto::LangActivation(act) => &act.run_id,
627
752
  ActivationOrAuto::Autocomplete { run_id, .. } => run_id,
628
753
  ActivationOrAuto::ReadyForQueries(act) => &act.run_id,
754
+ ActivationOrAuto::AutoFail { run_id, .. } => run_id,
629
755
  }
630
756
  }
631
757
  }
@@ -641,9 +767,9 @@ pub(crate) struct PermittedWFT {
641
767
  work: PreparedWFT,
642
768
  #[cfg_attr(
643
769
  feature = "save_wf_inputs",
644
- serde(skip, default = "OwnedMeteredSemPermit::fake_deserialized")
770
+ serde(skip, default = "UsedMeteredSemPermit::fake_deserialized")
645
771
  )]
646
- permit: OwnedMeteredSemPermit,
772
+ permit: UsedMeteredSemPermit,
647
773
  #[cfg_attr(
648
774
  feature = "save_wf_inputs",
649
775
  serde(skip, default = "HistoryPaginator::fake_deserialized")
@@ -683,7 +809,7 @@ pub(crate) struct OutstandingTask {
683
809
  pub start_time: Instant,
684
810
  /// The WFT permit owned by this task, ensures we don't exceed max concurrent WFT, and makes
685
811
  /// sure the permit is automatically freed when we delete the task.
686
- pub permit: OwnedMeteredSemPermit,
812
+ pub permit: UsedMeteredSemPermit,
687
813
  }
688
814
 
689
815
  impl OutstandingTask {
@@ -762,6 +888,7 @@ pub(crate) enum ActivationAction {
762
888
  commands: Vec<ProtoCommand>,
763
889
  query_responses: Vec<QueryResult>,
764
890
  force_new_wft: bool,
891
+ sdk_metadata: WorkflowTaskCompletedMetadata,
765
892
  },
766
893
  /// We should respond to a legacy query request
767
894
  RespondLegacyQuery { result: Box<QueryResult> },
@@ -818,6 +945,7 @@ struct PostActivationMsg {
818
945
  run_id: String,
819
946
  wft_report_status: WFTReportStatus,
820
947
  wft_from_complete: Option<(PreparedWFT, HistoryPaginator)>,
948
+ is_autocomplete: bool,
821
949
  }
822
950
  #[derive(Debug, Clone)]
823
951
  #[cfg_attr(
@@ -828,6 +956,10 @@ struct RequestEvictMsg {
828
956
  run_id: String,
829
957
  message: String,
830
958
  reason: EvictionReason,
959
+ /// If set, we requested eviction because something went wrong processing a brand new poll task,
960
+ /// which means we won't have stored the WFT and we need to track the task token separately so
961
+ /// we can reply with a failure to server after the evict goes through.
962
+ auto_reply_fail_tt: Option<TaskToken>,
831
963
  }
832
964
  #[derive(Debug)]
833
965
  pub(crate) struct HeartbeatTimeoutMsg {
@@ -843,6 +975,7 @@ struct GetStateInfoMsg {
843
975
  #[derive(Debug)]
844
976
  struct ActivationCompleteResult {
845
977
  most_recently_processed_event: usize,
978
+ replaying: bool,
846
979
  outcome: ActivationCompleteOutcome,
847
980
  }
848
981
  /// What needs to be done after calling [Workflows::activation_completed]
@@ -881,7 +1014,7 @@ fn validate_completion(
881
1014
  match completion.status {
882
1015
  Some(workflow_activation_completion::Status::Successful(success)) => {
883
1016
  // Convert to wf commands
884
- let commands = success
1017
+ let mut commands = success
885
1018
  .commands
886
1019
  .into_iter()
887
1020
  .map(|c| c.try_into())
@@ -908,9 +1041,20 @@ fn validate_completion(
908
1041
  });
909
1042
  }
910
1043
 
1044
+ // Any non-query-response commands after a terminal command should be ignored
1045
+ if let Some(term_cmd_pos) = commands.iter().position(|c| c.is_terminal()) {
1046
+ // Query responses are just fine, so keep them.
1047
+ let queries = commands
1048
+ .split_off(term_cmd_pos + 1)
1049
+ .into_iter()
1050
+ .filter(|c| matches!(c, WFCommand::QueryResponse(_)));
1051
+ commands.extend(queries);
1052
+ }
1053
+
911
1054
  Ok(ValidatedCompletion::Success {
912
1055
  run_id: completion.run_id,
913
1056
  commands,
1057
+ used_flags: success.used_internal_flags,
914
1058
  })
915
1059
  }
916
1060
  Some(workflow_activation_completion::Status::Failed(failure)) => {
@@ -936,6 +1080,7 @@ enum ValidatedCompletion {
936
1080
  Success {
937
1081
  run_id: String,
938
1082
  commands: Vec<WFCommand>,
1083
+ used_flags: Vec<u32>,
939
1084
  },
940
1085
  Fail {
941
1086
  run_id: String,
@@ -1058,6 +1203,23 @@ impl TryFrom<WorkflowCommand> for WFCommand {
1058
1203
  }
1059
1204
  }
1060
1205
 
1206
+ impl WFCommand {
1207
+ /// Returns true if the command is one which ends the workflow:
1208
+ /// * Completed
1209
+ /// * Failed
1210
+ /// * Cancelled
1211
+ /// * Continue-as-new
1212
+ pub fn is_terminal(&self) -> bool {
1213
+ matches!(
1214
+ self,
1215
+ WFCommand::CompleteWorkflow(_)
1216
+ | WFCommand::FailWorkflow(_)
1217
+ | WFCommand::CancelWorkflow(_)
1218
+ | WFCommand::ContinueAsNew(_)
1219
+ )
1220
+ }
1221
+ }
1222
+
1061
1223
  #[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
1062
1224
  enum CommandID {
1063
1225
  Timer(u32),
@@ -1128,6 +1290,28 @@ impl From<TimestampError> for WFMachinesError {
1128
1290
  }
1129
1291
  }
1130
1292
 
1293
+ impl From<anyhow::Error> for WFMachinesError {
1294
+ fn from(value: anyhow::Error) -> Self {
1295
+ WFMachinesError::Fatal(value.to_string())
1296
+ }
1297
+ }
1298
+
1299
+ fn auto_fail_to_complete_status(err: WFMachinesError) -> workflow_activation_completion::Status {
1300
+ workflow_activation_completion::Status::Failed(Failure {
1301
+ failure: Some(
1302
+ temporal_sdk_core_protos::temporal::api::failure::v1::Failure {
1303
+ message: "Error while processing workflow task".to_string(),
1304
+ source: err.to_string(),
1305
+ stack_trace: "".to_string(),
1306
+ encoded_attributes: None,
1307
+ cause: None,
1308
+ failure_info: None,
1309
+ },
1310
+ ),
1311
+ force_cause: WorkflowTaskFailedCause::from(err.evict_reason()) as i32,
1312
+ })
1313
+ }
1314
+
1131
1315
  pub(crate) trait LocalActivityRequestSink: Send + Sync + 'static {
1132
1316
  fn sink_reqs(&self, reqs: Vec<LocalActRequest>) -> Vec<LocalActivityResolution>;
1133
1317
  }
@@ -1158,3 +1342,81 @@ impl LocalActivityRequestSink for LAReqSink {
1158
1342
  res
1159
1343
  }
1160
1344
  }
1345
+
1346
+ /// Sorts jobs in an activation to be in the order lang expects:
1347
+ /// `patches -> signals -> other -> queries`
1348
+ fn sort_act_jobs(wfa: &mut WorkflowActivation) {
1349
+ wfa.jobs.sort_by(|j1, j2| {
1350
+ // Unwrapping is fine here since we'll never issue empty variants
1351
+ let j1v = j1.variant.as_ref().unwrap();
1352
+ let j2v = j2.variant.as_ref().unwrap();
1353
+ if discriminant(j1v) == discriminant(j2v) {
1354
+ return Ordering::Equal;
1355
+ }
1356
+ fn variant_ordinal(v: &workflow_activation_job::Variant) -> u8 {
1357
+ match v {
1358
+ workflow_activation_job::Variant::NotifyHasPatch(_) => 1,
1359
+ workflow_activation_job::Variant::SignalWorkflow(_) => 2,
1360
+ workflow_activation_job::Variant::QueryWorkflow(_) => 4,
1361
+ _ => 3,
1362
+ }
1363
+ }
1364
+ variant_ordinal(j1v).cmp(&variant_ordinal(j2v))
1365
+ })
1366
+ }
1367
+
1368
#[cfg(test)]
mod tests {
    use super::*;
    use itertools::Itertools;

    /// Feeds `sort_act_jobs` an activation whose jobs are out of order and checks that they come
    /// back as patch, signal, the two "other" jobs in their original relative order, then query.
    #[test]
    fn jobs_sort() {
        use workflow_activation_job::Variant;

        // Deliberately shuffled relative to the expected `patches -> signals -> other -> queries`.
        let shuffled = vec![
            Variant::SignalWorkflow(Default::default()),
            Variant::NotifyHasPatch(Default::default()),
            Variant::QueryWorkflow(Default::default()),
            Variant::FireTimer(Default::default()),
            Variant::ResolveActivity(Default::default()),
        ];
        let mut act = WorkflowActivation {
            jobs: shuffled
                .into_iter()
                .map(|v| WorkflowActivationJob { variant: Some(v) })
                .collect(),
            ..Default::default()
        };

        sort_act_jobs(&mut act);

        let variants = act
            .jobs
            .into_iter()
            .map(|j| j.variant.unwrap())
            .collect_vec();
        assert_matches!(
            variants.as_slice(),
            &[
                Variant::NotifyHasPatch(_),
                Variant::SignalWorkflow(_),
                Variant::FireTimer(_),
                Variant::ResolveActivity(_),
                Variant::QueryWorkflow(_)
            ]
        )
    }
}