@temporalio/core-bridge 1.5.2 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194)
  1. package/Cargo.lock +304 -112
  2. package/lib/index.d.ts +8 -6
  3. package/lib/index.js.map +1 -1
  4. package/package.json +9 -4
  5. package/releases/aarch64-apple-darwin/index.node +0 -0
  6. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
  7. package/releases/x86_64-apple-darwin/index.node +0 -0
  8. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  9. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  10. package/sdk-core/.buildkite/docker/Dockerfile +2 -2
  11. package/sdk-core/.buildkite/docker/docker-compose.yaml +1 -1
  12. package/sdk-core/.buildkite/pipeline.yml +2 -4
  13. package/sdk-core/.cargo/config.toml +5 -2
  14. package/sdk-core/.github/workflows/heavy.yml +29 -0
  15. package/sdk-core/Cargo.toml +1 -1
  16. package/sdk-core/README.md +20 -10
  17. package/sdk-core/client/src/lib.rs +215 -39
  18. package/sdk-core/client/src/metrics.rs +17 -8
  19. package/sdk-core/client/src/raw.rs +4 -4
  20. package/sdk-core/client/src/retry.rs +32 -20
  21. package/sdk-core/core/Cargo.toml +25 -12
  22. package/sdk-core/core/src/abstractions/take_cell.rs +28 -0
  23. package/sdk-core/core/src/abstractions.rs +204 -14
  24. package/sdk-core/core/src/core_tests/activity_tasks.rs +143 -50
  25. package/sdk-core/core/src/core_tests/child_workflows.rs +6 -5
  26. package/sdk-core/core/src/core_tests/determinism.rs +165 -2
  27. package/sdk-core/core/src/core_tests/local_activities.rs +431 -43
  28. package/sdk-core/core/src/core_tests/queries.rs +34 -16
  29. package/sdk-core/core/src/core_tests/workers.rs +8 -5
  30. package/sdk-core/core/src/core_tests/workflow_tasks.rs +588 -55
  31. package/sdk-core/core/src/ephemeral_server/mod.rs +113 -12
  32. package/sdk-core/core/src/internal_flags.rs +155 -0
  33. package/sdk-core/core/src/lib.rs +16 -9
  34. package/sdk-core/core/src/protosext/mod.rs +1 -1
  35. package/sdk-core/core/src/replay/mod.rs +16 -27
  36. package/sdk-core/core/src/telemetry/log_export.rs +1 -1
  37. package/sdk-core/core/src/telemetry/metrics.rs +69 -35
  38. package/sdk-core/core/src/telemetry/mod.rs +60 -21
  39. package/sdk-core/core/src/telemetry/prometheus_server.rs +19 -13
  40. package/sdk-core/core/src/test_help/mod.rs +73 -14
  41. package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +119 -160
  42. package/sdk-core/core/src/worker/activities/activity_task_poller_stream.rs +89 -0
  43. package/sdk-core/core/src/worker/activities/local_activities.rs +379 -129
  44. package/sdk-core/core/src/worker/activities.rs +350 -175
  45. package/sdk-core/core/src/worker/client/mocks.rs +22 -2
  46. package/sdk-core/core/src/worker/client.rs +18 -2
  47. package/sdk-core/core/src/worker/mod.rs +183 -64
  48. package/sdk-core/core/src/worker/workflow/bridge.rs +1 -3
  49. package/sdk-core/core/src/worker/workflow/driven_workflow.rs +3 -5
  50. package/sdk-core/core/src/worker/workflow/history_update.rs +916 -277
  51. package/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +216 -183
  52. package/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +9 -12
  53. package/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +7 -9
  54. package/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +160 -87
  55. package/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +13 -14
  56. package/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +7 -9
  57. package/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +14 -17
  58. package/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +242 -110
  59. package/sdk-core/core/src/worker/workflow/machines/mod.rs +27 -19
  60. package/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +9 -11
  61. package/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +321 -206
  62. package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +13 -18
  63. package/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +20 -29
  64. package/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +2 -2
  65. package/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +257 -51
  66. package/sdk-core/core/src/worker/workflow/machines/workflow_machines/local_acts.rs +6 -17
  67. package/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +310 -150
  68. package/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +17 -20
  69. package/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +31 -15
  70. package/sdk-core/core/src/worker/workflow/managed_run.rs +1052 -380
  71. package/sdk-core/core/src/worker/workflow/mod.rs +598 -390
  72. package/sdk-core/core/src/worker/workflow/run_cache.rs +40 -57
  73. package/sdk-core/core/src/worker/workflow/wft_extraction.rs +137 -0
  74. package/sdk-core/core/src/worker/workflow/wft_poller.rs +1 -4
  75. package/sdk-core/core/src/worker/workflow/workflow_stream/saved_wf_inputs.rs +117 -0
  76. package/sdk-core/core/src/worker/workflow/workflow_stream/tonic_status_serde.rs +24 -0
  77. package/sdk-core/core/src/worker/workflow/workflow_stream.rs +469 -718
  78. package/sdk-core/core-api/Cargo.toml +2 -1
  79. package/sdk-core/core-api/src/errors.rs +1 -34
  80. package/sdk-core/core-api/src/lib.rs +19 -9
  81. package/sdk-core/core-api/src/telemetry.rs +4 -6
  82. package/sdk-core/core-api/src/worker.rs +19 -1
  83. package/sdk-core/etc/deps.svg +115 -140
  84. package/sdk-core/etc/regen-depgraph.sh +5 -0
  85. package/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +86 -61
  86. package/sdk-core/fsm/rustfsm_trait/src/lib.rs +29 -71
  87. package/sdk-core/histories/ends_empty_wft_complete.bin +0 -0
  88. package/sdk-core/histories/evict_while_la_running_no_interference-16_history.bin +0 -0
  89. package/sdk-core/histories/old_change_marker_format.bin +0 -0
  90. package/sdk-core/protos/api_upstream/.github/CODEOWNERS +2 -1
  91. package/sdk-core/protos/api_upstream/Makefile +6 -6
  92. package/sdk-core/protos/api_upstream/build/go.mod +7 -0
  93. package/sdk-core/protos/api_upstream/build/go.sum +5 -0
  94. package/sdk-core/protos/api_upstream/build/tools.go +29 -0
  95. package/sdk-core/protos/api_upstream/go.mod +6 -0
  96. package/sdk-core/protos/api_upstream/temporal/api/batch/v1/message.proto +9 -2
  97. package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +7 -26
  98. package/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +13 -2
  99. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/batch_operation.proto +3 -2
  100. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +3 -7
  101. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/common.proto +3 -2
  102. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +8 -8
  103. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +25 -2
  104. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +2 -2
  105. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/query.proto +2 -2
  106. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +2 -2
  107. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +2 -2
  108. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +2 -2
  109. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +24 -19
  110. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +2 -2
  111. package/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +2 -2
  112. package/sdk-core/protos/api_upstream/temporal/api/failure/v1/message.proto +2 -2
  113. package/sdk-core/protos/api_upstream/temporal/api/filter/v1/message.proto +2 -2
  114. package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +49 -26
  115. package/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +4 -2
  116. package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +5 -2
  117. package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +2 -2
  118. package/sdk-core/protos/api_upstream/temporal/api/protocol/v1/message.proto +57 -0
  119. package/sdk-core/protos/api_upstream/temporal/api/query/v1/message.proto +2 -2
  120. package/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +2 -2
  121. package/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +2 -2
  122. package/sdk-core/protos/api_upstream/temporal/api/sdk/v1/task_complete_metadata.proto +63 -0
  123. package/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +2 -2
  124. package/sdk-core/protos/api_upstream/temporal/api/update/v1/message.proto +71 -6
  125. package/sdk-core/protos/api_upstream/temporal/api/version/v1/message.proto +2 -2
  126. package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +2 -2
  127. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +64 -28
  128. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +4 -4
  129. package/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +7 -8
  130. package/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +10 -7
  131. package/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +19 -30
  132. package/sdk-core/protos/local/temporal/sdk/core/common/common.proto +1 -0
  133. package/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +1 -0
  134. package/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +8 -0
  135. package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +67 -60
  136. package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +85 -84
  137. package/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +9 -3
  138. package/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/request_response.proto +2 -2
  139. package/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/service.proto +2 -2
  140. package/sdk-core/sdk/Cargo.toml +5 -4
  141. package/sdk-core/sdk/src/lib.rs +108 -26
  142. package/sdk-core/sdk/src/workflow_context/options.rs +7 -1
  143. package/sdk-core/sdk/src/workflow_context.rs +24 -17
  144. package/sdk-core/sdk/src/workflow_future.rs +16 -15
  145. package/sdk-core/sdk-core-protos/Cargo.toml +5 -2
  146. package/sdk-core/sdk-core-protos/build.rs +36 -2
  147. package/sdk-core/sdk-core-protos/src/history_builder.rs +138 -106
  148. package/sdk-core/sdk-core-protos/src/history_info.rs +10 -1
  149. package/sdk-core/sdk-core-protos/src/lib.rs +272 -87
  150. package/sdk-core/sdk-core-protos/src/task_token.rs +12 -2
  151. package/sdk-core/test-utils/Cargo.toml +3 -1
  152. package/sdk-core/test-utils/src/canned_histories.rs +106 -296
  153. package/sdk-core/test-utils/src/histfetch.rs +1 -1
  154. package/sdk-core/test-utils/src/lib.rs +82 -23
  155. package/sdk-core/test-utils/src/wf_input_saver.rs +50 -0
  156. package/sdk-core/test-utils/src/workflows.rs +29 -0
  157. package/sdk-core/tests/fuzzy_workflow.rs +130 -0
  158. package/sdk-core/tests/{load_tests.rs → heavy_tests.rs} +125 -51
  159. package/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +25 -3
  160. package/sdk-core/tests/integ_tests/heartbeat_tests.rs +10 -5
  161. package/sdk-core/tests/integ_tests/metrics_tests.rs +218 -16
  162. package/sdk-core/tests/integ_tests/polling_tests.rs +4 -47
  163. package/sdk-core/tests/integ_tests/queries_tests.rs +5 -128
  164. package/sdk-core/tests/integ_tests/visibility_tests.rs +83 -25
  165. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +161 -72
  166. package/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +1 -0
  167. package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +6 -13
  168. package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +80 -3
  169. package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +6 -2
  170. package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +3 -10
  171. package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +94 -200
  172. package/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +2 -4
  173. package/sdk-core/tests/integ_tests/workflow_tests/patches.rs +34 -28
  174. package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +76 -7
  175. package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -0
  176. package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +18 -14
  177. package/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +6 -20
  178. package/sdk-core/tests/integ_tests/workflow_tests/timers.rs +10 -21
  179. package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +7 -8
  180. package/sdk-core/tests/integ_tests/workflow_tests.rs +13 -14
  181. package/sdk-core/tests/main.rs +3 -13
  182. package/sdk-core/tests/runner.rs +75 -36
  183. package/sdk-core/tests/wf_input_replay.rs +32 -0
  184. package/src/conversions.rs +14 -8
  185. package/src/runtime.rs +9 -8
  186. package/ts/index.ts +8 -6
  187. package/sdk-core/bridge-ffi/Cargo.toml +0 -24
  188. package/sdk-core/bridge-ffi/LICENSE.txt +0 -23
  189. package/sdk-core/bridge-ffi/build.rs +0 -25
  190. package/sdk-core/bridge-ffi/include/sdk-core-bridge.h +0 -224
  191. package/sdk-core/bridge-ffi/src/lib.rs +0 -746
  192. package/sdk-core/bridge-ffi/src/wrappers.rs +0 -221
  193. package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +0 -210
  194. package/sdk-core/sdk/src/conversions.rs +0 -8
@@ -1,250 +1,182 @@
1
+ #[cfg(feature = "save_wf_inputs")]
2
+ mod saved_wf_inputs;
3
+ #[cfg(feature = "save_wf_inputs")]
4
+ mod tonic_status_serde;
5
+
6
+ #[cfg(feature = "save_wf_inputs")]
7
+ pub use saved_wf_inputs::replay_wf_state_inputs;
8
+
1
9
  use crate::{
2
- abstractions::{dbg_panic, stream_when_allowed, MeteredSemaphore},
3
- protosext::ValidPollWFTQResponse,
4
- telemetry::metrics::workflow_worker_type,
5
- worker::{
6
- workflow::{history_update::NextPageToken, run_cache::RunCache, *},
7
- LocalActRequest, LocalActivityResolution, LEGACY_QUERY_ID,
10
+ abstractions::dbg_panic,
11
+ worker::workflow::{
12
+ managed_run::RunUpdateAct,
13
+ run_cache::RunCache,
14
+ wft_extraction::{HistfetchRC, HistoryFetchReq, WFTExtractorOutput},
15
+ *,
8
16
  },
9
17
  MetricsContext,
10
18
  };
11
19
  use futures::{stream, stream::PollNext, Stream, StreamExt};
12
- use std::{collections::VecDeque, fmt::Debug, future, sync::Arc, time::Instant};
13
- use temporal_sdk_core_api::errors::{PollWfError, WFMachinesError};
14
- use temporal_sdk_core_protos::{
15
- coresdk::{
16
- workflow_activation::{
17
- create_evict_activation, query_to_job, remove_from_cache::EvictionReason,
18
- workflow_activation_job,
19
- },
20
- workflow_completion::Failure,
21
- },
22
- temporal::api::{enums::v1::WorkflowTaskFailedCause, failure::v1::Failure as TFailure},
23
- };
24
- use tokio::sync::{mpsc::unbounded_channel, oneshot};
25
- use tokio_stream::wrappers::UnboundedReceiverStream;
20
+ use std::{collections::VecDeque, fmt::Debug, future, sync::Arc};
21
+ use temporal_sdk_core_api::errors::PollWfError;
22
+ use temporal_sdk_core_protos::coresdk::workflow_activation::remove_from_cache::EvictionReason;
26
23
  use tokio_util::sync::CancellationToken;
27
24
  use tracing::{Level, Span};
28
25
 
29
- /// This struct holds all the state needed for tracking what workflow runs are currently cached
30
- /// and how WFTs should be dispatched to them, etc.
26
+ /// This struct holds all the state needed for tracking the state of currently cached workflow runs
27
+ /// and directs all actions which affect them. It is ultimately the top-level arbiter of nearly
28
+ /// everything important relating to workflow state.
31
29
  ///
32
30
  /// See [WFStream::build] for more
33
- pub(crate) struct WFStream {
31
+ pub(super) struct WFStream {
34
32
  runs: RunCache,
35
33
  /// Buffered polls for new runs which need a cache slot to open up before we can handle them
36
34
  buffered_polls_need_cache_slot: VecDeque<PermittedWFT>,
35
+ /// Is filled with runs that we decided need to have their history fetched during state
36
+ /// manipulation. Must be drained after handling each input.
37
+ runs_needing_fetching: VecDeque<HistoryFetchReq>,
37
38
 
38
- /// Client for accessing server for history pagination etc.
39
- client: Arc<dyn WorkerClient>,
40
-
41
- /// Ensures we stay at or below this worker's maximum concurrent workflow task limit
42
- wft_semaphore: MeteredSemaphore,
39
+ history_fetch_refcounter: Arc<HistfetchRC>,
43
40
  shutdown_token: CancellationToken,
44
41
  ignore_evicts_on_shutdown: bool,
45
42
 
46
43
  metrics: MetricsContext,
47
- }
48
- impl WFStream {
49
- fn record_span_fields(&mut self, run_id: &str, span: &Span) {
50
- if let Some(run_handle) = self.runs.get_mut(run_id) {
51
- if let Some(spid) = span.id() {
52
- if run_handle.recorded_span_ids.contains(&spid) {
53
- return;
54
- }
55
- run_handle.recorded_span_ids.insert(spid);
56
-
57
- if let Some(wid) = run_handle.wft.as_ref().map(|wft| &wft.info.wf_id) {
58
- span.record("workflow_id", wid.as_str());
59
- }
60
- }
61
- }
62
- }
63
- }
64
44
 
65
- /// All possible inputs to the [WFStream]
66
- #[derive(derive_more::From, Debug)]
67
- enum WFStreamInput {
68
- NewWft(PermittedWFT),
69
- Local(LocalInput),
70
- /// The stream given to us which represents the poller (or a mock) terminated.
71
- PollerDead,
72
- /// The stream given to us which represents the poller (or a mock) encountered a non-retryable
73
- /// error while polling
74
- PollerError(tonic::Status),
75
- }
76
- impl From<RunUpdateResponse> for WFStreamInput {
77
- fn from(r: RunUpdateResponse) -> Self {
78
- WFStreamInput::Local(LocalInput {
79
- input: LocalInputs::RunUpdateResponse(r.kind),
80
- span: r.span,
81
- })
82
- }
83
- }
84
- /// A non-poller-received input to the [WFStream]
85
- #[derive(derive_more::DebugCustom)]
86
- #[debug(fmt = "LocalInput {{ {:?} }}", input)]
87
- pub(super) struct LocalInput {
88
- pub input: LocalInputs,
89
- pub span: Span,
90
- }
91
- /// Everything that _isn't_ a poll which may affect workflow state. Always higher priority than
92
- /// new polls.
93
- #[derive(Debug, derive_more::From)]
94
- pub(super) enum LocalInputs {
95
- Completion(WFActCompleteMsg),
96
- LocalResolution(LocalResolutionMsg),
97
- PostActivation(PostActivationMsg),
98
- RunUpdateResponse(RunUpdateResponseKind),
99
- RequestEviction(RequestEvictMsg),
100
- GetStateInfo(GetStateInfoMsg),
101
- }
102
- impl LocalInputs {
103
- fn run_id(&self) -> Option<&str> {
104
- Some(match self {
105
- LocalInputs::Completion(c) => c.completion.run_id(),
106
- LocalInputs::LocalResolution(lr) => &lr.run_id,
107
- LocalInputs::PostActivation(pa) => &pa.run_id,
108
- LocalInputs::RunUpdateResponse(rur) => rur.run_id(),
109
- LocalInputs::RequestEviction(re) => &re.run_id,
110
- LocalInputs::GetStateInfo(_) => return None,
111
- })
112
- }
113
- }
114
- #[derive(Debug, derive_more::From)]
115
- #[allow(clippy::large_enum_variant)] // PollerDead only ever gets used once, so not important.
116
- enum ExternalPollerInputs {
117
- NewWft(PermittedWFT),
118
- PollerDead,
119
- PollerError(tonic::Status),
120
- }
121
- impl From<ExternalPollerInputs> for WFStreamInput {
122
- fn from(l: ExternalPollerInputs) -> Self {
123
- match l {
124
- ExternalPollerInputs::NewWft(v) => WFStreamInput::NewWft(v),
125
- ExternalPollerInputs::PollerDead => WFStreamInput::PollerDead,
126
- ExternalPollerInputs::PollerError(e) => WFStreamInput::PollerError(e),
127
- }
128
- }
45
+ #[cfg(feature = "save_wf_inputs")]
46
+ wf_state_inputs: Option<UnboundedSender<Vec<u8>>>,
129
47
  }
130
-
131
48
  impl WFStream {
132
49
  /// Constructs workflow state management and returns a stream which outputs activations.
133
50
  ///
134
- /// * `external_wfts` is a stream of validated poll responses as returned by a poller (or mock)
135
- /// * `wfts_from_complete` is the recv side of a channel that new WFTs from completions should
136
- /// come down.
51
+ /// * `wft_stream` is a stream of validated poll responses and fetched history pages as returned
52
+ /// by a poller (or mock), via [WFTExtractor].
137
53
  /// * `local_rx` is a stream of actions that workflow state needs to see. Things like
138
- /// completions, local activities finishing, etc. See [LocalInputs].
54
+ /// completions, local activities finishing, etc. See [LocalInputs].
55
+ /// * `local_activity_request_sink` is used to handle outgoing requests to start or cancel
56
+ /// local activities, and may return resolutions that need to be handled immediately.
139
57
  ///
140
- /// These inputs are combined, along with an internal feedback channel for run-specific updates,
141
- /// to form the inputs to a stream of [WFActStreamInput]s. The stream processor then takes
142
- /// action on those inputs, and then may yield activations.
58
+ /// The stream inputs are combined into a stream of [WFActStreamInput]s. The stream processor
59
+ /// then takes action on those inputs, mutating the [WFStream] state, and then may yield
60
+ /// activations.
143
61
  ///
144
- /// Updating runs may need to do async work like fetching additional history. In order to
145
- /// facilitate this, each run lives in its own task which is communicated with by sending
146
- /// [RunAction]s and receiving [RunUpdateResponse]s via its [ManagedRunHandle].
62
+ /// Importantly, nothing async happens while actually mutating state. This means all changes to
63
+ /// all workflow state can be represented purely via the stream of inputs, plus the
64
+ /// calls/retvals from the LA request sink, which is the last unfortunate bit of impurity in
65
+ /// the design. Eliminating it would be nice, so that all inputs come from the passed-in streams
66
+ /// and all outputs flow from the return stream, but it's difficult to do so since it would
67
+ /// require "pausing" in-progress changes to a run while sending & waiting for response from
68
+ /// local activity management. Likely the best option would be to move the pure state info
69
+ /// needed to determine immediate responses into LA state machines themselves (out of the LA
70
+ /// manager), which is a quite substantial change.
147
71
  pub(super) fn build(
148
72
  basics: WorkflowBasics,
149
- external_wfts: impl Stream<Item = Result<ValidPollWFTQResponse, tonic::Status>> + Send + 'static,
73
+ wft_stream: impl Stream<Item = Result<WFTExtractorOutput, tonic::Status>> + Send + 'static,
150
74
  local_rx: impl Stream<Item = LocalInput> + Send + 'static,
151
- client: Arc<dyn WorkerClient>,
152
- local_activity_request_sink: impl Fn(Vec<LocalActRequest>) -> Vec<LocalActivityResolution>
153
- + Send
154
- + Sync
155
- + 'static,
156
- ) -> impl Stream<Item = Result<ActivationOrAuto, PollWfError>> {
157
- let wft_semaphore = MeteredSemaphore::new(
158
- basics.max_outstanding_wfts,
159
- basics.metrics.with_new_attrs([workflow_worker_type()]),
160
- MetricsContext::available_task_slots,
161
- );
162
- let wft_sem_clone = wft_semaphore.clone();
163
- let proceeder = stream::unfold(wft_sem_clone, |sem| async move {
164
- Some((sem.acquire_owned().await.unwrap(), sem))
165
- });
166
- let poller_wfts = stream_when_allowed(external_wfts, proceeder);
167
- let (run_update_tx, run_update_rx) = unbounded_channel();
168
- let local_rx = stream::select(
169
- local_rx.map(Into::into),
170
- UnboundedReceiverStream::new(run_update_rx).map(Into::into),
171
- );
75
+ local_activity_request_sink: impl LocalActivityRequestSink,
76
+ ) -> impl Stream<Item = Result<WFStreamOutput, PollWfError>> {
172
77
  let all_inputs = stream::select_with_strategy(
173
- local_rx,
174
- poller_wfts
175
- .map(|(wft, permit)| match wft {
176
- Ok(wft) => ExternalPollerInputs::NewWft(PermittedWFT { wft, permit }),
177
- Err(e) => ExternalPollerInputs::PollerError(e),
178
- })
78
+ local_rx.map(Into::into),
79
+ wft_stream
80
+ .map(Into::into)
179
81
  .chain(stream::once(async { ExternalPollerInputs::PollerDead }))
180
82
  .map(Into::into)
181
83
  .boxed(),
182
84
  // Priority always goes to the local stream
183
85
  |_: &mut ()| PollNext::Left,
184
86
  );
87
+ Self::build_internal(all_inputs, basics, local_activity_request_sink)
88
+ }
89
+
90
+ fn build_internal(
91
+ all_inputs: impl Stream<Item = WFStreamInput>,
92
+ basics: WorkflowBasics,
93
+ local_activity_request_sink: impl LocalActivityRequestSink,
94
+ ) -> impl Stream<Item = Result<WFStreamOutput, PollWfError>> {
185
95
  let mut state = WFStream {
186
96
  buffered_polls_need_cache_slot: Default::default(),
187
97
  runs: RunCache::new(
188
98
  basics.max_cached_workflows,
189
99
  basics.namespace.clone(),
190
- run_update_tx,
191
- Arc::new(local_activity_request_sink),
100
+ basics.server_capabilities.clone(),
101
+ local_activity_request_sink,
192
102
  basics.metrics.clone(),
193
103
  ),
194
- client,
195
- wft_semaphore,
196
104
  shutdown_token: basics.shutdown_token,
197
105
  ignore_evicts_on_shutdown: basics.ignore_evicts_on_shutdown,
198
106
  metrics: basics.metrics,
107
+ runs_needing_fetching: Default::default(),
108
+ history_fetch_refcounter: Arc::new(HistfetchRC {}),
109
+
110
+ #[cfg(feature = "save_wf_inputs")]
111
+ wf_state_inputs: basics.wf_state_inputs,
199
112
  };
200
113
  all_inputs
201
- .map(move |action| {
114
+ .map(move |action: WFStreamInput| {
202
115
  let span = span!(Level::DEBUG, "new_stream_input", action=?action);
203
116
  let _span_g = span.enter();
204
117
 
205
- let maybe_activation = match action {
118
+ #[cfg(feature = "save_wf_inputs")]
119
+ let maybe_write = state.prep_input(&action);
120
+
121
+ let mut activations = vec![];
122
+ let maybe_act = match action {
206
123
  WFStreamInput::NewWft(pwft) => {
207
- debug!(run_id=%pwft.wft.workflow_execution.run_id, "New WFT");
208
- state.instantiate_or_update(pwft);
209
- None
124
+ debug!(run_id=%pwft.work.execution.run_id, "New WFT");
125
+ state.instantiate_or_update(pwft)
210
126
  }
211
127
  WFStreamInput::Local(local_input) => {
212
128
  let _span_g = local_input.span.enter();
213
129
  if let Some(rid) = local_input.input.run_id() {
214
- state.record_span_fields(rid, &local_input.span);
130
+ if let Some(rh) = state.runs.get_mut(rid) {
131
+ rh.record_span_fields(&local_input.span);
132
+ }
215
133
  }
216
134
  match local_input.input {
217
- LocalInputs::RunUpdateResponse(resp) => {
218
- state.process_run_update_response(resp)
219
- }
220
135
  LocalInputs::Completion(completion) => {
221
- state.process_completion(completion);
222
- None
136
+ activations.extend(
137
+ state.process_completion(NewOrFetchedComplete::New(completion)),
138
+ );
139
+ None // completions can return more than one activation
140
+ }
141
+ LocalInputs::FetchedPageCompletion { paginator, update } => {
142
+ activations.extend(state.process_completion(
143
+ NewOrFetchedComplete::Fetched(update, paginator),
144
+ ));
145
+ None // completions can return more than one activation
223
146
  }
224
147
  LocalInputs::PostActivation(report) => {
225
- state.process_post_activation(report);
226
- None
148
+ state.process_post_activation(report)
227
149
  }
228
- LocalInputs::LocalResolution(res) => {
229
- state.local_resolution(res);
230
- None
150
+ LocalInputs::LocalResolution(res) => state.local_resolution(res),
151
+ LocalInputs::HeartbeatTimeout(hbt) => {
152
+ state.process_heartbeat_timeout(hbt)
231
153
  }
232
154
  LocalInputs::RequestEviction(evict) => {
233
- state.request_eviction(evict);
234
- None
155
+ state.request_eviction(evict).into_run_update_resp()
235
156
  }
236
157
  LocalInputs::GetStateInfo(gsi) => {
237
158
  let _ = gsi.response_tx.send(WorkflowStateInfo {
238
159
  cached_workflows: state.runs.len(),
239
160
  outstanding_wft: state.outstanding_wfts(),
240
- available_wft_permits: state.wft_semaphore.available_permits(),
241
161
  });
242
162
  None
243
163
  }
244
164
  }
245
165
  }
166
+ WFStreamInput::FailedFetch {
167
+ run_id,
168
+ err,
169
+ auto_reply_fail_tt,
170
+ } => state
171
+ .request_eviction(RequestEvictMsg {
172
+ run_id,
173
+ message: format!("Fetching history failed: {err:?}"),
174
+ reason: EvictionReason::PaginationOrHistoryFetch,
175
+ auto_reply_fail_tt,
176
+ })
177
+ .into_run_update_resp(),
246
178
  WFStreamInput::PollerDead => {
247
- debug!("WFT poller died, shutting down");
179
+ debug!("WFT poller died, beginning shutdown");
248
180
  state.shutdown_token.cancel();
249
181
  None
250
182
  }
@@ -254,457 +186,228 @@ impl WFStream {
254
186
  }
255
187
  };
256
188
 
257
- if let Some(ref act) = maybe_activation {
258
- if let Some(run_handle) = state.runs.get_mut(act.run_id()) {
259
- run_handle.insert_outstanding_activation(act);
260
- } else {
261
- dbg_panic!("Tried to insert activation for missing run!");
262
- }
189
+ activations.extend(maybe_act.into_iter());
190
+ activations.extend(state.reconcile_buffered());
191
+
192
+ // Always flush *after* actually handling the input, as this allows LA sink
193
+ // responses to be recorded before the input, so they can be read and buffered to be
194
+ // replayed during the handling of the input itself.
195
+ #[cfg(feature = "save_wf_inputs")]
196
+ if let Some(write) = maybe_write {
197
+ state.flush_write(write);
263
198
  }
264
- state.reconcile_buffered();
199
+
265
200
  if state.shutdown_done() {
201
+ info!("Workflow shutdown is done");
266
202
  return Err(PollWfError::ShutDown);
267
203
  }
268
204
 
269
- Ok(maybe_activation)
205
+ Ok(WFStreamOutput {
206
+ activations: activations.into(),
207
+ fetch_histories: std::mem::take(&mut state.runs_needing_fetching),
208
+ })
270
209
  })
271
- .filter_map(|o| {
272
- future::ready(match o {
273
- Ok(None) => None,
274
- Ok(Some(v)) => Some(Ok(v)),
275
- Err(e) => {
276
- if !matches!(e, PollWfError::ShutDown) {
277
- error!(
210
+ .inspect(|o| {
211
+ if let Some(e) = o.as_ref().err() {
212
+ if !matches!(e, PollWfError::ShutDown) {
213
+ error!(
278
214
  "Workflow processing encountered fatal error and must shut down {:?}",
279
215
  e
280
- );
281
- }
282
- Some(Err(e))
216
+ );
283
217
  }
284
- })
218
+ }
285
219
  })
286
220
  // Stop the stream once we have shut down
287
221
  .take_while(|o| future::ready(!matches!(o, Err(PollWfError::ShutDown))))
288
222
  }
289
223
 
290
- fn process_run_update_response(
291
- &mut self,
292
- resp: RunUpdateResponseKind,
293
- ) -> Option<ActivationOrAuto> {
294
- debug!(resp=%resp, "Processing run update response from machines");
295
- match resp {
296
- RunUpdateResponseKind::Good(mut resp) => {
297
- let run_handle = self
298
- .runs
299
- .get_mut(&resp.run_id)
300
- .expect("Workflow must exist, it just sent us an update response");
301
- run_handle.have_seen_terminal_event = resp.have_seen_terminal_event;
302
- run_handle.more_pending_work = resp.more_pending_work;
303
- run_handle.last_action_acked = true;
304
- run_handle.most_recently_processed_event_number =
305
- resp.most_recently_processed_event_number;
306
-
307
- let r = match resp.outgoing_activation {
308
- Some(ActivationOrAuto::LangActivation(mut activation)) => {
309
- if resp.in_response_to_wft {
310
- let wft = run_handle
311
- .wft
312
- .as_mut()
313
- .expect("WFT must exist for run just updated with one");
314
- // If there are in-poll queries, insert jobs for those queries into the
315
- // activation, but only if we hit the cache. If we didn't, those queries
316
- // will need to be dealt with once replay is over
317
- if wft.hit_cache {
318
- put_queries_in_act(&mut activation, wft);
319
- }
320
- }
321
-
322
- if activation.jobs.is_empty() {
323
- dbg_panic!("Should not send lang activation with no jobs");
324
- }
325
- Some(ActivationOrAuto::LangActivation(activation))
326
- }
327
- Some(ActivationOrAuto::ReadyForQueries(mut act)) => {
328
- if let Some(wft) = run_handle.wft.as_mut() {
329
- put_queries_in_act(&mut act, wft);
330
- Some(ActivationOrAuto::LangActivation(act))
331
- } else {
332
- dbg_panic!("Ready for queries but no WFT!");
333
- None
334
- }
335
- }
336
- a @ Some(ActivationOrAuto::Autocomplete { .. }) => a,
337
- None => {
338
- // If the response indicates there is no activation to send yet but there
339
- // is more pending work, we should check again.
340
- if run_handle.more_pending_work {
341
- run_handle.check_more_activations();
342
- None
343
- } else if let Some(reason) = run_handle.trying_to_evict.as_ref() {
344
- // If a run update came back and had nothing to do, but we're trying to
345
- // evict, just do that now as long as there's no other outstanding work.
346
- if run_handle.activation.is_none() && !run_handle.more_pending_work {
347
- let mut evict_act = create_evict_activation(
348
- resp.run_id,
349
- reason.message.clone(),
350
- reason.reason,
351
- );
352
- evict_act.history_length =
353
- run_handle.most_recently_processed_event_number as u32;
354
- Some(ActivationOrAuto::LangActivation(evict_act))
355
- } else {
356
- None
357
- }
358
- } else {
359
- None
360
- }
361
- }
362
- };
363
- if let Some(f) = resp.fulfillable_complete.take() {
364
- f.fulfill();
365
- }
366
-
367
- // After each run update, check if it's ready to handle any buffered poll
368
- if matches!(&r, Some(ActivationOrAuto::Autocomplete { .. }) | None)
369
- && !run_handle.has_any_pending_work(false, true)
370
- {
371
- if let Some(bufft) = run_handle.buffered_resp.take() {
372
- self.instantiate_or_update(bufft);
373
- }
374
- }
375
- r
376
- }
377
- RunUpdateResponseKind::Fail(fail) => {
378
- if let Some(r) = self.runs.get_mut(&fail.run_id) {
379
- r.last_action_acked = true;
380
- }
381
-
382
- if let Some(resp_chan) = fail.completion_resp {
383
- // Automatically fail the workflow task in the event we couldn't update machines
384
- let fail_cause = if matches!(&fail.err, WFMachinesError::Nondeterminism(_)) {
385
- WorkflowTaskFailedCause::NonDeterministicError
386
- } else {
387
- WorkflowTaskFailedCause::Unspecified
388
- };
389
- let wft_fail_str = format!("{:?}", fail.err);
390
- self.failed_completion(
391
- fail.run_id,
392
- fail_cause,
393
- fail.err.evict_reason(),
394
- TFailure::application_failure(wft_fail_str, false).into(),
395
- resp_chan,
396
- );
397
- } else {
398
- // TODO: This should probably also fail workflow tasks, but that wasn't
399
- // implemented pre-refactor either.
400
- warn!(error=?fail.err, run_id=%fail.run_id, "Error while updating workflow");
401
- self.request_eviction(RequestEvictMsg {
402
- run_id: fail.run_id,
403
- message: format!("Error while updating workflow: {:?}", fail.err),
404
- reason: fail.err.evict_reason(),
405
- });
406
- }
407
- None
224
+ /// Instantiate or update run machines with a new WFT
225
+ #[instrument(skip(self, pwft)
226
+ fields(run_id=%pwft.work.execution.run_id,
227
+ workflow_id=%pwft.work.execution.workflow_id))]
228
+ fn instantiate_or_update(&mut self, pwft: PermittedWFT) -> RunUpdateAct {
229
+ match self._instantiate_or_update(pwft) {
230
+ Err(histfetch) => {
231
+ self.runs_needing_fetching.push_back(histfetch);
232
+ Default::default()
408
233
  }
234
+ Ok(r) => r,
409
235
  }
410
236
  }
411
237
 
412
- #[instrument(skip(self, pwft),
413
- fields(run_id=%pwft.wft.workflow_execution.run_id,
414
- workflow_id=%pwft.wft.workflow_execution.workflow_id))]
415
- fn instantiate_or_update(&mut self, pwft: PermittedWFT) {
416
- let (mut work, permit) = if let Some(w) = self.buffer_resp_if_outstanding_work(pwft) {
417
- (w.wft, w.permit)
238
+ fn _instantiate_or_update(
239
+ &mut self,
240
+ pwft: PermittedWFT,
241
+ ) -> Result<RunUpdateAct, HistoryFetchReq> {
242
+ // If the run already exists, possibly buffer the work and return early if we can't handle
243
+ // it yet.
244
+ let pwft = if let Some(rh) = self.runs.get_mut(&pwft.work.execution.run_id) {
245
+ if let Some(w) = rh.buffer_wft_if_outstanding_work(pwft) {
246
+ w
247
+ } else {
248
+ return Ok(None);
249
+ }
418
250
  } else {
419
- return;
251
+ pwft
420
252
  };
421
253
 
422
- let run_id = work.workflow_execution.run_id.clone();
254
+ let run_id = pwft.work.execution.run_id.clone();
423
255
  // If our cache is full and this WFT is for an unseen run we must first evict a run before
424
256
  // we can deal with this task. So, buffer the task in that case.
425
257
  if !self.runs.has_run(&run_id) && self.runs.is_full() {
426
- self.buffer_resp_on_full_cache(PermittedWFT { wft: work, permit });
427
- return;
258
+ self.buffer_resp_on_full_cache(pwft);
259
+ return Ok(None);
428
260
  }
429
261
 
430
- let start_event_id = work.history.events.first().map(|e| e.event_id);
431
- debug!(
432
- run_id = %run_id,
433
- task_token = %&work.task_token,
434
- history_length = %work.history.events.len(),
435
- start_event_id = ?start_event_id,
436
- has_legacy_query = %work.legacy_query.is_some(),
437
- attempt = %work.attempt,
438
- "Applying new workflow task from server"
439
- );
440
-
441
- let wft_info = WorkflowTaskInfo {
442
- attempt: work.attempt,
443
- task_token: work.task_token,
444
- wf_id: work.workflow_execution.workflow_id.clone(),
445
- };
446
- let poll_resp_is_incremental = work
447
- .history
448
- .events
449
- .get(0)
450
- .map(|ev| ev.event_id > 1)
451
- .unwrap_or_default();
452
- let poll_resp_is_incremental = poll_resp_is_incremental || work.history.events.is_empty();
453
-
454
- let mut did_miss_cache = !poll_resp_is_incremental;
455
-
456
- let page_token = if !self.runs.has_run(&run_id) && poll_resp_is_incremental {
262
+ // This check can't really be lifted up higher since we could EX: See it's in the cache,
263
+ // not fetch more history, send the task, see cache is full, buffer it, then evict that
264
+ // run, and now we still have a cache miss.
265
+ if !self.runs.has_run(&run_id) && pwft.work.is_incremental() {
457
266
  debug!(run_id=?run_id, "Workflow task has partial history, but workflow is not in \
458
267
  cache. Will fetch history");
459
268
  self.metrics.sticky_cache_miss();
460
- did_miss_cache = true;
461
- NextPageToken::FetchFromStart
462
- } else {
463
- work.next_page_token.into()
464
- };
465
- let history_update = HistoryUpdate::new(
466
- HistoryPaginator::new(
467
- work.history,
468
- work.workflow_execution.workflow_id.clone(),
469
- run_id.clone(),
470
- page_token,
471
- self.client.clone(),
472
- ),
473
- work.previous_started_event_id,
474
- );
475
- let legacy_query_from_poll = work
476
- .legacy_query
477
- .take()
478
- .map(|q| query_to_job(LEGACY_QUERY_ID.to_string(), q));
479
-
480
- let mut pending_queries = work.query_requests.into_iter().collect::<Vec<_>>();
481
- if !pending_queries.is_empty() && legacy_query_from_poll.is_some() {
482
- error!(
483
- "Server issued both normal and legacy queries. This should not happen. Please \
484
- file a bug report."
485
- );
486
- self.request_eviction(RequestEvictMsg {
487
- run_id,
488
- message: "Server issued both normal and legacy query".to_string(),
489
- reason: EvictionReason::Fatal,
490
- });
491
- return;
492
- }
493
- if let Some(lq) = legacy_query_from_poll {
494
- pending_queries.push(lq);
269
+ return Err(HistoryFetchReq::Full(
270
+ CacheMissFetchReq { original_wft: pwft },
271
+ self.history_fetch_refcounter.clone(),
272
+ ));
495
273
  }
496
274
 
497
- let start_time = Instant::now();
498
- let run_handle = self.runs.instantiate_or_update(
499
- &run_id,
500
- &work.workflow_execution.workflow_id,
501
- &work.workflow_type,
502
- history_update,
503
- start_time,
504
- );
505
- run_handle.wft = Some(OutstandingTask {
506
- info: wft_info,
507
- hit_cache: !did_miss_cache,
508
- pending_queries,
509
- start_time,
510
- permit,
511
- })
275
+ let rur = self.runs.instantiate_or_update(pwft);
276
+ Ok(rur)
512
277
  }
513
278
 
514
- fn process_completion(&mut self, complete: WFActCompleteMsg) {
515
- match complete.completion {
516
- ValidatedCompletion::Success { run_id, commands } => {
517
- self.successful_completion(run_id, commands, complete.response_tx);
518
- }
519
- ValidatedCompletion::Fail { run_id, failure } => {
520
- self.failed_completion(
521
- run_id,
522
- WorkflowTaskFailedCause::Unspecified,
279
+ fn process_completion(&mut self, complete: NewOrFetchedComplete) -> Vec<ActivationOrAuto> {
280
+ let rh = if let Some(rh) = self.runs.get_mut(complete.run_id()) {
281
+ rh
282
+ } else {
283
+ dbg_panic!("Run missing during completion {:?}", complete);
284
+ return vec![];
285
+ };
286
+ let mut acts: Vec<_> = match complete {
287
+ NewOrFetchedComplete::New(complete) => match complete.completion {
288
+ ValidatedCompletion::Success {
289
+ commands,
290
+ used_flags,
291
+ ..
292
+ } => match rh.successful_completion(commands, used_flags, complete.response_tx) {
293
+ Ok(acts) => acts,
294
+ Err(npr) => {
295
+ self.runs_needing_fetching
296
+ .push_back(HistoryFetchReq::NextPage(
297
+ npr,
298
+ self.history_fetch_refcounter.clone(),
299
+ ));
300
+ None
301
+ }
302
+ },
303
+ ValidatedCompletion::Fail { failure, .. } => rh.failed_completion(
304
+ failure.force_cause(),
523
305
  EvictionReason::LangFail,
524
306
  failure,
525
307
  complete.response_tx,
526
- );
308
+ ),
309
+ },
310
+ NewOrFetchedComplete::Fetched(update, paginator) => {
311
+ rh.fetched_page_completion(update, paginator)
527
312
  }
528
313
  }
314
+ .into_iter()
315
+ .collect();
529
316
  // Always queue evictions after completion when we have a zero-size cache
530
317
  if self.runs.cache_capacity() == 0 {
531
- self.request_eviction_of_lru_run();
318
+ acts.extend(self.request_eviction_of_lru_run().into_run_update_resp())
532
319
  }
320
+ acts
533
321
  }
534
322
 
535
- fn successful_completion(
536
- &mut self,
537
- run_id: String,
538
- mut commands: Vec<WFCommand>,
539
- resp_chan: oneshot::Sender<ActivationCompleteResult>,
540
- ) {
541
- let activation_was_only_eviction = self.activation_has_only_eviction(&run_id);
542
- let (task_token, has_pending_query, start_time) =
543
- if let Some(entry) = self.get_task(&run_id) {
544
- (
545
- entry.info.task_token.clone(),
546
- !entry.pending_queries.is_empty(),
547
- entry.start_time,
548
- )
549
- } else {
550
- if !activation_was_only_eviction {
551
- // Not an error if this was an eviction, since it's normal to issue eviction
552
- // activations without an associated workflow task in that case.
553
- dbg_panic!(
554
- "Attempted to complete activation for run {} without associated workflow task",
323
+ fn process_post_activation(&mut self, report: PostActivationMsg) -> RunUpdateAct {
324
+ let run_id = &report.run_id;
325
+ let wft_from_complete = report.wft_from_complete;
326
+ if let Some((wft, _)) = &wft_from_complete {
327
+ if &wft.execution.run_id != run_id {
328
+ dbg_panic!(
329
+ "Server returned a WFT on completion for a different run ({}) than the \
330
+ one being completed ({}). This is a server bug.",
331
+ wft.execution.run_id,
555
332
  run_id
556
- );
557
- }
558
- self.reply_to_complete(&run_id, ActivationCompleteOutcome::DoNothing, resp_chan);
559
- return;
560
- };
561
-
562
- // If the only command from the activation is a legacy query response, that means we need
563
- // to respond differently than a typical activation.
564
- if matches!(&commands.as_slice(),
565
- &[WFCommand::QueryResponse(qr)] if qr.query_id == LEGACY_QUERY_ID)
566
- {
567
- let qr = match commands.remove(0) {
568
- WFCommand::QueryResponse(qr) => qr,
569
- _ => unreachable!("We just verified this is the only command"),
570
- };
571
- self.reply_to_complete(
572
- &run_id,
573
- ActivationCompleteOutcome::ReportWFTSuccess(ServerCommandsWithWorkflowInfo {
574
- task_token,
575
- action: ActivationAction::RespondLegacyQuery {
576
- result: Box::new(qr),
577
- },
578
- }),
579
- resp_chan,
580
- );
581
- } else {
582
- // First strip out query responses from other commands that actually affect machines
583
- // Would be prettier with `drain_filter`
584
- let mut i = 0;
585
- let mut query_responses = vec![];
586
- while i < commands.len() {
587
- if matches!(commands[i], WFCommand::QueryResponse(_)) {
588
- if let WFCommand::QueryResponse(qr) = commands.remove(i) {
589
- query_responses.push(qr);
590
- }
591
- } else {
592
- i += 1;
593
- }
594
- }
595
-
596
- let activation_was_eviction = self.activation_has_eviction(&run_id);
597
- if let Some(rh) = self.runs.get_mut(&run_id) {
598
- rh.send_completion(RunActivationCompletion {
599
- task_token,
600
- start_time,
601
- commands,
602
- activation_was_eviction,
603
- activation_was_only_eviction,
604
- has_pending_query,
605
- query_responses,
606
- resp_chan: Some(resp_chan),
607
- });
608
- } else {
609
- dbg_panic!("Run {} missing during completion", run_id);
333
+ );
610
334
  }
611
- };
612
- }
613
-
614
- fn failed_completion(
615
- &mut self,
616
- run_id: String,
617
- cause: WorkflowTaskFailedCause,
618
- reason: EvictionReason,
619
- failure: Failure,
620
- resp_chan: oneshot::Sender<ActivationCompleteResult>,
621
- ) {
622
- let tt = if let Some(tt) = self.get_task(&run_id).map(|t| t.info.task_token.clone()) {
623
- tt
624
- } else {
625
- dbg_panic!(
626
- "No workflow task for run id {} found when trying to fail activation",
627
- run_id
628
- );
629
- self.reply_to_complete(&run_id, ActivationCompleteOutcome::DoNothing, resp_chan);
630
- return;
631
- };
632
-
633
- if let Some(m) = self.run_metrics(&run_id) {
634
- m.wf_task_failed();
635
335
  }
636
- let message = format!("Workflow activation completion failed: {:?}", &failure);
637
- // Blow up any cached data associated with the workflow
638
- let should_report = match self.request_eviction(RequestEvictMsg {
639
- run_id: run_id.clone(),
640
- message,
641
- reason,
642
- }) {
643
- EvictionRequestResult::EvictionRequested(Some(attempt))
644
- | EvictionRequestResult::EvictionAlreadyRequested(Some(attempt)) => attempt <= 1,
645
- _ => false,
646
- };
647
- // If the outstanding WFT is a legacy query task, report that we need to fail it
648
- let outcome = if self
649
- .runs
650
- .get(&run_id)
651
- .map(|rh| rh.pending_work_is_legacy_query())
652
- .unwrap_or_default()
653
- {
654
- ActivationCompleteOutcome::ReportWFTFail(
655
- FailedActivationWFTReport::ReportLegacyQueryFailure(tt, failure),
656
- )
657
- } else if should_report {
658
- ActivationCompleteOutcome::ReportWFTFail(FailedActivationWFTReport::Report(
659
- tt, cause, failure,
660
- ))
661
- } else {
662
- ActivationCompleteOutcome::DoNothing
663
- };
664
- self.reply_to_complete(&run_id, outcome, resp_chan);
665
- }
666
336
 
667
- fn process_post_activation(&mut self, report: PostActivationMsg) {
668
- let run_id = &report.run_id;
337
+ let mut res = None;
669
338
 
670
339
  // If we reported to server, we always want to mark it complete.
671
- let maybe_t = self.complete_wft(run_id, report.reported_wft_to_server);
340
+ let maybe_t = self.complete_wft(run_id, report.wft_report_status);
341
+ // Delete the activation
342
+ let activation = self
343
+ .runs
344
+ .get_mut(run_id)
345
+ .and_then(|rh| rh.delete_activation());
346
+
347
+ // Evict the run if the activation contained an eviction
348
+ let mut applied_buffered_poll_for_this_run = false;
349
+ if activation.map(|a| a.has_eviction()).unwrap_or_default() {
350
+ debug!(run_id=%run_id, "Evicting run");
351
+
352
+ if let Some(mut rh) = self.runs.remove(run_id) {
353
+ if let Some(buff) = rh.take_buffered_wft() {
354
+ // Don't try to apply a buffered poll for this run if we just got a new WFT
355
+ // from completing, because by definition that buffered poll is now an
356
+ // out-of-date WFT.
357
+ if wft_from_complete.is_none() {
358
+ res = self.instantiate_or_update(buff);
359
+ applied_buffered_poll_for_this_run = true;
360
+ }
361
+ }
362
+ }
672
363
 
673
- if self
674
- .get_activation(run_id)
675
- .map(|a| a.has_eviction())
676
- .unwrap_or_default()
677
- {
678
- self.evict_run(run_id);
364
+ // Attempt to apply a buffered poll for some *other* run, if we didn't have a wft
365
+ // from complete or a buffered poll for *this* run.
366
+ if wft_from_complete.is_none() && !applied_buffered_poll_for_this_run {
367
+ if let Some(buff) = self.buffered_polls_need_cache_slot.pop_front() {
368
+ res = self.instantiate_or_update(buff);
369
+ }
370
+ }
679
371
  };
680
372
 
681
- if let Some(wft) = report.wft_from_complete {
682
- debug!(run_id=%wft.workflow_execution.run_id, "New WFT from completion");
373
+ if let Some((wft, pag)) = wft_from_complete {
374
+ debug!(run_id=%wft.execution.run_id, "New WFT from completion");
683
375
  if let Some(t) = maybe_t {
684
- self.instantiate_or_update(PermittedWFT {
685
- wft,
376
+ res = self.instantiate_or_update(PermittedWFT {
377
+ work: wft,
686
378
  permit: t.permit,
687
- })
379
+ paginator: pag,
380
+ });
688
381
  }
689
382
  }
690
383
 
691
- if let Some(rh) = self.runs.get_mut(run_id) {
692
- // Delete the activation
693
- rh.activation.take();
694
- // Attempt to produce the next activation if needed
695
- rh.check_more_activations();
384
+ if res.is_none() {
385
+ if let Some(rh) = self.runs.get_mut(run_id) {
386
+ // Attempt to produce the next activation if needed
387
+ res = rh.check_more_activations();
388
+ }
696
389
  }
390
+ res
697
391
  }
698
392
 
699
- fn local_resolution(&mut self, msg: LocalResolutionMsg) {
393
+ fn local_resolution(&mut self, msg: LocalResolutionMsg) -> RunUpdateAct {
700
394
  let run_id = msg.run_id;
701
395
  if let Some(rh) = self.runs.get_mut(&run_id) {
702
- rh.send_local_resolution(msg.res)
396
+ rh.local_resolution(msg.res)
703
397
  } else {
704
398
  // It isn't an explicit error if the machine is missing when a local activity resolves.
705
399
  // This can happen if an activity reports a timeout after we stopped caring about it.
706
400
  debug!(run_id = %run_id,
707
401
  "Tried to resolve a local activity for a run we are no longer tracking");
402
+ None
403
+ }
404
+ }
405
+
406
+ fn process_heartbeat_timeout(&mut self, run_id: String) -> RunUpdateAct {
407
+ if let Some(rh) = self.runs.get_mut(&run_id) {
408
+ rh.heartbeat_timeout()
409
+ } else {
410
+ None
708
411
  }
709
412
  }
710
413
 
@@ -712,17 +415,8 @@ impl WFStream {
712
415
  /// activation to evict the workflow from the lang side. Workflow will not *actually* be evicted
713
416
  /// until lang replies to that activation
714
417
  fn request_eviction(&mut self, info: RequestEvictMsg) -> EvictionRequestResult {
715
- let activation_has_eviction = self.activation_has_eviction(&info.run_id);
716
418
  if let Some(rh) = self.runs.get_mut(&info.run_id) {
717
- let attempts = rh.wft.as_ref().map(|wt| wt.info.attempt);
718
- if !activation_has_eviction && rh.trying_to_evict.is_none() {
719
- debug!(run_id=%info.run_id, reason=%info.message, "Eviction requested");
720
- rh.trying_to_evict = Some(info);
721
- rh.check_more_activations();
722
- EvictionRequestResult::EvictionRequested(attempts)
723
- } else {
724
- EvictionRequestResult::EvictionAlreadyRequested(attempts)
725
- }
419
+ rh.request_eviction(info)
726
420
  } else {
727
421
  debug!(run_id=%info.run_id, "Eviction requested for unknown run");
728
422
  EvictionRequestResult::NotFound
@@ -736,6 +430,7 @@ impl WFStream {
736
430
  run_id,
737
431
  message: "Workflow cache full".to_string(),
738
432
  reason: EvictionReason::CacheFull,
433
+ auto_reply_fail_tt: None,
739
434
  })
740
435
  } else {
741
436
  // This branch shouldn't really be possible
@@ -743,36 +438,10 @@ impl WFStream {
743
438
  }
744
439
  }
745
440
 
746
- /// Evict a workflow from the cache by its run id. Any existing pending activations will be
747
- /// destroyed, and any outstanding activations invalidated.
748
- fn evict_run(&mut self, run_id: &str) {
749
- debug!(run_id=%run_id, "Evicting run");
750
-
751
- let mut did_take_buff = false;
752
- // Now it can safely be deleted, it'll get recreated once the un-buffered poll is handled if
753
- // there was one.
754
- if let Some(mut rh) = self.runs.remove(run_id) {
755
- rh.handle.abort();
756
-
757
- if let Some(buff) = rh.buffered_resp.take() {
758
- self.instantiate_or_update(buff);
759
- did_take_buff = true;
760
- }
761
- }
762
-
763
- if !did_take_buff {
764
- // If there wasn't a buffered poll, there might be one for a different run which needs
765
- // a free cache slot, and now there is.
766
- if let Some(buff) = self.buffered_polls_need_cache_slot.pop_front() {
767
- self.instantiate_or_update(buff);
768
- }
769
- }
770
- }
771
-
772
441
  fn complete_wft(
773
442
  &mut self,
774
443
  run_id: &str,
775
- reported_wft_to_server: bool,
444
+ wft_report_status: WFTReportStatus,
776
445
  ) -> Option<OutstandingTask> {
777
446
  // If the WFT completion wasn't sent to the server, but we did see the final event, we still
778
447
  // want to clear the workflow task. This can really only happen in replay testing, where we
@@ -782,9 +451,9 @@ impl WFStream {
782
451
  let saw_final = self
783
452
  .runs
784
453
  .get(run_id)
785
- .map(|r| r.have_seen_terminal_event)
454
+ .map(|r| r.have_seen_terminal_event())
786
455
  .unwrap_or_default();
787
- if !saw_final && !reported_wft_to_server {
456
+ if !saw_final && matches!(wft_report_status, WFTReportStatus::NotReported) {
788
457
  return None;
789
458
  }
790
459
 
@@ -792,60 +461,26 @@ impl WFStream {
792
461
  // Can't mark the WFT complete if there are pending queries, as doing so would destroy
793
462
  // them.
794
463
  if rh
795
- .wft
796
- .as_ref()
464
+ .wft()
797
465
  .map(|wft| !wft.pending_queries.is_empty())
798
466
  .unwrap_or_default()
799
467
  {
800
468
  return None;
801
469
  }
802
470
 
803
- debug!("Marking WFT completed");
804
- let retme = rh.wft.take();
805
- if let Some(ot) = &retme {
806
- if let Some(m) = self.run_metrics(run_id) {
807
- m.wf_task_latency(ot.start_time.elapsed());
808
- }
809
- }
810
- retme
471
+ rh.mark_wft_complete(wft_report_status)
811
472
  } else {
812
473
  None
813
474
  }
814
475
  }
815
476
 
816
- /// Stores some work if there is any outstanding WFT or activation for the run. If there was
817
- /// not, returns the work back out inside the option.
818
- fn buffer_resp_if_outstanding_work(&mut self, work: PermittedWFT) -> Option<PermittedWFT> {
819
- let run_id = &work.wft.workflow_execution.run_id;
820
- if let Some(mut run) = self.runs.get_mut(run_id) {
821
- let about_to_issue_evict = run.trying_to_evict.is_some() && !run.last_action_acked;
822
- let has_wft = run.wft.is_some();
823
- let has_activation = run.activation.is_some();
824
- if has_wft
825
- || has_activation
826
- || about_to_issue_evict
827
- || run.more_pending_work
828
- || !run.last_action_acked
829
- {
830
- debug!(run_id = %run_id, run = ?run,
831
- "Got new WFT for a run with outstanding work, buffering it");
832
- run.buffered_resp = Some(work);
833
- None
834
- } else {
835
- Some(work)
836
- }
837
- } else {
838
- Some(work)
839
- }
840
- }
841
-
842
477
  fn buffer_resp_on_full_cache(&mut self, work: PermittedWFT) {
843
- debug!(run_id=%work.wft.workflow_execution.run_id, "Buffering WFT because cache is full");
478
+ debug!(run_id=%work.work.execution.run_id, "Buffering WFT because cache is full");
844
479
  // If there's already a buffered poll for the run, replace it.
845
480
  if let Some(rh) = self
846
481
  .buffered_polls_need_cache_slot
847
482
  .iter_mut()
848
- .find(|w| w.wft.workflow_execution.run_id == work.wft.workflow_execution.run_id)
483
+ .find(|w| w.work.execution.run_id == work.work.execution.run_id)
849
484
  {
850
485
  *rh = work;
851
486
  } else {
@@ -856,7 +491,7 @@ impl WFStream {
856
491
 
857
492
  /// Makes sure we have enough pending evictions to fulfill the needs of buffered WFTs who are
858
493
  /// waiting on a cache slot
859
- fn reconcile_buffered(&mut self) {
494
+ fn reconcile_buffered(&mut self) -> Vec<ActivationOrAuto> {
860
495
  // We must ensure that there are at least as many pending evictions as there are tasks
861
496
  // that we might need to un-buffer (skipping runs which already have buffered tasks for
862
497
  // themselves)
@@ -865,121 +500,237 @@ impl WFStream {
865
500
  let num_existing_evictions = self
866
501
  .runs
867
502
  .runs_lru_order()
868
- .filter(|(_, h)| h.trying_to_evict.is_some())
503
+ .filter(|(_, h)| h.is_trying_to_evict())
869
504
  .count();
870
505
  let mut num_evicts_needed = num_in_buff.saturating_sub(num_existing_evictions);
871
506
  for (rid, handle) in self.runs.runs_lru_order() {
872
507
  if num_evicts_needed == 0 {
873
508
  break;
874
509
  }
875
- if handle.buffered_resp.is_none() {
510
+ if !handle.has_buffered_wft() {
876
511
  num_evicts_needed -= 1;
877
512
  evict_these.push(rid.to_string());
878
513
  }
879
514
  }
515
+ let mut acts = vec![];
880
516
  for run_id in evict_these {
881
- self.request_eviction(RequestEvictMsg {
882
- run_id,
883
- message: "Workflow cache full".to_string(),
884
- reason: EvictionReason::CacheFull,
885
- });
517
+ acts.extend(
518
+ self.request_eviction(RequestEvictMsg {
519
+ run_id,
520
+ message: "Workflow cache full".to_string(),
521
+ reason: EvictionReason::CacheFull,
522
+ auto_reply_fail_tt: None,
523
+ })
524
+ .into_run_update_resp(),
525
+ );
886
526
  }
887
- }
888
-
889
- fn reply_to_complete(
890
- &self,
891
- run_id: &str,
892
- outcome: ActivationCompleteOutcome,
893
- chan: oneshot::Sender<ActivationCompleteResult>,
894
- ) {
895
- let most_recently_processed_event = self
896
- .runs
897
- .peek(run_id)
898
- .map(|rh| rh.most_recently_processed_event_number)
899
- .unwrap_or_default();
900
- chan.send(ActivationCompleteResult {
901
- most_recently_processed_event,
902
- outcome,
903
- })
904
- .expect("Rcv half of activation reply not dropped");
527
+ acts
905
528
  }
906
529
 
907
530
  fn shutdown_done(&self) -> bool {
908
- let all_runs_ready = self
909
- .runs
910
- .handles()
911
- .all(|r| !r.has_any_pending_work(self.ignore_evicts_on_shutdown, false));
912
- if self.shutdown_token.is_cancelled() && all_runs_ready {
913
- info!("Workflow shutdown is done");
914
- true
915
- } else {
916
- false
531
+ if self.shutdown_token.is_cancelled() {
532
+ if Arc::strong_count(&self.history_fetch_refcounter) > 1 {
533
+ // Don't exit if there are outstanding fetch requests
534
+ return false;
535
+ }
536
+ let all_runs_ready = self
537
+ .runs
538
+ .handles()
539
+ .all(|r| !r.has_any_pending_work(self.ignore_evicts_on_shutdown, false));
540
+ if all_runs_ready {
541
+ return true;
542
+ }
917
543
  }
918
- }
919
-
920
- fn get_task(&mut self, run_id: &str) -> Option<&OutstandingTask> {
921
- self.runs.get(run_id).and_then(|rh| rh.wft.as_ref())
922
- }
923
-
924
- fn get_activation(&mut self, run_id: &str) -> Option<&OutstandingActivation> {
925
- self.runs.get(run_id).and_then(|rh| rh.activation.as_ref())
926
- }
927
-
928
- fn run_metrics(&mut self, run_id: &str) -> Option<&MetricsContext> {
929
- self.runs.get(run_id).map(|r| &r.metrics)
930
- }
931
-
932
- fn activation_has_only_eviction(&mut self, run_id: &str) -> bool {
933
- self.runs
934
- .get(run_id)
935
- .and_then(|rh| rh.activation)
936
- .map(OutstandingActivation::has_only_eviction)
937
- .unwrap_or_default()
938
- }
939
-
940
- fn activation_has_eviction(&mut self, run_id: &str) -> bool {
941
- self.runs
942
- .get(run_id)
943
- .and_then(|rh| rh.activation)
944
- .map(OutstandingActivation::has_eviction)
945
- .unwrap_or_default()
544
+ false
946
545
  }
947
546
 
948
547
  fn outstanding_wfts(&self) -> usize {
949
- self.runs.handles().filter(|r| r.wft.is_some()).count()
548
+ self.runs.handles().filter(|r| r.wft().is_some()).count()
950
549
  }
951
550
 
952
551
  // Useful when debugging
953
552
  #[allow(dead_code)]
954
553
  fn info_dump(&self, run_id: &str) {
955
554
  if let Some(r) = self.runs.peek(run_id) {
956
- info!(run_id, wft=?r.wft, activation=?r.activation, buffered=r.buffered_resp.is_some(),
957
- trying_to_evict=r.trying_to_evict.is_some(), more_work=r.more_pending_work,
958
- last_action_acked=r.last_action_acked);
555
+ info!(run_id, wft=?r.wft(), activation=?r.activation(),
556
+ buffered_wft=r.has_buffered_wft(),
557
+ trying_to_evict=r.is_trying_to_evict(), more_work=r.more_pending_work());
959
558
  } else {
960
559
  info!(run_id, "Run not found");
961
560
  }
962
561
  }
963
562
  }
964
563
 
965
- /// Drains pending queries from the workflow task and appends them to the activation's jobs
966
- fn put_queries_in_act(act: &mut WorkflowActivation, wft: &mut OutstandingTask) {
967
- // Nothing to do if there are no pending queries
968
- if wft.pending_queries.is_empty() {
969
- return;
970
- }
564
+ /// All possible inputs to the [WFStream]
565
+ #[derive(derive_more::From, Debug)]
566
+ #[cfg_attr(
567
+ feature = "save_wf_inputs",
568
+ derive(serde::Serialize, serde::Deserialize)
569
+ )]
570
+ enum WFStreamInput {
571
+ NewWft(PermittedWFT),
572
+ Local(LocalInput),
573
+ /// The stream given to us which represents the poller (or a mock) terminated.
574
+ PollerDead,
575
+ /// The stream given to us which represents the poller (or a mock) encountered a non-retryable
576
+ /// error while polling
577
+ PollerError(
578
+ #[cfg_attr(
579
+ feature = "save_wf_inputs",
580
+ serde(with = "tonic_status_serde::SerdeStatus")
581
+ )]
582
+ tonic::Status,
583
+ ),
584
+ FailedFetch {
585
+ run_id: String,
586
+ #[cfg_attr(
587
+ feature = "save_wf_inputs",
588
+ serde(with = "tonic_status_serde::SerdeStatus")
589
+ )]
590
+ err: tonic::Status,
591
+ auto_reply_fail_tt: Option<TaskToken>,
592
+ },
593
+ }
971
594
 
972
- let has_legacy = wft.has_pending_legacy_query();
973
- // Cannot dispatch legacy query if there are any other jobs - which can happen if, ex, a local
974
- // activity resolves while we've gotten a legacy query after heartbeating.
975
- if has_legacy && !act.jobs.is_empty() {
976
- return;
595
+ /// A non-poller-received input to the [WFStream]
596
+ #[derive(derive_more::DebugCustom)]
597
+ #[cfg_attr(
598
+ feature = "save_wf_inputs",
599
+ derive(serde::Serialize, serde::Deserialize)
600
+ )]
601
+ #[debug(fmt = "LocalInput {{ {input:?} }}")]
602
+ pub(super) struct LocalInput {
603
+ pub input: LocalInputs,
604
+ #[cfg_attr(feature = "save_wf_inputs", serde(skip, default = "Span::current"))]
605
+ pub span: Span,
606
+ }
607
+ impl From<HeartbeatTimeoutMsg> for LocalInput {
608
+ fn from(hb: HeartbeatTimeoutMsg) -> Self {
609
+ Self {
610
+ input: LocalInputs::HeartbeatTimeout(hb.run_id),
611
+ span: hb.span,
612
+ }
613
+ }
614
+ }
615
+ /// Everything that _isn't_ a poll which may affect workflow state. Always higher priority than
616
+ /// new polls.
617
+ #[derive(Debug, derive_more::From)]
618
+ #[cfg_attr(
619
+ feature = "save_wf_inputs",
620
+ derive(serde::Serialize, serde::Deserialize)
621
+ )]
622
+ pub(super) enum LocalInputs {
623
+ Completion(WFActCompleteMsg),
624
+ FetchedPageCompletion {
625
+ paginator: HistoryPaginator,
626
+ update: HistoryUpdate,
627
+ },
628
+ LocalResolution(LocalResolutionMsg),
629
+ PostActivation(PostActivationMsg),
630
+ RequestEviction(RequestEvictMsg),
631
+ HeartbeatTimeout(String),
632
+ #[cfg_attr(feature = "save_wf_inputs", serde(skip))]
633
+ GetStateInfo(GetStateInfoMsg),
634
+ }
635
+ impl LocalInputs {
636
+ fn run_id(&self) -> Option<&str> {
637
+ Some(match self {
638
+ LocalInputs::Completion(c) => c.completion.run_id(),
639
+ LocalInputs::FetchedPageCompletion { paginator, .. } => &paginator.run_id,
640
+ LocalInputs::LocalResolution(lr) => &lr.run_id,
641
+ LocalInputs::PostActivation(pa) => &pa.run_id,
642
+ LocalInputs::RequestEviction(re) => &re.run_id,
643
+ LocalInputs::HeartbeatTimeout(hb) => hb,
644
+ LocalInputs::GetStateInfo(_) => return None,
645
+ })
646
+ }
647
+ }
648
+ #[derive(Debug)]
649
+ #[allow(clippy::large_enum_variant)] // PollerDead only ever gets used once, so not important.
650
+ enum ExternalPollerInputs {
651
+ NewWft(PermittedWFT),
652
+ PollerDead,
653
+ PollerError(tonic::Status),
654
+ FetchedUpdate(PermittedWFT),
655
+ NextPage {
656
+ paginator: HistoryPaginator,
657
+ update: HistoryUpdate,
658
+ span: Span,
659
+ },
660
+ FailedFetch {
661
+ run_id: String,
662
+ err: tonic::Status,
663
+ auto_reply_fail_tt: Option<TaskToken>,
664
+ },
665
+ }
666
+ impl From<ExternalPollerInputs> for WFStreamInput {
667
+ fn from(l: ExternalPollerInputs) -> Self {
668
+ match l {
669
+ ExternalPollerInputs::NewWft(v) => WFStreamInput::NewWft(v),
670
+ ExternalPollerInputs::PollerDead => WFStreamInput::PollerDead,
671
+ ExternalPollerInputs::PollerError(e) => WFStreamInput::PollerError(e),
672
+ ExternalPollerInputs::FetchedUpdate(wft) => WFStreamInput::NewWft(wft),
673
+ ExternalPollerInputs::FailedFetch {
674
+ run_id,
675
+ err,
676
+ auto_reply_fail_tt,
677
+ } => WFStreamInput::FailedFetch {
678
+ run_id,
679
+ err,
680
+ auto_reply_fail_tt,
681
+ },
682
+ ExternalPollerInputs::NextPage {
683
+ paginator,
684
+ update,
685
+ span,
686
+ } => WFStreamInput::Local(LocalInput {
687
+ input: LocalInputs::FetchedPageCompletion { paginator, update },
688
+ span,
689
+ }),
690
+ }
691
+ }
692
+ }
693
+ impl From<Result<WFTExtractorOutput, tonic::Status>> for ExternalPollerInputs {
694
+ fn from(v: Result<WFTExtractorOutput, tonic::Status>) -> Self {
695
+ match v {
696
+ Ok(WFTExtractorOutput::NewWFT(pwft)) => ExternalPollerInputs::NewWft(pwft),
697
+ Ok(WFTExtractorOutput::FetchResult(updated_wft, _)) => {
698
+ ExternalPollerInputs::FetchedUpdate(updated_wft)
699
+ }
700
+ Ok(WFTExtractorOutput::NextPage {
701
+ paginator,
702
+ update,
703
+ span,
704
+ rc: _rc,
705
+ }) => ExternalPollerInputs::NextPage {
706
+ paginator,
707
+ update,
708
+ span,
709
+ },
710
+ Ok(WFTExtractorOutput::FailedFetch {
711
+ run_id,
712
+ err,
713
+ auto_reply_fail_tt,
714
+ }) => ExternalPollerInputs::FailedFetch {
715
+ run_id,
716
+ err,
717
+ auto_reply_fail_tt,
718
+ },
719
+ Ok(WFTExtractorOutput::PollerDead) => ExternalPollerInputs::PollerDead,
720
+ Err(e) => ExternalPollerInputs::PollerError(e),
721
+ }
722
+ }
723
+ }
724
+ #[derive(Debug)]
725
+ enum NewOrFetchedComplete {
726
+ New(WFActCompleteMsg),
727
+ Fetched(HistoryUpdate, HistoryPaginator),
728
+ }
729
+ impl NewOrFetchedComplete {
730
+ fn run_id(&self) -> &str {
731
+ match self {
732
+ NewOrFetchedComplete::New(c) => c.completion.run_id(),
733
+ NewOrFetchedComplete::Fetched(_, p) => &p.run_id,
734
+ }
977
735
  }
978
-
979
- debug!(queries=?wft.pending_queries, "Dispatching queries");
980
- let query_jobs = wft
981
- .pending_queries
982
- .drain(..)
983
- .map(|q| workflow_activation_job::Variant::QueryWorkflow(q).into());
984
- act.jobs.extend(query_jobs);
985
736
  }