temporalio 0.0.1 → 0.1.0

Files changed (310)
  1. checksums.yaml +4 -4
  2. data/README.md +180 -7
  3. data/bridge/Cargo.lock +208 -76
  4. data/bridge/Cargo.toml +5 -2
  5. data/bridge/sdk-core/Cargo.toml +1 -1
  6. data/bridge/sdk-core/README.md +20 -10
  7. data/bridge/sdk-core/client/Cargo.toml +1 -1
  8. data/bridge/sdk-core/client/src/lib.rs +227 -59
  9. data/bridge/sdk-core/client/src/metrics.rs +17 -8
  10. data/bridge/sdk-core/client/src/raw.rs +13 -12
  11. data/bridge/sdk-core/client/src/retry.rs +132 -43
  12. data/bridge/sdk-core/core/Cargo.toml +28 -15
  13. data/bridge/sdk-core/core/benches/workflow_replay.rs +13 -10
  14. data/bridge/sdk-core/core/src/abstractions.rs +225 -36
  15. data/bridge/sdk-core/core/src/core_tests/activity_tasks.rs +217 -79
  16. data/bridge/sdk-core/core/src/core_tests/determinism.rs +165 -2
  17. data/bridge/sdk-core/core/src/core_tests/local_activities.rs +565 -34
  18. data/bridge/sdk-core/core/src/core_tests/queries.rs +247 -90
  19. data/bridge/sdk-core/core/src/core_tests/workers.rs +3 -5
  20. data/bridge/sdk-core/core/src/core_tests/workflow_cancels.rs +1 -1
  21. data/bridge/sdk-core/core/src/core_tests/workflow_tasks.rs +430 -67
  22. data/bridge/sdk-core/core/src/ephemeral_server/mod.rs +106 -12
  23. data/bridge/sdk-core/core/src/internal_flags.rs +136 -0
  24. data/bridge/sdk-core/core/src/lib.rs +148 -34
  25. data/bridge/sdk-core/core/src/protosext/mod.rs +1 -1
  26. data/bridge/sdk-core/core/src/replay/mod.rs +185 -41
  27. data/bridge/sdk-core/core/src/telemetry/log_export.rs +190 -0
  28. data/bridge/sdk-core/core/src/telemetry/metrics.rs +219 -140
  29. data/bridge/sdk-core/core/src/telemetry/mod.rs +326 -315
  30. data/bridge/sdk-core/core/src/telemetry/prometheus_server.rs +20 -14
  31. data/bridge/sdk-core/core/src/test_help/mod.rs +85 -21
  32. data/bridge/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +112 -156
  33. data/bridge/sdk-core/core/src/worker/activities/activity_task_poller_stream.rs +89 -0
  34. data/bridge/sdk-core/core/src/worker/activities/local_activities.rs +364 -128
  35. data/bridge/sdk-core/core/src/worker/activities.rs +263 -170
  36. data/bridge/sdk-core/core/src/worker/client/mocks.rs +23 -3
  37. data/bridge/sdk-core/core/src/worker/client.rs +48 -6
  38. data/bridge/sdk-core/core/src/worker/mod.rs +186 -75
  39. data/bridge/sdk-core/core/src/worker/workflow/bridge.rs +1 -3
  40. data/bridge/sdk-core/core/src/worker/workflow/driven_workflow.rs +13 -24
  41. data/bridge/sdk-core/core/src/worker/workflow/history_update.rs +879 -226
  42. data/bridge/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +101 -48
  43. data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +8 -12
  44. data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +6 -9
  45. data/bridge/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +90 -32
  46. data/bridge/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +6 -9
  47. data/bridge/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +7 -10
  48. data/bridge/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +6 -9
  49. data/bridge/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +160 -83
  50. data/bridge/sdk-core/core/src/worker/workflow/machines/mod.rs +36 -54
  51. data/bridge/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +179 -0
  52. data/bridge/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +104 -157
  53. data/bridge/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +8 -12
  54. data/bridge/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +9 -13
  55. data/bridge/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +10 -4
  56. data/bridge/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +14 -11
  57. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines/local_acts.rs +6 -17
  58. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +395 -299
  59. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +12 -20
  60. data/bridge/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +33 -18
  61. data/bridge/sdk-core/core/src/worker/workflow/managed_run.rs +1032 -374
  62. data/bridge/sdk-core/core/src/worker/workflow/mod.rs +525 -392
  63. data/bridge/sdk-core/core/src/worker/workflow/run_cache.rs +40 -57
  64. data/bridge/sdk-core/core/src/worker/workflow/wft_extraction.rs +125 -0
  65. data/bridge/sdk-core/core/src/worker/workflow/wft_poller.rs +3 -6
  66. data/bridge/sdk-core/core/src/worker/workflow/workflow_stream/saved_wf_inputs.rs +117 -0
  67. data/bridge/sdk-core/core/src/worker/workflow/workflow_stream/tonic_status_serde.rs +24 -0
  68. data/bridge/sdk-core/core/src/worker/workflow/workflow_stream.rs +456 -681
  69. data/bridge/sdk-core/core-api/Cargo.toml +6 -4
  70. data/bridge/sdk-core/core-api/src/errors.rs +1 -34
  71. data/bridge/sdk-core/core-api/src/lib.rs +7 -45
  72. data/bridge/sdk-core/core-api/src/telemetry.rs +141 -0
  73. data/bridge/sdk-core/core-api/src/worker.rs +27 -1
  74. data/bridge/sdk-core/etc/deps.svg +115 -140
  75. data/bridge/sdk-core/etc/regen-depgraph.sh +5 -0
  76. data/bridge/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +18 -15
  77. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +1 -1
  78. data/bridge/sdk-core/fsm/rustfsm_trait/src/lib.rs +8 -3
  79. data/bridge/sdk-core/histories/evict_while_la_running_no_interference-16_history.bin +0 -0
  80. data/bridge/sdk-core/histories/evict_while_la_running_no_interference-23_history.bin +0 -0
  81. data/bridge/sdk-core/histories/evict_while_la_running_no_interference-85_history.bin +0 -0
  82. data/bridge/sdk-core/protos/api_upstream/buf.yaml +0 -3
  83. data/bridge/sdk-core/protos/api_upstream/build/go.mod +7 -0
  84. data/bridge/sdk-core/protos/api_upstream/build/go.sum +5 -0
  85. data/bridge/sdk-core/protos/api_upstream/{temporal/api/enums/v1/cluster.proto → build/tools.go} +7 -18
  86. data/bridge/sdk-core/protos/api_upstream/go.mod +6 -0
  87. data/bridge/sdk-core/protos/api_upstream/temporal/api/batch/v1/message.proto +12 -9
  88. data/bridge/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +15 -26
  89. data/bridge/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +13 -2
  90. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/batch_operation.proto +3 -2
  91. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +4 -9
  92. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/common.proto +3 -2
  93. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +10 -8
  94. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +28 -2
  95. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +2 -2
  96. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/query.proto +2 -2
  97. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +2 -2
  98. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +2 -2
  99. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +2 -2
  100. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +24 -19
  101. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +2 -2
  102. data/bridge/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +2 -2
  103. data/bridge/sdk-core/protos/api_upstream/temporal/api/failure/v1/message.proto +2 -2
  104. data/bridge/sdk-core/protos/api_upstream/temporal/api/filter/v1/message.proto +2 -2
  105. data/bridge/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +62 -26
  106. data/bridge/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +4 -2
  107. data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +24 -61
  108. data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +2 -21
  109. data/bridge/sdk-core/protos/api_upstream/temporal/api/protocol/v1/message.proto +57 -0
  110. data/bridge/sdk-core/protos/api_upstream/temporal/api/query/v1/message.proto +2 -2
  111. data/bridge/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +2 -2
  112. data/bridge/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +110 -31
  113. data/bridge/sdk-core/protos/api_upstream/temporal/api/sdk/v1/task_complete_metadata.proto +63 -0
  114. data/bridge/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +4 -4
  115. data/bridge/sdk-core/protos/api_upstream/temporal/api/update/v1/message.proto +71 -6
  116. data/bridge/sdk-core/protos/api_upstream/temporal/api/version/v1/message.proto +2 -2
  117. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +3 -2
  118. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +111 -36
  119. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +19 -5
  120. data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +1 -0
  121. data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +1 -0
  122. data/bridge/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +1 -0
  123. data/bridge/sdk-core/protos/local/temporal/sdk/core/common/common.proto +1 -0
  124. data/bridge/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +1 -0
  125. data/bridge/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +1 -0
  126. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +9 -0
  127. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +9 -1
  128. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +6 -0
  129. data/bridge/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/request_response.proto +2 -2
  130. data/bridge/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/service.proto +2 -2
  131. data/bridge/sdk-core/sdk/Cargo.toml +4 -3
  132. data/bridge/sdk-core/sdk/src/interceptors.rs +36 -3
  133. data/bridge/sdk-core/sdk/src/lib.rs +94 -25
  134. data/bridge/sdk-core/sdk/src/workflow_context.rs +13 -2
  135. data/bridge/sdk-core/sdk/src/workflow_future.rs +10 -13
  136. data/bridge/sdk-core/sdk-core-protos/Cargo.toml +5 -2
  137. data/bridge/sdk-core/sdk-core-protos/build.rs +36 -2
  138. data/bridge/sdk-core/sdk-core-protos/src/history_builder.rs +164 -104
  139. data/bridge/sdk-core/sdk-core-protos/src/history_info.rs +27 -23
  140. data/bridge/sdk-core/sdk-core-protos/src/lib.rs +252 -74
  141. data/bridge/sdk-core/sdk-core-protos/src/task_token.rs +12 -2
  142. data/bridge/sdk-core/test-utils/Cargo.toml +4 -1
  143. data/bridge/sdk-core/test-utils/src/canned_histories.rs +106 -296
  144. data/bridge/sdk-core/test-utils/src/histfetch.rs +1 -1
  145. data/bridge/sdk-core/test-utils/src/lib.rs +161 -50
  146. data/bridge/sdk-core/test-utils/src/wf_input_saver.rs +50 -0
  147. data/bridge/sdk-core/test-utils/src/workflows.rs +29 -0
  148. data/bridge/sdk-core/tests/fuzzy_workflow.rs +130 -0
  149. data/bridge/sdk-core/tests/{load_tests.rs → heavy_tests.rs} +125 -51
  150. data/bridge/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +25 -3
  151. data/bridge/sdk-core/tests/integ_tests/heartbeat_tests.rs +10 -5
  152. data/bridge/sdk-core/tests/integ_tests/metrics_tests.rs +239 -0
  153. data/bridge/sdk-core/tests/integ_tests/polling_tests.rs +4 -60
  154. data/bridge/sdk-core/tests/integ_tests/queries_tests.rs +5 -128
  155. data/bridge/sdk-core/tests/integ_tests/visibility_tests.rs +83 -25
  156. data/bridge/sdk-core/tests/integ_tests/workflow_tests/activities.rs +93 -69
  157. data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +1 -0
  158. data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +6 -13
  159. data/bridge/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +1 -0
  160. data/bridge/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +6 -2
  161. data/bridge/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +3 -10
  162. data/bridge/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +151 -116
  163. data/bridge/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +54 -0
  164. data/bridge/sdk-core/tests/integ_tests/workflow_tests/patches.rs +7 -28
  165. data/bridge/sdk-core/tests/integ_tests/workflow_tests/replay.rs +115 -24
  166. data/bridge/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -0
  167. data/bridge/sdk-core/tests/integ_tests/workflow_tests/signals.rs +18 -14
  168. data/bridge/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +6 -20
  169. data/bridge/sdk-core/tests/integ_tests/workflow_tests/timers.rs +10 -21
  170. data/bridge/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +6 -4
  171. data/bridge/sdk-core/tests/integ_tests/workflow_tests.rs +27 -18
  172. data/bridge/sdk-core/tests/main.rs +8 -16
  173. data/bridge/sdk-core/tests/runner.rs +75 -36
  174. data/bridge/sdk-core/tests/wf_input_replay.rs +32 -0
  175. data/bridge/src/connection.rs +117 -82
  176. data/bridge/src/lib.rs +356 -42
  177. data/bridge/src/runtime.rs +10 -3
  178. data/bridge/src/test_server.rs +153 -0
  179. data/bridge/src/worker.rs +133 -9
  180. data/lib/gen/temporal/api/batch/v1/message_pb.rb +8 -6
  181. data/lib/gen/temporal/api/command/v1/message_pb.rb +10 -16
  182. data/lib/gen/temporal/api/common/v1/message_pb.rb +5 -1
  183. data/lib/gen/temporal/api/enums/v1/batch_operation_pb.rb +2 -1
  184. data/lib/gen/temporal/api/enums/v1/command_type_pb.rb +3 -3
  185. data/lib/gen/temporal/api/enums/v1/common_pb.rb +2 -1
  186. data/lib/gen/temporal/api/enums/v1/event_type_pb.rb +5 -4
  187. data/lib/gen/temporal/api/enums/v1/failed_cause_pb.rb +9 -1
  188. data/lib/gen/temporal/api/enums/v1/namespace_pb.rb +1 -1
  189. data/lib/gen/temporal/api/enums/v1/query_pb.rb +1 -1
  190. data/lib/gen/temporal/api/enums/v1/reset_pb.rb +1 -1
  191. data/lib/gen/temporal/api/enums/v1/schedule_pb.rb +1 -1
  192. data/lib/gen/temporal/api/enums/v1/task_queue_pb.rb +1 -1
  193. data/lib/gen/temporal/api/enums/v1/update_pb.rb +7 -10
  194. data/lib/gen/temporal/api/enums/v1/workflow_pb.rb +1 -1
  195. data/lib/gen/temporal/api/errordetails/v1/message_pb.rb +1 -1
  196. data/lib/gen/temporal/api/failure/v1/message_pb.rb +1 -1
  197. data/lib/gen/temporal/api/filter/v1/message_pb.rb +1 -1
  198. data/lib/gen/temporal/api/history/v1/message_pb.rb +34 -25
  199. data/lib/gen/temporal/api/namespace/v1/message_pb.rb +2 -1
  200. data/lib/gen/temporal/api/operatorservice/v1/request_response_pb.rb +14 -51
  201. data/lib/gen/temporal/api/operatorservice/v1/service_pb.rb +1 -1
  202. data/lib/gen/temporal/api/protocol/v1/message_pb.rb +30 -0
  203. data/lib/gen/temporal/api/query/v1/message_pb.rb +1 -1
  204. data/lib/gen/temporal/api/replication/v1/message_pb.rb +1 -1
  205. data/lib/gen/temporal/api/schedule/v1/message_pb.rb +22 -1
  206. data/lib/gen/temporal/api/sdk/v1/task_complete_metadata_pb.rb +23 -0
  207. data/lib/gen/temporal/api/taskqueue/v1/message_pb.rb +2 -2
  208. data/lib/gen/temporal/api/testservice/v1/request_response_pb.rb +49 -0
  209. data/lib/gen/temporal/api/testservice/v1/service_pb.rb +21 -0
  210. data/lib/gen/temporal/api/update/v1/message_pb.rb +49 -3
  211. data/lib/gen/temporal/api/version/v1/message_pb.rb +1 -1
  212. data/lib/gen/temporal/api/workflow/v1/message_pb.rb +2 -1
  213. data/lib/gen/temporal/api/workflowservice/v1/request_response_pb.rb +47 -20
  214. data/lib/gen/temporal/api/workflowservice/v1/service_pb.rb +1 -1
  215. data/lib/gen/temporal/sdk/core/activity_result/activity_result_pb.rb +13 -9
  216. data/lib/gen/temporal/sdk/core/activity_task/activity_task_pb.rb +10 -6
  217. data/lib/gen/temporal/sdk/core/child_workflow/child_workflow_pb.rb +13 -9
  218. data/lib/gen/temporal/sdk/core/common/common_pb.rb +7 -3
  219. data/lib/gen/temporal/sdk/core/core_interface_pb.rb +9 -3
  220. data/lib/gen/temporal/sdk/core/external_data/external_data_pb.rb +7 -3
  221. data/lib/gen/temporal/sdk/core/workflow_activation/workflow_activation_pb.rb +28 -21
  222. data/lib/gen/temporal/sdk/core/workflow_commands/workflow_commands_pb.rb +32 -24
  223. data/lib/gen/temporal/sdk/core/workflow_completion/workflow_completion_pb.rb +12 -5
  224. data/lib/temporalio/activity/context.rb +102 -0
  225. data/lib/temporalio/activity/info.rb +67 -0
  226. data/lib/temporalio/activity.rb +85 -0
  227. data/lib/temporalio/bridge/connect_options.rb +15 -0
  228. data/lib/temporalio/bridge/error.rb +8 -0
  229. data/lib/temporalio/bridge/retry_config.rb +24 -0
  230. data/lib/temporalio/bridge/tls_options.rb +19 -0
  231. data/lib/temporalio/bridge.rb +14 -0
  232. data/lib/{temporal → temporalio}/client/implementation.rb +57 -56
  233. data/lib/{temporal → temporalio}/client/workflow_handle.rb +35 -35
  234. data/lib/{temporal → temporalio}/client.rb +19 -32
  235. data/lib/temporalio/connection/retry_config.rb +44 -0
  236. data/lib/temporalio/connection/service.rb +20 -0
  237. data/lib/temporalio/connection/test_service.rb +92 -0
  238. data/lib/temporalio/connection/tls_options.rb +51 -0
  239. data/lib/temporalio/connection/workflow_service.rb +731 -0
  240. data/lib/temporalio/connection.rb +86 -0
  241. data/lib/{temporal → temporalio}/data_converter.rb +76 -35
  242. data/lib/{temporal → temporalio}/error/failure.rb +6 -6
  243. data/lib/{temporal → temporalio}/error/workflow_failure.rb +4 -2
  244. data/lib/{temporal → temporalio}/errors.rb +19 -1
  245. data/lib/{temporal → temporalio}/failure_converter/base.rb +5 -5
  246. data/lib/{temporal → temporalio}/failure_converter/basic.rb +58 -52
  247. data/lib/temporalio/failure_converter.rb +7 -0
  248. data/lib/temporalio/interceptor/activity_inbound.rb +22 -0
  249. data/lib/temporalio/interceptor/activity_outbound.rb +24 -0
  250. data/lib/{temporal → temporalio}/interceptor/chain.rb +7 -6
  251. data/lib/{temporal → temporalio}/interceptor/client.rb +27 -2
  252. data/lib/temporalio/interceptor.rb +22 -0
  253. data/lib/{temporal → temporalio}/payload_codec/base.rb +5 -5
  254. data/lib/{temporal → temporalio}/payload_converter/base.rb +3 -3
  255. data/lib/{temporal → temporalio}/payload_converter/bytes.rb +4 -3
  256. data/lib/{temporal → temporalio}/payload_converter/composite.rb +7 -5
  257. data/lib/{temporal → temporalio}/payload_converter/encoding_base.rb +4 -4
  258. data/lib/{temporal → temporalio}/payload_converter/json.rb +4 -3
  259. data/lib/{temporal → temporalio}/payload_converter/nil.rb +4 -3
  260. data/lib/temporalio/payload_converter.rb +14 -0
  261. data/lib/{temporal → temporalio}/retry_policy.rb +17 -7
  262. data/lib/{temporal → temporalio}/retry_state.rb +1 -1
  263. data/lib/temporalio/runtime.rb +25 -0
  264. data/lib/temporalio/testing/time_skipping_handle.rb +32 -0
  265. data/lib/temporalio/testing/time_skipping_interceptor.rb +23 -0
  266. data/lib/temporalio/testing/workflow_environment.rb +112 -0
  267. data/lib/temporalio/testing.rb +175 -0
  268. data/lib/{temporal → temporalio}/timeout_type.rb +2 -2
  269. data/lib/temporalio/version.rb +3 -0
  270. data/lib/temporalio/worker/activity_runner.rb +114 -0
  271. data/lib/temporalio/worker/activity_worker.rb +164 -0
  272. data/lib/temporalio/worker/reactor.rb +46 -0
  273. data/lib/temporalio/worker/runner.rb +63 -0
  274. data/lib/temporalio/worker/sync_worker.rb +124 -0
  275. data/lib/temporalio/worker/thread_pool_executor.rb +51 -0
  276. data/lib/temporalio/worker.rb +204 -0
  277. data/lib/temporalio/workflow/async.rb +46 -0
  278. data/lib/{temporal → temporalio}/workflow/execution_info.rb +4 -4
  279. data/lib/{temporal → temporalio}/workflow/execution_status.rb +1 -1
  280. data/lib/temporalio/workflow/future.rb +138 -0
  281. data/lib/{temporal → temporalio}/workflow/id_reuse_policy.rb +6 -6
  282. data/lib/temporalio/workflow/info.rb +76 -0
  283. data/lib/{temporal → temporalio}/workflow/query_reject_condition.rb +5 -5
  284. data/lib/temporalio.rb +12 -3
  285. data/temporalio.gemspec +11 -6
  286. metadata +137 -64
  287. data/bridge/sdk-core/Cargo.lock +0 -2606
  288. data/bridge/sdk-core/bridge-ffi/Cargo.toml +0 -24
  289. data/bridge/sdk-core/bridge-ffi/LICENSE.txt +0 -23
  290. data/bridge/sdk-core/bridge-ffi/build.rs +0 -25
  291. data/bridge/sdk-core/bridge-ffi/include/sdk-core-bridge.h +0 -249
  292. data/bridge/sdk-core/bridge-ffi/src/lib.rs +0 -825
  293. data/bridge/sdk-core/bridge-ffi/src/wrappers.rs +0 -211
  294. data/bridge/sdk-core/core/src/log_export.rs +0 -62
  295. data/bridge/sdk-core/core/src/worker/workflow/machines/mutable_side_effect_state_machine.rs +0 -127
  296. data/bridge/sdk-core/core/src/worker/workflow/machines/side_effect_state_machine.rs +0 -71
  297. data/bridge/sdk-core/protos/api_upstream/temporal/api/cluster/v1/message.proto +0 -83
  298. data/bridge/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +0 -210
  299. data/bridge/sdk-core/sdk/src/conversions.rs +0 -8
  300. data/lib/bridge.so +0 -0
  301. data/lib/gen/temporal/api/cluster/v1/message_pb.rb +0 -67
  302. data/lib/gen/temporal/api/enums/v1/cluster_pb.rb +0 -26
  303. data/lib/gen/temporal/sdk/core/bridge/bridge_pb.rb +0 -222
  304. data/lib/temporal/bridge.rb +0 -14
  305. data/lib/temporal/connection.rb +0 -736
  306. data/lib/temporal/failure_converter.rb +0 -8
  307. data/lib/temporal/payload_converter.rb +0 -14
  308. data/lib/temporal/runtime.rb +0 -22
  309. data/lib/temporal/version.rb +0 -3
  310. data/lib/temporal.rb +0 -8
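
The single rendered diff below appears to cover data/bridge/sdk-core/core/src/worker/workflow/workflow_stream.rs (entry 68 in the list above); only part of that file's changes are shown. The file list itself also documents the gem-wide move of the Ruby code from lib/temporal to lib/temporalio and the new Connection, Worker, Activity, and Testing classes.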
@@ -1,217 +1,177 @@
+ #[cfg(feature = "save_wf_inputs")]
+ mod saved_wf_inputs;
+ #[cfg(feature = "save_wf_inputs")]
+ mod tonic_status_serde;
+
+ #[cfg(feature = "save_wf_inputs")]
+ pub use saved_wf_inputs::replay_wf_state_inputs;
+
  use crate::{
- abstractions::{dbg_panic, stream_when_allowed, MeteredSemaphore},
- protosext::ValidPollWFTQResponse,
- telemetry::metrics::workflow_worker_type,
- worker::{
- workflow::{history_update::NextPageToken, run_cache::RunCache, *},
- LocalActRequest, LocalActivityResolution, LEGACY_QUERY_ID,
+ abstractions::dbg_panic,
+ worker::workflow::{
+ managed_run::RunUpdateAct,
+ run_cache::RunCache,
+ wft_extraction::{HistfetchRC, HistoryFetchReq, WFTExtractorOutput},
+ *,
  },
  MetricsContext,
  };
  use futures::{stream, stream::PollNext, Stream, StreamExt};
- use std::{collections::VecDeque, fmt::Debug, future, sync::Arc, time::Instant};
- use temporal_sdk_core_api::errors::{PollWfError, WFMachinesError};
- use temporal_sdk_core_protos::{
- coresdk::{
- workflow_activation::{
- create_evict_activation, query_to_job, remove_from_cache::EvictionReason,
- workflow_activation_job,
- },
- workflow_completion::Failure,
- },
- temporal::api::{enums::v1::WorkflowTaskFailedCause, failure::v1::Failure as TFailure},
- };
- use tokio::sync::{mpsc::unbounded_channel, oneshot};
- use tokio_stream::wrappers::UnboundedReceiverStream;
+ use std::{collections::VecDeque, fmt::Debug, future, sync::Arc};
+ use temporal_sdk_core_api::errors::PollWfError;
+ use temporal_sdk_core_protos::coresdk::workflow_activation::remove_from_cache::EvictionReason;
  use tokio_util::sync::CancellationToken;
  use tracing::{Level, Span};

- /// This struct holds all the state needed for tracking what workflow runs are currently cached
- /// and how WFTs should be dispatched to them, etc.
+ /// This struct holds all the state needed for tracking the state of currently cached workflow runs
+ /// and directs all actions which affect them. It is ultimately the top-level arbiter of nearly
+ /// everything important relating to workflow state.
  ///
  /// See [WFStream::build] for more
- pub(crate) struct WFStream {
+ pub(super) struct WFStream {
  runs: RunCache,
  /// Buffered polls for new runs which need a cache slot to open up before we can handle them
  buffered_polls_need_cache_slot: VecDeque<PermittedWFT>,
+ /// Is filled with runs that we decided need to have their history fetched during state
+ /// manipulation. Must be drained after handling each input.
+ runs_needing_fetching: VecDeque<HistoryFetchReq>,

- /// Client for accessing server for history pagination etc.
- client: Arc<dyn WorkerClient>,
-
- /// Ensures we stay at or below this worker's maximum concurrent workflow task limit
- wft_semaphore: MeteredSemaphore,
+ history_fetch_refcounter: Arc<HistfetchRC>,
  shutdown_token: CancellationToken,
+ ignore_evicts_on_shutdown: bool,

  metrics: MetricsContext,
- }
- /// All possible inputs to the [WFStream]
- #[derive(derive_more::From, Debug)]
- enum WFStreamInput {
- NewWft(PermittedWFT),
- Local(LocalInput),
- /// The stream given to us which represents the poller (or a mock) terminated.
- PollerDead,
- /// The stream given to us which represents the poller (or a mock) encountered a non-retryable
- /// error while polling
- PollerError(tonic::Status),
- }
- impl From<RunUpdateResponse> for WFStreamInput {
- fn from(r: RunUpdateResponse) -> Self {
- WFStreamInput::Local(LocalInput {
- input: LocalInputs::RunUpdateResponse(r.kind),
- span: r.span,
- })
- }
- }
- /// A non-poller-received input to the [WFStream]
- #[derive(derive_more::DebugCustom)]
- #[debug(fmt = "LocalInput {{ {:?} }}", input)]
- pub(super) struct LocalInput {
- pub input: LocalInputs,
- pub span: Span,
- }
- /// Everything that _isn't_ a poll which may affect workflow state. Always higher priority than
- /// new polls.
- #[derive(Debug, derive_more::From)]
- pub(super) enum LocalInputs {
- Completion(WFActCompleteMsg),
- LocalResolution(LocalResolutionMsg),
- PostActivation(PostActivationMsg),
- RunUpdateResponse(RunUpdateResponseKind),
- RequestEviction(RequestEvictMsg),
- GetStateInfo(GetStateInfoMsg),
- }
- #[derive(Debug, derive_more::From)]
- #[allow(clippy::large_enum_variant)] // PollerDead only ever gets used once, so not important.
- enum ExternalPollerInputs {
- NewWft(PermittedWFT),
- PollerDead,
- PollerError(tonic::Status),
- }
- impl From<ExternalPollerInputs> for WFStreamInput {
- fn from(l: ExternalPollerInputs) -> Self {
- match l {
- ExternalPollerInputs::NewWft(v) => WFStreamInput::NewWft(v),
- ExternalPollerInputs::PollerDead => WFStreamInput::PollerDead,
- ExternalPollerInputs::PollerError(e) => WFStreamInput::PollerError(e),
- }
- }
- }

+ #[cfg(feature = "save_wf_inputs")]
+ wf_state_inputs: Option<UnboundedSender<Vec<u8>>>,
+ }
  impl WFStream {
  /// Constructs workflow state management and returns a stream which outputs activations.
  ///
- /// * `external_wfts` is a stream of validated poll responses as returned by a poller (or mock)
- /// * `wfts_from_complete` is the recv side of a channel that new WFTs from completions should
- /// come down.
+ /// * `wft_stream` is a stream of validated poll responses and fetched history pages as returned
+ /// by a poller (or mock), via [WFTExtractor].
  /// * `local_rx` is a stream of actions that workflow state needs to see. Things like
- /// completions, local activities finishing, etc. See [LocalInputs].
+ /// completions, local activities finishing, etc. See [LocalInputs].
+ /// * `local_activity_request_sink` is used to handle outgoing requests to start or cancel
+ /// local activities, and may return resolutions that need to be handled immediately.
  ///
- /// These inputs are combined, along with an internal feedback channel for run-specific updates,
- /// to form the inputs to a stream of [WFActStreamInput]s. The stream processor then takes
- /// action on those inputs, and then may yield activations.
+ /// The stream inputs are combined into a stream of [WFActStreamInput]s. The stream processor
+ /// then takes action on those inputs, mutating the [WFStream] state, and then may yield
+ /// activations.
  ///
- /// Updating runs may need to do async work like fetching additional history. In order to
- /// facilitate this, each run lives in its own task which is communicated with by sending
- /// [RunAction]s and receiving [RunUpdateResponse]s via its [ManagedRunHandle].
+ /// Importantly, nothing async happens while actually mutating state. This means all changes to
+ /// all workflow state can be represented purely via the stream of inputs, plus the
+ /// calls/retvals from the LA request sink, which is the last unfortunate bit of impurity in
+ /// the design. Eliminating it would be nice, so that all inputs come from the passed-in streams
+ /// and all outputs flow from the return stream, but it's difficult to do so since it would
+ /// require "pausing" in-progress changes to a run while sending & waiting for response from
+ /// local activity management. Likely the best option would be to move the pure state info
+ /// needed to determine immediate responses into LA state machines themselves (out of the LA
+ /// manager), which is a quite substantial change.
  pub(super) fn build(
  basics: WorkflowBasics,
- external_wfts: impl Stream<Item = Result<ValidPollWFTQResponse, tonic::Status>> + Send + 'static,
+ wft_stream: impl Stream<Item = Result<WFTExtractorOutput, tonic::Status>> + Send + 'static,
  local_rx: impl Stream<Item = LocalInput> + Send + 'static,
- client: Arc<dyn WorkerClient>,
- local_activity_request_sink: impl Fn(Vec<LocalActRequest>) -> Vec<LocalActivityResolution>
- + Send
- + Sync
- + 'static,
- ) -> impl Stream<Item = Result<ActivationOrAuto, PollWfError>> {
- let wft_semaphore = MeteredSemaphore::new(
- basics.max_outstanding_wfts,
- basics.metrics.with_new_attrs([workflow_worker_type()]),
- MetricsContext::available_task_slots,
- );
- let wft_sem_clone = wft_semaphore.clone();
- let proceeder = move || {
- let wft_sem_clone = wft_sem_clone.clone();
- async move { wft_sem_clone.acquire_owned().await.unwrap() }
- };
- let poller_wfts = stream_when_allowed(external_wfts, proceeder);
- let (run_update_tx, run_update_rx) = unbounded_channel();
- let local_rx = stream::select(
- local_rx.map(Into::into),
- UnboundedReceiverStream::new(run_update_rx).map(Into::into),
- );
+ local_activity_request_sink: impl LocalActivityRequestSink,
+ ) -> impl Stream<Item = Result<WFStreamOutput, PollWfError>> {
  let all_inputs = stream::select_with_strategy(
- local_rx,
- poller_wfts
- .map(|(wft, permit)| match wft {
- Ok(wft) => ExternalPollerInputs::NewWft(PermittedWFT { wft, permit }),
- Err(e) => ExternalPollerInputs::PollerError(e),
- })
+ local_rx.map(Into::into),
+ wft_stream
+ .map(Into::into)
  .chain(stream::once(async { ExternalPollerInputs::PollerDead }))
  .map(Into::into)
  .boxed(),
  // Priority always goes to the local stream
  |_: &mut ()| PollNext::Left,
  );
+ Self::build_internal(all_inputs, basics, local_activity_request_sink)
+ }
+
+ fn build_internal(
+ all_inputs: impl Stream<Item = WFStreamInput>,
+ basics: WorkflowBasics,
+ local_activity_request_sink: impl LocalActivityRequestSink,
+ ) -> impl Stream<Item = Result<WFStreamOutput, PollWfError>> {
  let mut state = WFStream {
  buffered_polls_need_cache_slot: Default::default(),
  runs: RunCache::new(
  basics.max_cached_workflows,
  basics.namespace.clone(),
- run_update_tx,
- Arc::new(local_activity_request_sink),
+ basics.server_capabilities.clone(),
+ local_activity_request_sink,
  basics.metrics.clone(),
  ),
- client,
- wft_semaphore,
  shutdown_token: basics.shutdown_token,
+ ignore_evicts_on_shutdown: basics.ignore_evicts_on_shutdown,
  metrics: basics.metrics,
+ runs_needing_fetching: Default::default(),
+ history_fetch_refcounter: Arc::new(HistfetchRC {}),
+
+ #[cfg(feature = "save_wf_inputs")]
+ wf_state_inputs: basics.wf_state_inputs,
  };
  all_inputs
- .map(move |action| {
+ .map(move |action: WFStreamInput| {
  let span = span!(Level::DEBUG, "new_stream_input", action=?action);
  let _span_g = span.enter();

- let maybe_activation = match action {
+ #[cfg(feature = "save_wf_inputs")]
+ let maybe_write = state.prep_input(&action);
+
+ let mut activations = vec![];
+ let maybe_act = match action {
  WFStreamInput::NewWft(pwft) => {
- debug!(run_id=%pwft.wft.workflow_execution.run_id, "New WFT");
- state.instantiate_or_update(pwft);
- None
+ debug!(run_id=%pwft.work.execution.run_id, "New WFT");
+ state.instantiate_or_update(pwft)
  }
  WFStreamInput::Local(local_input) => {
  let _span_g = local_input.span.enter();
- match local_input.input {
- LocalInputs::RunUpdateResponse(resp) => {
- state.process_run_update_response(resp)
+ if let Some(rid) = local_input.input.run_id() {
+ if let Some(rh) = state.runs.get_mut(rid) {
+ rh.record_span_fields(&local_input.span);
  }
+ }
+ match local_input.input {
  LocalInputs::Completion(completion) => {
- state.process_completion(completion);
- None
+ activations.extend(
+ state.process_completion(NewOrFetchedComplete::New(completion)),
+ );
+ None // completions can return more than one activation
+ }
+ LocalInputs::FetchedPageCompletion { paginator, update } => {
+ activations.extend(state.process_completion(
+ NewOrFetchedComplete::Fetched(update, paginator),
+ ));
+ None // completions can return more than one activation
  }
  LocalInputs::PostActivation(report) => {
- state.process_post_activation(report);
- None
+ state.process_post_activation(report)
  }
- LocalInputs::LocalResolution(res) => {
- state.local_resolution(res);
- None
+ LocalInputs::LocalResolution(res) => state.local_resolution(res),
+ LocalInputs::HeartbeatTimeout(hbt) => {
+ state.process_heartbeat_timeout(hbt)
  }
  LocalInputs::RequestEviction(evict) => {
- state.request_eviction(evict);
- None
+ state.request_eviction(evict).into_run_update_resp()
  }
  LocalInputs::GetStateInfo(gsi) => {
  let _ = gsi.response_tx.send(WorkflowStateInfo {
  cached_workflows: state.runs.len(),
  outstanding_wft: state.outstanding_wfts(),
- available_wft_permits: state.wft_semaphore.available_permits(),
  });
  None
  }
  }
  }
+ WFStreamInput::FailedFetch { run_id, err } => state
+ .request_eviction(RequestEvictMsg {
+ run_id,
+ message: format!("Fetching history failed: {err:?}"),
+ reason: EvictionReason::PaginationOrHistoryFetch,
+ })
+ .into_run_update_resp(),
  WFStreamInput::PollerDead => {
- debug!("WFT poller died, shutting down");
+ debug!("WFT poller died, beginning shutdown");
  state.shutdown_token.cancel();
  None
  }
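
A standalone sketch of the input-merging pattern used by WFStream::build in the hunk above, assuming only the futures crate and invented Input variants (this is not SDK code): the local stream always wins polling priority over the poller stream, and the poller stream is capped with a PollerDead-style sentinel.

    use futures::executor::block_on;
    use futures::stream::{self, PollNext, StreamExt};

    #[derive(Debug)]
    enum Input {
        Local(&'static str),
        Poll(&'static str),
        PollerDead,
    }

    fn main() {
        block_on(async {
            // Stand-ins for the real local_rx and wft_stream inputs.
            let local_rx =
                stream::iter(vec![Input::Local("completion"), Input::Local("eviction")]);
            let poller = stream::iter(vec![Input::Poll("wft-1"), Input::Poll("wft-2")])
                // As in the diff, the poller stream ends with a sentinel value.
                .chain(stream::once(async { Input::PollerDead }))
                .boxed();

            // The left (local) stream is always polled first, mirroring
            // `|_: &mut ()| PollNext::Left` in the diff.
            let mut all_inputs =
                stream::select_with_strategy(local_rx, poller, |_: &mut ()| PollNext::Left);

            while let Some(input) = all_inputs.next().await {
                println!("{input:?}");
            }
        });
    }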
@@ -221,457 +181,228 @@ impl WFStream {
221
181
  }
222
182
  };
223
183
 
224
- if let Some(ref act) = maybe_activation {
225
- if let Some(run_handle) = state.runs.get_mut(act.run_id()) {
226
- run_handle.insert_outstanding_activation(act);
227
- } else {
228
- dbg_panic!("Tried to insert activation for missing run!");
229
- }
184
+ activations.extend(maybe_act.into_iter());
185
+ activations.extend(state.reconcile_buffered());
186
+
187
+ // Always flush *after* actually handling the input, as this allows LA sink
188
+ // responses to be recorded before the input, so they can be read and buffered to be
189
+ // replayed during the handling of the input itself.
190
+ #[cfg(feature = "save_wf_inputs")]
191
+ if let Some(write) = maybe_write {
192
+ state.flush_write(write);
230
193
  }
231
- state.reconcile_buffered();
194
+
232
195
  if state.shutdown_done() {
196
+ info!("Workflow shutdown is done");
233
197
  return Err(PollWfError::ShutDown);
234
198
  }
235
199
 
236
- Ok(maybe_activation)
200
+ Ok(WFStreamOutput {
201
+ activations: activations.into(),
202
+ fetch_histories: std::mem::take(&mut state.runs_needing_fetching),
203
+ })
237
204
  })
238
- .filter_map(|o| {
239
- future::ready(match o {
240
- Ok(None) => None,
241
- Ok(Some(v)) => Some(Ok(v)),
242
- Err(e) => {
243
- if !matches!(e, PollWfError::ShutDown) {
244
- error!(
205
+ .inspect(|o| {
206
+ if let Some(e) = o.as_ref().err() {
207
+ if !matches!(e, PollWfError::ShutDown) {
208
+ error!(
245
209
  "Workflow processing encountered fatal error and must shut down {:?}",
246
210
  e
247
- );
248
- }
249
- Some(Err(e))
211
+ );
250
212
  }
251
- })
213
+ }
252
214
  })
253
215
  // Stop the stream once we have shut down
254
216
  .take_while(|o| future::ready(!matches!(o, Err(PollWfError::ShutDown))))
255
217
  }
256
218
 
257
- fn process_run_update_response(
258
- &mut self,
259
- resp: RunUpdateResponseKind,
260
- ) -> Option<ActivationOrAuto> {
261
- debug!(resp=%resp, "Processing run update response from machines");
262
- match resp {
263
- RunUpdateResponseKind::Good(mut resp) => {
264
- let run_handle = self
265
- .runs
266
- .get_mut(&resp.run_id)
267
- .expect("Workflow must exist, it just sent us an update response");
268
- run_handle.have_seen_terminal_event = resp.have_seen_terminal_event;
269
- run_handle.more_pending_work = resp.more_pending_work;
270
- run_handle.last_action_acked = true;
271
- run_handle.most_recently_processed_event_number =
272
- resp.most_recently_processed_event_number;
273
-
274
- let r = match resp.outgoing_activation {
275
- Some(ActivationOrAuto::LangActivation(mut activation)) => {
276
- if resp.in_response_to_wft {
277
- let wft = run_handle
278
- .wft
279
- .as_mut()
280
- .expect("WFT must exist for run just updated with one");
281
- // If there are in-poll queries, insert jobs for those queries into the
282
- // activation, but only if we hit the cache. If we didn't, those queries
283
- // will need to be dealt with once replay is over
284
- if !wft.pending_queries.is_empty() && wft.hit_cache {
285
- put_queries_in_act(&mut activation, wft);
286
- }
287
- }
288
-
289
- if activation.jobs.is_empty() {
290
- dbg_panic!("Should not send lang activation with no jobs");
291
- }
292
- Some(ActivationOrAuto::LangActivation(activation))
293
- }
294
- Some(ActivationOrAuto::ReadyForQueries(mut act)) => {
295
- if let Some(wft) = run_handle.wft.as_mut() {
296
- put_queries_in_act(&mut act, wft);
297
- Some(ActivationOrAuto::LangActivation(act))
298
- } else {
299
- dbg_panic!("Ready for queries but no WFT!");
300
- None
301
- }
302
- }
303
- a @ Some(ActivationOrAuto::Autocomplete { .. }) => a,
304
- None => {
305
- // If the response indicates there is no activation to send yet but there
306
- // is more pending work, we should check again.
307
- if run_handle.more_pending_work {
308
- run_handle.check_more_activations();
309
- None
310
- } else if let Some(reason) = run_handle.trying_to_evict.as_ref() {
311
- // If a run update came back and had nothing to do, but we're trying to
312
- // evict, just do that now as long as there's no other outstanding work.
313
- if run_handle.activation.is_none() && !run_handle.more_pending_work {
314
- let mut evict_act = create_evict_activation(
315
- resp.run_id,
316
- reason.message.clone(),
317
- reason.reason,
318
- );
319
- evict_act.history_length =
320
- run_handle.most_recently_processed_event_number as u32;
321
- Some(ActivationOrAuto::LangActivation(evict_act))
322
- } else {
323
- None
324
- }
325
- } else {
326
- None
327
- }
328
- }
329
- };
330
- if let Some(f) = resp.fulfillable_complete.take() {
331
- f.fulfill();
332
- }
333
-
334
- // After each run update, check if it's ready to handle any buffered poll
335
- if matches!(&r, Some(ActivationOrAuto::Autocomplete { .. }) | None)
336
- && !run_handle.has_any_pending_work(false, true)
337
- {
338
- if let Some(bufft) = run_handle.buffered_resp.take() {
339
- self.instantiate_or_update(bufft);
340
- }
341
- }
342
- r
343
- }
344
- RunUpdateResponseKind::Fail(fail) => {
345
- if let Some(r) = self.runs.get_mut(&fail.run_id) {
346
- r.last_action_acked = true;
347
- }
348
-
349
- if let Some(resp_chan) = fail.completion_resp {
350
- // Automatically fail the workflow task in the event we couldn't update machines
351
- let fail_cause = if matches!(&fail.err, WFMachinesError::Nondeterminism(_)) {
352
- WorkflowTaskFailedCause::NonDeterministicError
353
- } else {
354
- WorkflowTaskFailedCause::Unspecified
355
- };
356
- let wft_fail_str = format!("{:?}", fail.err);
357
- self.failed_completion(
358
- fail.run_id,
359
- fail_cause,
360
- fail.err.evict_reason(),
361
- TFailure::application_failure(wft_fail_str, false).into(),
362
- resp_chan,
363
- );
364
- } else {
365
- // TODO: This should probably also fail workflow tasks, but that wasn't
366
- // implemented pre-refactor either.
367
- warn!(error=?fail.err, run_id=%fail.run_id, "Error while updating workflow");
368
- self.request_eviction(RequestEvictMsg {
369
- run_id: fail.run_id,
370
- message: format!("Error while updating workflow: {:?}", fail.err),
371
- reason: fail.err.evict_reason(),
372
- });
373
- }
374
- None
219
+ /// Instantiate or update run machines with a new WFT
220
+ #[instrument(skip(self, pwft)
221
+ fields(run_id=%pwft.work.execution.run_id,
222
+ workflow_id=%pwft.work.execution.workflow_id))]
223
+ fn instantiate_or_update(&mut self, pwft: PermittedWFT) -> RunUpdateAct {
224
+ match self._instantiate_or_update(pwft) {
225
+ Err(histfetch) => {
226
+ self.runs_needing_fetching.push_back(histfetch);
227
+ Default::default()
375
228
  }
229
+ Ok(r) => r,
376
230
  }
377
231
  }
378
232
 
379
- #[instrument(level = "debug", skip(self, pwft),
380
- fields(run_id=%pwft.wft.workflow_execution.run_id))]
381
- fn instantiate_or_update(&mut self, pwft: PermittedWFT) {
382
- let (mut work, permit) = if let Some(w) = self.buffer_resp_if_outstanding_work(pwft) {
383
- (w.wft, w.permit)
233
+ fn _instantiate_or_update(
234
+ &mut self,
235
+ pwft: PermittedWFT,
236
+ ) -> Result<RunUpdateAct, HistoryFetchReq> {
237
+ // If the run already exists, possibly buffer the work and return early if we can't handle
238
+ // it yet.
239
+ let pwft = if let Some(rh) = self.runs.get_mut(&pwft.work.execution.run_id) {
240
+ if let Some(w) = rh.buffer_wft_if_outstanding_work(pwft) {
241
+ w
242
+ } else {
243
+ return Ok(None);
244
+ }
384
245
  } else {
385
- return;
246
+ pwft
386
247
  };
387
248
 
388
- let run_id = work.workflow_execution.run_id.clone();
249
+ let run_id = pwft.work.execution.run_id.clone();
389
250
  // If our cache is full and this WFT is for an unseen run we must first evict a run before
390
251
  // we can deal with this task. So, buffer the task in that case.
391
252
  if !self.runs.has_run(&run_id) && self.runs.is_full() {
392
- self.buffer_resp_on_full_cache(PermittedWFT { wft: work, permit });
393
- return;
253
+ self.buffer_resp_on_full_cache(pwft);
254
+ return Ok(None);
394
255
  }
395
256
 
396
- let start_event_id = work.history.events.first().map(|e| e.event_id);
397
- debug!(
398
- run_id = %run_id,
399
- task_token = %&work.task_token,
400
- history_length = %work.history.events.len(),
401
- start_event_id = ?start_event_id,
402
- has_legacy_query = %work.legacy_query.is_some(),
403
- attempt = %work.attempt,
404
- "Applying new workflow task from server"
405
- );
406
-
407
- let wft_info = WorkflowTaskInfo {
408
- attempt: work.attempt,
409
- task_token: work.task_token,
410
- };
411
- let poll_resp_is_incremental = work
412
- .history
413
- .events
414
- .get(0)
415
- .map(|ev| ev.event_id > 1)
416
- .unwrap_or_default();
417
- let poll_resp_is_incremental = poll_resp_is_incremental || work.history.events.is_empty();
418
-
419
- let mut did_miss_cache = !poll_resp_is_incremental;
420
-
421
- let page_token = if !self.runs.has_run(&run_id) && poll_resp_is_incremental {
257
+ // This check can't really be lifted up higher since we could EX: See it's in the cache,
258
+ // not fetch more history, send the task, see cache is full, buffer it, then evict that
259
+ // run, and now we still have a cache miss.
260
+ if !self.runs.has_run(&run_id) && pwft.work.is_incremental() {
422
261
  debug!(run_id=?run_id, "Workflow task has partial history, but workflow is not in \
423
262
  cache. Will fetch history");
424
263
  self.metrics.sticky_cache_miss();
425
- did_miss_cache = true;
426
- NextPageToken::FetchFromStart
427
- } else {
428
- work.next_page_token.into()
429
- };
430
- let history_update = HistoryUpdate::new(
431
- HistoryPaginator::new(
432
- work.history,
433
- work.workflow_execution.workflow_id.clone(),
434
- run_id.clone(),
435
- page_token,
436
- self.client.clone(),
437
- ),
438
- work.previous_started_event_id,
439
- );
440
- let legacy_query_from_poll = work
441
- .legacy_query
442
- .take()
443
- .map(|q| query_to_job(LEGACY_QUERY_ID.to_string(), q));
444
-
445
- let mut pending_queries = work.query_requests.into_iter().collect::<Vec<_>>();
446
- if !pending_queries.is_empty() && legacy_query_from_poll.is_some() {
447
- error!(
448
- "Server issued both normal and legacy queries. This should not happen. Please \
449
- file a bug report."
450
- );
451
- self.request_eviction(RequestEvictMsg {
452
- run_id,
453
- message: "Server issued both normal and legacy query".to_string(),
454
- reason: EvictionReason::Fatal,
455
- });
456
- return;
457
- }
458
- if let Some(lq) = legacy_query_from_poll {
459
- pending_queries.push(lq);
264
+ return Err(HistoryFetchReq::Full(
265
+ CacheMissFetchReq { original_wft: pwft },
266
+ self.history_fetch_refcounter.clone(),
267
+ ));
460
268
  }
461
269
 
462
- let start_time = Instant::now();
463
- let run_handle = self.runs.instantiate_or_update(
464
- &run_id,
465
- &work.workflow_execution.workflow_id,
466
- &work.workflow_type,
467
- history_update,
468
- start_time,
469
- );
470
- run_handle.wft = Some(OutstandingTask {
471
- info: wft_info,
472
- hit_cache: !did_miss_cache,
473
- pending_queries,
474
- start_time,
475
- permit,
476
- })
270
+ let rur = self.runs.instantiate_or_update(pwft);
271
+ Ok(rur)
477
272
  }
478
273
 
479
- #[instrument(level = "debug", skip(self, complete),
480
- fields(run_id=%complete.completion.run_id()))]
481
- fn process_completion(&mut self, complete: WFActCompleteMsg) {
482
- match complete.completion {
483
- ValidatedCompletion::Success { run_id, commands } => {
484
- self.successful_completion(run_id, commands, complete.response_tx);
485
- }
486
- ValidatedCompletion::Fail { run_id, failure } => {
487
- self.failed_completion(
488
- run_id,
489
- WorkflowTaskFailedCause::Unspecified,
274
+ fn process_completion(&mut self, complete: NewOrFetchedComplete) -> Vec<ActivationOrAuto> {
275
+ let rh = if let Some(rh) = self.runs.get_mut(complete.run_id()) {
276
+ rh
277
+ } else {
278
+ dbg_panic!("Run missing during completion {:?}", complete);
279
+ return vec![];
280
+ };
281
+ let mut acts: Vec<_> = match complete {
282
+ NewOrFetchedComplete::New(complete) => match complete.completion {
283
+ ValidatedCompletion::Success {
284
+ commands,
285
+ used_flags,
286
+ ..
287
+ } => match rh.successful_completion(commands, used_flags, complete.response_tx) {
288
+ Ok(acts) => acts,
289
+ Err(npr) => {
290
+ self.runs_needing_fetching
291
+ .push_back(HistoryFetchReq::NextPage(
292
+ npr,
293
+ self.history_fetch_refcounter.clone(),
294
+ ));
295
+ None
296
+ }
297
+ },
298
+ ValidatedCompletion::Fail { failure, .. } => rh.failed_completion(
299
+ failure.force_cause(),
490
300
  EvictionReason::LangFail,
491
301
  failure,
492
302
  complete.response_tx,
493
- );
303
+ ),
304
+ },
305
+ NewOrFetchedComplete::Fetched(update, paginator) => {
306
+ rh.fetched_page_completion(update, paginator)
494
307
  }
495
308
  }
309
+ .into_iter()
310
+ .collect();
496
311
  // Always queue evictions after completion when we have a zero-size cache
497
312
  if self.runs.cache_capacity() == 0 {
498
- self.request_eviction_of_lru_run();
313
+ acts.extend(self.request_eviction_of_lru_run().into_run_update_resp())
499
314
  }
315
+ acts
500
316
  }
501
317
 
502
- fn successful_completion(
503
- &mut self,
504
- run_id: String,
505
- mut commands: Vec<WFCommand>,
506
- resp_chan: oneshot::Sender<ActivationCompleteResult>,
507
- ) {
508
- let activation_was_only_eviction = self.activation_has_only_eviction(&run_id);
509
- let (task_token, has_pending_query, start_time) =
510
- if let Some(entry) = self.get_task(&run_id) {
511
- (
512
- entry.info.task_token.clone(),
513
- !entry.pending_queries.is_empty(),
514
- entry.start_time,
515
- )
516
- } else {
517
- if !activation_was_only_eviction {
518
- // Not an error if this was an eviction, since it's normal to issue eviction
519
- // activations without an associated workflow task in that case.
520
- dbg_panic!(
521
- "Attempted to complete activation for run {} without associated workflow task",
318
+ fn process_post_activation(&mut self, report: PostActivationMsg) -> RunUpdateAct {
319
+ let run_id = &report.run_id;
320
+ let wft_from_complete = report.wft_from_complete;
321
+ if let Some((wft, _)) = &wft_from_complete {
322
+ if &wft.execution.run_id != run_id {
323
+ dbg_panic!(
324
+ "Server returned a WFT on completion for a different run ({}) than the \
325
+ one being completed ({}). This is a server bug.",
326
+ wft.execution.run_id,
522
327
  run_id
523
- );
524
- }
525
- self.reply_to_complete(&run_id, ActivationCompleteOutcome::DoNothing, resp_chan);
526
- return;
527
- };
528
-
529
- // If the only command from the activation is a legacy query response, that means we need
530
- // to respond differently than a typical activation.
531
- if matches!(&commands.as_slice(),
532
- &[WFCommand::QueryResponse(qr)] if qr.query_id == LEGACY_QUERY_ID)
533
- {
534
- let qr = match commands.remove(0) {
535
- WFCommand::QueryResponse(qr) => qr,
536
- _ => unreachable!("We just verified this is the only command"),
537
- };
538
- self.reply_to_complete(
539
- &run_id,
540
- ActivationCompleteOutcome::ReportWFTSuccess(ServerCommandsWithWorkflowInfo {
541
- task_token,
542
- action: ActivationAction::RespondLegacyQuery {
543
- result: Box::new(qr),
544
- },
545
- }),
546
- resp_chan,
547
- );
548
- } else {
549
- // First strip out query responses from other commands that actually affect machines
550
- // Would be prettier with `drain_filter`
551
- let mut i = 0;
552
- let mut query_responses = vec![];
553
- while i < commands.len() {
554
- if matches!(commands[i], WFCommand::QueryResponse(_)) {
555
- if let WFCommand::QueryResponse(qr) = commands.remove(i) {
556
- query_responses.push(qr);
557
- }
558
- } else {
559
- i += 1;
560
- }
561
- }
562
-
563
- let activation_was_eviction = self.activation_has_eviction(&run_id);
564
- if let Some(rh) = self.runs.get_mut(&run_id) {
565
- rh.send_completion(RunActivationCompletion {
566
- task_token,
567
- start_time,
568
- commands,
569
- activation_was_eviction,
570
- activation_was_only_eviction,
571
- has_pending_query,
572
- query_responses,
573
- resp_chan: Some(resp_chan),
574
- });
575
- } else {
576
- dbg_panic!("Run {} missing during completion", run_id);
328
+ );
577
329
  }
578
- };
579
- }
580
-
581
- fn failed_completion(
582
- &mut self,
583
- run_id: String,
584
- cause: WorkflowTaskFailedCause,
585
- reason: EvictionReason,
586
- failure: Failure,
587
- resp_chan: oneshot::Sender<ActivationCompleteResult>,
588
- ) {
589
- let tt = if let Some(tt) = self.get_task(&run_id).map(|t| t.info.task_token.clone()) {
590
- tt
591
- } else {
592
- dbg_panic!(
593
- "No workflow task for run id {} found when trying to fail activation",
594
- run_id
595
- );
596
- self.reply_to_complete(&run_id, ActivationCompleteOutcome::DoNothing, resp_chan);
597
- return;
598
- };
599
-
600
- if let Some(m) = self.run_metrics(&run_id) {
601
- m.wf_task_failed();
602
330
  }
603
- let message = format!("Workflow activation completion failed: {:?}", &failure);
604
- // Blow up any cached data associated with the workflow
605
- let should_report = match self.request_eviction(RequestEvictMsg {
606
- run_id: run_id.clone(),
607
- message,
608
- reason,
609
- }) {
610
- EvictionRequestResult::EvictionRequested(Some(attempt))
611
- | EvictionRequestResult::EvictionAlreadyRequested(Some(attempt)) => attempt <= 1,
612
- _ => false,
613
- };
614
- // If the outstanding WFT is a legacy query task, report that we need to fail it
615
- let outcome = if self
616
- .runs
617
- .get(&run_id)
618
- .map(|rh| rh.pending_work_is_legacy_query())
619
- .unwrap_or_default()
620
- {
621
- ActivationCompleteOutcome::ReportWFTFail(
622
- FailedActivationWFTReport::ReportLegacyQueryFailure(tt, failure),
623
- )
624
- } else if should_report {
625
- ActivationCompleteOutcome::ReportWFTFail(FailedActivationWFTReport::Report(
626
- tt, cause, failure,
627
- ))
628
- } else {
629
- ActivationCompleteOutcome::DoNothing
630
- };
631
- self.reply_to_complete(&run_id, outcome, resp_chan);
632
- }
633
331
 
634
-    fn process_post_activation(&mut self, report: PostActivationMsg) {
-        let run_id = &report.run_id;
+        let mut res = None;
 
         // If we reported to server, we always want to mark it complete.
-        let maybe_t = self.complete_wft(run_id, report.reported_wft_to_server);
+        let maybe_t = self.complete_wft(run_id, report.wft_report_status);
+        // Delete the activation
+        let activation = self
+            .runs
+            .get_mut(run_id)
+            .and_then(|rh| rh.delete_activation());
+
+        // Evict the run if the activation contained an eviction
+        let mut applied_buffered_poll_for_this_run = false;
+        if activation.map(|a| a.has_eviction()).unwrap_or_default() {
+            debug!(run_id=%run_id, "Evicting run");
+
+            if let Some(mut rh) = self.runs.remove(run_id) {
+                if let Some(buff) = rh.take_buffered_wft() {
+                    // Don't try to apply a buffered poll for this run if we just got a new WFT
+                    // from completing, because by definition that buffered poll is now an
+                    // out-of-date WFT.
+                    if wft_from_complete.is_none() {
+                        res = self.instantiate_or_update(buff);
+                        applied_buffered_poll_for_this_run = true;
+                    }
+                }
+            }
 
-        if self
-            .get_activation(run_id)
-            .map(|a| a.has_eviction())
-            .unwrap_or_default()
-        {
-            self.evict_run(run_id);
+            // Attempt to apply a buffered poll for some *other* run, if we didn't have a wft
+            // from complete or a buffered poll for *this* run.
+            if wft_from_complete.is_none() && !applied_buffered_poll_for_this_run {
+                if let Some(buff) = self.buffered_polls_need_cache_slot.pop_front() {
+                    res = self.instantiate_or_update(buff);
+                }
+            }
         };
 
-        if let Some(wft) = report.wft_from_complete {
-            debug!(run_id=%wft.workflow_execution.run_id, "New WFT from completion");
+        if let Some((wft, pag)) = wft_from_complete {
+            debug!(run_id=%wft.execution.run_id, "New WFT from completion");
             if let Some(t) = maybe_t {
-                self.instantiate_or_update(PermittedWFT {
-                    wft,
+                res = self.instantiate_or_update(PermittedWFT {
+                    work: wft,
                     permit: t.permit,
-                })
+                    paginator: pag,
+                });
             }
         }
 
-        if let Some(rh) = self.runs.get_mut(run_id) {
-            // Delete the activation
-            rh.activation.take();
-            // Attempt to produce the next activation if needed
-            rh.check_more_activations();
+        if res.is_none() {
+            if let Some(rh) = self.runs.get_mut(run_id) {
+                // Attempt to produce the next activation if needed
+                res = rh.check_more_activations();
+            }
         }
+        res
     }
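With this change a single post-activation pass produces at most one follow-up action, and the possible sources are consulted in a fixed order: a WFT handed back by the completion itself wins over a buffered poll for the evicted run, which wins over a buffered poll waiting on a cache slot, which wins over whatever the run handle can produce next. A minimal, self-contained sketch of that "first producer wins" ordering; the enum and function here are illustrative stand-ins, not the SDK's actual types:

    /// Illustrative stand-in for the sources of the next piece of work.
    #[derive(Debug, PartialEq)]
    enum NextWork {
        WftFromCompletion,
        BufferedForThisRun,
        BufferedNeedingCacheSlot,
        MoreFromRunHandle,
    }

    /// First available source wins, mirroring the `res.is_none()` chain in the diff.
    fn pick_next(
        wft_from_complete: bool,
        buffered_for_run: bool,
        buffered_needing_slot: bool,
        run_has_more: bool,
    ) -> Option<NextWork> {
        if wft_from_complete {
            Some(NextWork::WftFromCompletion)
        } else if buffered_for_run {
            Some(NextWork::BufferedForThisRun)
        } else if buffered_needing_slot {
            Some(NextWork::BufferedNeedingCacheSlot)
        } else if run_has_more {
            Some(NextWork::MoreFromRunHandle)
        } else {
            None
        }
    }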
 
-    fn local_resolution(&mut self, msg: LocalResolutionMsg) {
+    fn local_resolution(&mut self, msg: LocalResolutionMsg) -> RunUpdateAct {
         let run_id = msg.run_id;
         if let Some(rh) = self.runs.get_mut(&run_id) {
-            rh.send_local_resolution(msg.res)
+            rh.local_resolution(msg.res)
         } else {
             // It isn't an explicit error if the machine is missing when a local activity resolves.
             // This can happen if an activity reports a timeout after we stopped caring about it.
             debug!(run_id = %run_id,
                    "Tried to resolve a local activity for a run we are no longer tracking");
+            None
+        }
+    }
+
+    fn process_heartbeat_timeout(&mut self, run_id: String) -> RunUpdateAct {
+        if let Some(rh) = self.runs.get_mut(&run_id) {
+            rh.heartbeat_timeout()
+        } else {
+            None
         }
     }
 
@@ -679,19 +410,10 @@ impl WFStream {
     /// activation to evict the workflow from the lang side. Workflow will not *actually* be evicted
     /// until lang replies to that activation
     fn request_eviction(&mut self, info: RequestEvictMsg) -> EvictionRequestResult {
-        let activation_has_eviction = self.activation_has_eviction(&info.run_id);
         if let Some(rh) = self.runs.get_mut(&info.run_id) {
-            let attempts = rh.wft.as_ref().map(|wt| wt.info.attempt);
-            if !activation_has_eviction && rh.trying_to_evict.is_none() {
-                debug!(run_id=%info.run_id, reason=%info.message, "Eviction requested");
-                rh.trying_to_evict = Some(info);
-                rh.check_more_activations();
-                EvictionRequestResult::EvictionRequested(attempts)
-            } else {
-                EvictionRequestResult::EvictionAlreadyRequested(attempts)
-            }
+            rh.request_eviction(info)
         } else {
-            warn!(run_id=%info.run_id, "Eviction requested for unknown run");
+            debug!(run_id=%info.run_id, "Eviction requested for unknown run");
             EvictionRequestResult::NotFound
         }
     }
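The per-run eviction bookkeeping now lives on the run handle, but callers still branch on `EvictionRequestResult`. A sketch of how a caller can decide whether a failed activation is worth reporting, with the enum shape inferred only from the variants referenced in this diff (the attempt counter is assumed to be a `u32`):

    /// Shape inferred from the variants used in this diff; the real definition
    /// lives in sdk-core and may differ.
    enum EvictionRequestResult {
        EvictionRequested(Option<u32>),
        EvictionAlreadyRequested(Option<u32>),
        NotFound,
    }

    /// Report a failed activation to the server only on the first WFT attempt, so
    /// retries of the same task don't produce duplicate failure reports.
    fn should_report_failure(res: &EvictionRequestResult) -> bool {
        match res {
            EvictionRequestResult::EvictionRequested(Some(attempt))
            | EvictionRequestResult::EvictionAlreadyRequested(Some(attempt)) => *attempt <= 1,
            _ => false,
        }
    }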
@@ -710,36 +432,10 @@ impl WFStream {
         }
     }
 
-    /// Evict a workflow from the cache by its run id. Any existing pending activations will be
-    /// destroyed, and any outstanding activations invalidated.
-    fn evict_run(&mut self, run_id: &str) {
-        debug!(run_id=%run_id, "Evicting run");
-
-        let mut did_take_buff = false;
-        // Now it can safely be deleted, it'll get recreated once the un-buffered poll is handled if
-        // there was one.
-        if let Some(mut rh) = self.runs.remove(run_id) {
-            rh.handle.abort();
-
-            if let Some(buff) = rh.buffered_resp.take() {
-                self.instantiate_or_update(buff);
-                did_take_buff = true;
-            }
-        }
-
-        if !did_take_buff {
-            // If there wasn't a buffered poll, there might be one for a different run which needs
-            // a free cache slot, and now there is.
-            if let Some(buff) = self.buffered_polls_need_cache_slot.pop_front() {
-                self.instantiate_or_update(buff);
-            }
-        }
-    }
-
     fn complete_wft(
         &mut self,
         run_id: &str,
-        reported_wft_to_server: bool,
+        wft_report_status: WFTReportStatus,
     ) -> Option<OutstandingTask> {
         // If the WFT completion wasn't sent to the server, but we did see the final event, we still
         // want to clear the workflow task. This can really only happen in replay testing, where we
@@ -749,9 +445,9 @@ impl WFStream {
         let saw_final = self
             .runs
             .get(run_id)
-            .map(|r| r.have_seen_terminal_event)
+            .map(|r| r.have_seen_terminal_event())
             .unwrap_or_default();
-        if !saw_final && !reported_wft_to_server {
+        if !saw_final && matches!(wft_report_status, WFTReportStatus::NotReported) {
             return None;
         }
 
@@ -759,60 +455,26 @@ impl WFStream {
             // Can't mark the WFT complete if there are pending queries, as doing so would destroy
             // them.
             if rh
-                .wft
-                .as_ref()
+                .wft()
                 .map(|wft| !wft.pending_queries.is_empty())
                 .unwrap_or_default()
             {
                 return None;
            }
 
-            debug!("Marking WFT completed");
-            let retme = rh.wft.take();
-            if let Some(ot) = &retme {
-                if let Some(m) = self.run_metrics(run_id) {
-                    m.wf_task_latency(ot.start_time.elapsed());
-                }
-            }
-            retme
+            rh.mark_wft_complete(wft_report_status)
         } else {
             None
         }
     }
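`complete_wft` now takes a `WFTReportStatus` instead of the old `reported_wft_to_server` bool, and it only declines to clear the task when nothing was reported and the run's terminal event was never seen. A sketch of that gate; the enum shape is assumed from the single variant matched in this diff and the real enum may carry more variants:

    /// Only `NotReported` is matched in this diff; the real enum may have more variants.
    enum WFTReportStatus {
        Reported,
        NotReported,
    }

    /// Mirrors the early-return in `complete_wft`: keep the task outstanding only when
    /// nothing was reported to the server and the run's terminal event was never seen.
    fn should_clear_wft(saw_final_event: bool, status: WFTReportStatus) -> bool {
        saw_final_event || !matches!(status, WFTReportStatus::NotReported)
    }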
 
-    /// Stores some work if there is any outstanding WFT or activation for the run. If there was
-    /// not, returns the work back out inside the option.
-    fn buffer_resp_if_outstanding_work(&mut self, work: PermittedWFT) -> Option<PermittedWFT> {
-        let run_id = &work.wft.workflow_execution.run_id;
-        if let Some(mut run) = self.runs.get_mut(run_id) {
-            let about_to_issue_evict = run.trying_to_evict.is_some() && !run.last_action_acked;
-            let has_wft = run.wft.is_some();
-            let has_activation = run.activation.is_some();
-            if has_wft
-                || has_activation
-                || about_to_issue_evict
-                || run.more_pending_work
-                || !run.last_action_acked
-            {
-                debug!(run_id = %run_id, run = ?run,
-                       "Got new WFT for a run with outstanding work, buffering it");
-                run.buffered_resp = Some(work);
-                None
-            } else {
-                Some(work)
-            }
-        } else {
-            Some(work)
-        }
-    }
-
     fn buffer_resp_on_full_cache(&mut self, work: PermittedWFT) {
-        debug!(run_id=%work.wft.workflow_execution.run_id, "Buffering WFT because cache is full");
+        debug!(run_id=%work.work.execution.run_id, "Buffering WFT because cache is full");
         // If there's already a buffered poll for the run, replace it.
         if let Some(rh) = self
             .buffered_polls_need_cache_slot
             .iter_mut()
-            .find(|w| w.wft.workflow_execution.run_id == work.wft.workflow_execution.run_id)
+            .find(|w| w.work.execution.run_id == work.work.execution.run_id)
         {
             *rh = work;
         } else {
@@ -823,7 +485,7 @@ impl WFStream {
 
     /// Makes sure we have enough pending evictions to fulfill the needs of buffered WFTs who are
     /// waiting on a cache slot
-    fn reconcile_buffered(&mut self) {
+    fn reconcile_buffered(&mut self) -> Vec<ActivationOrAuto> {
         // We must ensure that there are at least as many pending evictions as there are tasks
         // that we might need to un-buffer (skipping runs which already have buffered tasks for
         // themselves)
@@ -832,109 +494,222 @@ impl WFStream {
         let num_existing_evictions = self
             .runs
             .runs_lru_order()
-            .filter(|(_, h)| h.trying_to_evict.is_some())
+            .filter(|(_, h)| h.is_trying_to_evict())
             .count();
         let mut num_evicts_needed = num_in_buff.saturating_sub(num_existing_evictions);
         for (rid, handle) in self.runs.runs_lru_order() {
             if num_evicts_needed == 0 {
                 break;
             }
-            if handle.buffered_resp.is_none() {
+            if !handle.has_buffered_wft() {
                 num_evicts_needed -= 1;
                 evict_these.push(rid.to_string());
             }
         }
+        let mut acts = vec![];
         for run_id in evict_these {
-            self.request_eviction(RequestEvictMsg {
-                run_id,
-                message: "Workflow cache full".to_string(),
-                reason: EvictionReason::CacheFull,
-            });
+            acts.extend(
+                self.request_eviction(RequestEvictMsg {
+                    run_id,
+                    message: "Workflow cache full".to_string(),
+                    reason: EvictionReason::CacheFull,
+                })
+                .into_run_update_resp(),
+            );
         }
-    }
-
-    fn reply_to_complete(
-        &self,
-        run_id: &str,
-        outcome: ActivationCompleteOutcome,
-        chan: oneshot::Sender<ActivationCompleteResult>,
-    ) {
-        let most_recently_processed_event = self
-            .runs
-            .peek(run_id)
-            .map(|rh| rh.most_recently_processed_event_number)
-            .unwrap_or_default();
-        chan.send(ActivationCompleteResult {
-            most_recently_processed_event,
-            outcome,
-        })
-        .expect("Rcv half of activation reply not dropped");
+        acts
     }
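`reconcile_buffered` now returns the activations produced by the eviction requests it issues, but the sizing rule is unchanged: request only as many additional evictions as there are buffered WFTs still lacking a cache slot, after counting evictions already in flight. A small worked sketch of that arithmetic:

    /// Number of additional evictions to request so every buffered WFT waiting on a
    /// cache slot can eventually be instantiated. Saturating, so having more evictions
    /// in flight than buffered tasks never underflows.
    fn evictions_needed(buffered_needing_slot: usize, evictions_in_flight: usize) -> usize {
        buffered_needing_slot.saturating_sub(evictions_in_flight)
    }

    fn main() {
        // Three buffered WFTs waiting on slots, one eviction already pending => two more.
        assert_eq!(evictions_needed(3, 1), 2);
        // More evictions in flight than buffered tasks => nothing extra to do.
        assert_eq!(evictions_needed(1, 4), 0);
    }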
 
     fn shutdown_done(&self) -> bool {
-        let all_runs_ready = self
-            .runs
-            .handles()
-            .all(|r| !r.has_any_pending_work(true, false));
-        if self.shutdown_token.is_cancelled() && all_runs_ready {
-            info!("Workflow shutdown is done");
-            true
-        } else {
-            false
+        if self.shutdown_token.is_cancelled() {
+            if Arc::strong_count(&self.history_fetch_refcounter) > 1 {
+                // Don't exit if there are outstanding fetch requests
+                return false;
+            }
+            let all_runs_ready = self
+                .runs
+                .handles()
+                .all(|r| !r.has_any_pending_work(self.ignore_evicts_on_shutdown, false));
+            if all_runs_ready {
+                return true;
+            }
         }
-    }
-
-    fn get_task(&mut self, run_id: &str) -> Option<&OutstandingTask> {
-        self.runs.get(run_id).and_then(|rh| rh.wft.as_ref())
-    }
-
-    fn get_activation(&mut self, run_id: &str) -> Option<&OutstandingActivation> {
-        self.runs.get(run_id).and_then(|rh| rh.activation.as_ref())
-    }
-
-    fn run_metrics(&mut self, run_id: &str) -> Option<&MetricsContext> {
-        self.runs.get(run_id).map(|r| &r.metrics)
-    }
-
-    fn activation_has_only_eviction(&mut self, run_id: &str) -> bool {
-        self.runs
-            .get(run_id)
-            .and_then(|rh| rh.activation)
-            .map(OutstandingActivation::has_only_eviction)
-            .unwrap_or_default()
-    }
-
-    fn activation_has_eviction(&mut self, run_id: &str) -> bool {
-        self.runs
-            .get(run_id)
-            .and_then(|rh| rh.activation)
-            .map(OutstandingActivation::has_eviction)
-            .unwrap_or_default()
+        false
     }
 
     fn outstanding_wfts(&self) -> usize {
-        self.runs.handles().filter(|r| r.wft.is_some()).count()
+        self.runs.handles().filter(|r| r.wft().is_some()).count()
    }
 
     // Useful when debugging
     #[allow(dead_code)]
     fn info_dump(&self, run_id: &str) {
         if let Some(r) = self.runs.peek(run_id) {
-            info!(run_id, wft=?r.wft, activation=?r.activation, buffered=r.buffered_resp.is_some(),
-                  trying_to_evict=r.trying_to_evict.is_some(), more_work=r.more_pending_work,
-                  last_action_acked=r.last_action_acked);
+            info!(run_id, wft=?r.wft(), activation=?r.activation(),
+                  buffered_wft=r.has_buffered_wft(),
+                  trying_to_evict=r.is_trying_to_evict(), more_work=r.more_pending_work());
         } else {
             info!(run_id, "Run not found");
         }
     }
 }
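Shutdown is now additionally gated on outstanding history fetches: the stream keeps one reference to a shared handle and every in-flight fetch holds another, so a strong count above one means work is still pending. A minimal sketch of that refcount idea; the names here are illustrative, not the SDK's fields:

    use std::sync::Arc;

    // Hypothetical guard a fetch would hold while it is in flight.
    struct FetchGuard(Arc<()>);

    fn shutdown_allowed(cancelled: bool, fetch_refcounter: &Arc<()>, all_runs_ready: bool) -> bool {
        // One strong reference is always held by the stream itself; anything above that
        // is an in-flight fetch that must finish before shutdown can complete.
        cancelled && Arc::strong_count(fetch_refcounter) == 1 && all_runs_ready
    }

    fn main() {
        let counter = Arc::new(());
        assert!(shutdown_allowed(true, &counter, true));
        let _in_flight = FetchGuard(counter.clone());
        assert!(!shutdown_allowed(true, &counter, true));
    }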
 
-/// Drains pending queries from the workflow task and appends them to the activation's jobs
-fn put_queries_in_act(act: &mut WorkflowActivation, wft: &mut OutstandingTask) {
-    debug!(queries=?wft.pending_queries, "Dispatching queries");
-    let query_jobs = wft
-        .pending_queries
-        .drain(..)
-        .map(|q| workflow_activation_job::Variant::QueryWorkflow(q).into());
-    act.jobs.extend(query_jobs);
+/// All possible inputs to the [WFStream]
+#[derive(derive_more::From, Debug)]
+#[cfg_attr(
+    feature = "save_wf_inputs",
+    derive(serde::Serialize, serde::Deserialize)
+)]
+enum WFStreamInput {
+    NewWft(PermittedWFT),
+    Local(LocalInput),
+    /// The stream given to us which represents the poller (or a mock) terminated.
+    PollerDead,
+    /// The stream given to us which represents the poller (or a mock) encountered a non-retryable
+    /// error while polling
+    PollerError(
+        #[cfg_attr(
+            feature = "save_wf_inputs",
+            serde(with = "tonic_status_serde::SerdeStatus")
+        )]
+        tonic::Status,
+    ),
+    FailedFetch {
+        run_id: String,
+        #[cfg_attr(
+            feature = "save_wf_inputs",
+            serde(with = "tonic_status_serde::SerdeStatus")
+        )]
+        err: tonic::Status,
+    },
+}
+
+/// A non-poller-received input to the [WFStream]
+#[derive(derive_more::DebugCustom)]
+#[cfg_attr(
+    feature = "save_wf_inputs",
+    derive(serde::Serialize, serde::Deserialize)
+)]
+#[debug(fmt = "LocalInput {{ {input:?} }}")]
+pub(super) struct LocalInput {
+    pub input: LocalInputs,
+    #[cfg_attr(feature = "save_wf_inputs", serde(skip, default = "Span::current"))]
+    pub span: Span,
+}
+impl From<HeartbeatTimeoutMsg> for LocalInput {
+    fn from(hb: HeartbeatTimeoutMsg) -> Self {
+        Self {
+            input: LocalInputs::HeartbeatTimeout(hb.run_id),
+            span: hb.span,
+        }
+    }
+}
+/// Everything that _isn't_ a poll which may affect workflow state. Always higher priority than
+/// new polls.
+#[derive(Debug, derive_more::From)]
+#[cfg_attr(
+    feature = "save_wf_inputs",
+    derive(serde::Serialize, serde::Deserialize)
+)]
+pub(super) enum LocalInputs {
+    Completion(WFActCompleteMsg),
+    FetchedPageCompletion {
+        paginator: HistoryPaginator,
+        update: HistoryUpdate,
+    },
+    LocalResolution(LocalResolutionMsg),
+    PostActivation(PostActivationMsg),
+    RequestEviction(RequestEvictMsg),
+    HeartbeatTimeout(String),
+    #[cfg_attr(feature = "save_wf_inputs", serde(skip))]
+    GetStateInfo(GetStateInfoMsg),
+}
+impl LocalInputs {
+    fn run_id(&self) -> Option<&str> {
+        Some(match self {
+            LocalInputs::Completion(c) => c.completion.run_id(),
+            LocalInputs::FetchedPageCompletion { paginator, .. } => &paginator.run_id,
+            LocalInputs::LocalResolution(lr) => &lr.run_id,
+            LocalInputs::PostActivation(pa) => &pa.run_id,
+            LocalInputs::RequestEviction(re) => &re.run_id,
+            LocalInputs::HeartbeatTimeout(hb) => hb,
+            LocalInputs::GetStateInfo(_) => return None,
+        })
+    }
+}
+#[derive(Debug)]
+#[allow(clippy::large_enum_variant)] // PollerDead only ever gets used once, so not important.
+enum ExternalPollerInputs {
+    NewWft(PermittedWFT),
+    PollerDead,
+    PollerError(tonic::Status),
+    FetchedUpdate(PermittedWFT),
+    NextPage {
+        paginator: HistoryPaginator,
+        update: HistoryUpdate,
+        span: Span,
+    },
+    FailedFetch {
+        run_id: String,
+        err: tonic::Status,
+    },
+}
+impl From<ExternalPollerInputs> for WFStreamInput {
+    fn from(l: ExternalPollerInputs) -> Self {
+        match l {
+            ExternalPollerInputs::NewWft(v) => WFStreamInput::NewWft(v),
+            ExternalPollerInputs::PollerDead => WFStreamInput::PollerDead,
+            ExternalPollerInputs::PollerError(e) => WFStreamInput::PollerError(e),
+            ExternalPollerInputs::FetchedUpdate(wft) => WFStreamInput::NewWft(wft),
+            ExternalPollerInputs::FailedFetch { run_id, err } => {
+                WFStreamInput::FailedFetch { run_id, err }
+            }
+            ExternalPollerInputs::NextPage {
+                paginator,
+                update,
+                span,
+            } => WFStreamInput::Local(LocalInput {
+                input: LocalInputs::FetchedPageCompletion { paginator, update },
+                span,
+            }),
+        }
+    }
+}
+impl From<Result<WFTExtractorOutput, tonic::Status>> for ExternalPollerInputs {
+    fn from(v: Result<WFTExtractorOutput, tonic::Status>) -> Self {
+        match v {
+            Ok(WFTExtractorOutput::NewWFT(pwft)) => ExternalPollerInputs::NewWft(pwft),
+            Ok(WFTExtractorOutput::FetchResult(updated_wft, _)) => {
+                ExternalPollerInputs::FetchedUpdate(updated_wft)
+            }
+            Ok(WFTExtractorOutput::NextPage {
+                paginator,
+                update,
+                span,
+                rc: _rc,
+            }) => ExternalPollerInputs::NextPage {
+                paginator,
+                update,
+                span,
+            },
+            Ok(WFTExtractorOutput::FailedFetch { run_id, err }) => {
+                ExternalPollerInputs::FailedFetch { run_id, err }
+            }
+            Ok(WFTExtractorOutput::PollerDead) => ExternalPollerInputs::PollerDead,
+            Err(e) => ExternalPollerInputs::PollerError(e),
+        }
+    }
+}
+#[derive(Debug)]
+enum NewOrFetchedComplete {
+    New(WFActCompleteMsg),
+    Fetched(HistoryUpdate, HistoryPaginator),
+}
+impl NewOrFetchedComplete {
+    fn run_id(&self) -> &str {
+        match self {
+            NewOrFetchedComplete::New(c) => c.completion.run_id(),
+            NewOrFetchedComplete::Fetched(_, p) => &p.run_id,
+        }
+    }
 }
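`LocalInputs` are documented above as always taking priority over new polls. One way to get that behavior when merging two channels is a biased select, so the local channel is checked before the poller channel whenever both are ready; the sketch below is only an illustration of that priority rule under assumed channel types, not the SDK's actual stream wiring:

    use tokio::sync::mpsc;

    #[derive(Debug)]
    enum Merged {
        Local(String),
        Poll(String),
    }

    /// Drain both channels, always preferring a ready local input over a ready poll.
    async fn next_input(
        local_rx: &mut mpsc::Receiver<String>,
        poll_rx: &mut mpsc::Receiver<String>,
    ) -> Option<Merged> {
        tokio::select! {
            // `biased` makes select! check branches top-to-bottom instead of randomly,
            // so local inputs win whenever both channels have something ready.
            biased;
            Some(l) = local_rx.recv() => Some(Merged::Local(l)),
            Some(p) = poll_rx.recv() => Some(Merged::Poll(p)),
            else => None,
        }
    }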