RubyGems - temporalio - Versions diffs - 0.0.2 → 0.1.0 - Mend

temporalio 0.0.2 → 0.1.0

Files changed (202) hide show

data/bridge/sdk-core/core/src/worker/workflow/mod.rs CHANGED Viewed

@@ -8,40 +8,58 @@ mod history_update;
 mod machines;
 mod managed_run;
 mod run_cache;
+mod wft_extraction;
 pub(crate) mod wft_poller;
 mod workflow_stream;
+#[cfg(feature = "save_wf_inputs")]
+pub use workflow_stream::replay_wf_state_inputs;
 pub(crate) use bridge::WorkflowBridge;
 pub(crate) use driven_workflow::{DrivenWorkflow, WorkflowFetcher};
-pub(crate) use history_update::{HistoryPaginator, HistoryUpdate};
-pub(crate) use machines::WFMachinesError;
+pub(crate) use history_update::HistoryUpdate;
 #[cfg(test)]
 pub(crate) use managed_run::ManagedWFFunc;
+use crate::worker::activities::TrackedPermittedTqResp;
 use crate::{
-    abstractions::OwnedMeteredSemPermit,
-    protosext::{legacy_query_failure, ValidPollWFTQResponse, WorkflowActivationExt},
-    telemetry::VecDisplayer,
+    abstractions::{
+        stream_when_allowed, MeteredSemaphore, TrackedOwnedMeteredSemPermit, UsedMeteredSemPermit,
+    },
+    internal_flags::InternalFlags,
+    protosext::{legacy_query_failure, ValidPollWFTQResponse},
+    telemetry::{
+        metrics::workflow_worker_type, set_trace_subscriber_for_current_thread, TelemetryInstance,
+        VecDisplayer,
+    },
     worker::{
-        activities::{ActivitiesFromWFTsHandle, PermittedTqResp},
+        activities::{ActivitiesFromWFTsHandle, LocalActivityManager},
         client::{WorkerClient, WorkflowTaskCompletion},
         workflow::{
-            managed_run::{ManagedRun, WorkflowManager},
+            history_update::HistoryPaginator,
+            managed_run::RunUpdateAct,
+            wft_extraction::{HistoryFetchReq, WFTExtractor},
             wft_poller::validate_wft,
             workflow_stream::{LocalInput, LocalInputs, WFStream},
         },
-        LocalActRequest, LocalActivityResolution,
+        LocalActRequest, LocalActivityExecutionResult, LocalActivityResolution,
     },
     MetricsContext,
 };
+use anyhow::anyhow;
 use futures::{stream::BoxStream, Stream, StreamExt};
+use futures_util::{future::abortable, stream};
+use prost_types::TimestampError;
 use std::{
-    collections::HashSet,
-    fmt::{Debug, Display, Formatter},
+    cell::RefCell,
+    collections::VecDeque,
+    fmt::Debug,
     future::Future,
     ops::DerefMut,
+    rc::Rc,
     result,
     sync::Arc,
+    thread,
     time::{Duration, Instant},
 };
 use temporal_sdk_core_api::errors::{CompleteWfError, PollWfError};
@@ -59,36 +77,41 @@ use temporal_sdk_core_protos::{
     },
     temporal::api::{
         command::v1::{command::Attributes, Command as ProtoCommand, Command},
-        common::v1::{Memo, RetryPolicy, SearchAttributes},
+        common::v1::{Memo, MeteringMetadata, RetryPolicy, SearchAttributes, WorkflowExecution},
         enums::v1::WorkflowTaskFailedCause,
+        query::v1::WorkflowQuery,
+        sdk::v1::WorkflowTaskCompletedMetadata,
         taskqueue::v1::StickyExecutionAttributes,
-        workflowservice::v1::PollActivityTaskQueueResponse,
+        workflowservice::v1::{get_system_info_response, PollActivityTaskQueueResponse},
     },
     TaskToken,
 };
 use tokio::{
     sync::{
-        mpsc::{unbounded_channel, UnboundedSender},
+        mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender},
         oneshot,
     },
-    task,
-    task::{JoinError, JoinHandle},
+    task::{spawn_blocking, LocalSet},
 };
 use tokio_stream::wrappers::UnboundedReceiverStream;
 use tokio_util::sync::CancellationToken;
 use tracing::Span;
 pub(crate) const LEGACY_QUERY_ID: &str = "legacy_query";
+/// What fraction of a WFT timeout we are willing to wait before sending a WFT heartbeat when
+/// necessary.
+const WFT_HEARTBEAT_TIMEOUT_FRACTION: f32 = 0.8;
 const MAX_EAGER_ACTIVITY_RESERVATIONS_PER_WORKFLOW_TASK: usize = 3;
 type Result<T, E = WFMachinesError> = result::Result<T, E>;
 type BoxedActivationStream = BoxStream<'static, Result<ActivationOrAuto, PollWfError>>;
+type InternalFlagsRef = Rc<RefCell<InternalFlags>>;
 /// Centralizes all state related to workflows and workflow tasks
 pub(crate) struct Workflows {
     task_queue: String,
     local_tx: UnboundedSender<LocalInput>,
-    processing_task: tokio::sync::Mutex<Option<JoinHandle<()>>>,
+    processing_task: tokio::sync::Mutex<Option<thread::JoinHandle<()>>>,
     activation_stream: tokio::sync::Mutex<(
         BoxedActivationStream,
         // Used to indicate polling may begin
@@ -100,9 +123,12 @@ pub(crate) struct Workflows {
     sticky_attrs: Option<StickyExecutionAttributes>,
     /// If set, can be used to reserve activity task slots for eager-return of new activity tasks.
     activity_tasks_handle: Option<ActivitiesFromWFTsHandle>,
+    /// Ensures we stay at or below this worker's maximum concurrent workflow task limit
+    wft_semaphore: MeteredSemaphore,
+    local_act_mgr: Arc<LocalActivityManager>,
 }
-pub(super) struct WorkflowBasics {
+pub(crate) struct WorkflowBasics {
     pub max_cached_workflows: usize,
     pub max_outstanding_wfts: usize,
     pub shutdown_token: CancellationToken,
@@ -110,53 +136,115 @@ pub(super) struct WorkflowBasics {
     pub namespace: String,
     pub task_queue: String,
     pub ignore_evicts_on_shutdown: bool,
+    pub fetching_concurrency: usize,
+    pub server_capabilities: get_system_info_response::Capabilities,
+    #[cfg(feature = "save_wf_inputs")]
+    pub wf_state_inputs: Option<UnboundedSender<Vec<u8>>>,
+}
+pub(crate) struct RunBasics<'a> {
+    pub namespace: String,
+    pub workflow_id: String,
+    pub workflow_type: String,
+    pub run_id: String,
+    pub history: HistoryUpdate,
+    pub metrics: MetricsContext,
+    pub capabilities: &'a get_system_info_response::Capabilities,
 }
 impl Workflows {
+    #[allow(clippy::too_many_arguments)] // Not much worth combining here
     pub(super) fn new(
         basics: WorkflowBasics,
         sticky_attrs: Option<StickyExecutionAttributes>,
         client: Arc<dyn WorkerClient>,
         wft_stream: impl Stream<Item = Result<ValidPollWFTQResponse, tonic::Status>> + Send + 'static,
-        local_activity_request_sink: impl Fn(Vec<LocalActRequest>) -> Vec<LocalActivityResolution>
-            + Send
-            + Sync
-            + 'static,
+        local_activity_request_sink: impl LocalActivityRequestSink,
+        local_act_mgr: Arc<LocalActivityManager>,
+        heartbeat_timeout_rx: UnboundedReceiver<HeartbeatTimeoutMsg>,
         activity_tasks_handle: Option<ActivitiesFromWFTsHandle>,
+        telem_instance: Option<&TelemetryInstance>,
     ) -> Self {
         let (local_tx, local_rx) = unbounded_channel();
+        let (fetch_tx, fetch_rx) = unbounded_channel();
         let shutdown_tok = basics.shutdown_token.clone();
         let task_queue = basics.task_queue.clone();
-        let mut stream = WFStream::build(
-            basics,
+        let wft_semaphore = MeteredSemaphore::new(
+            basics.max_outstanding_wfts,
+            basics.metrics.with_new_attrs([workflow_worker_type()]),
+            MetricsContext::available_task_slots,
+        );
+        // Only allow polling of the new WFT stream if there are available task slots
+        let proceeder = stream::unfold(wft_semaphore.clone(), |sem| async move {
+            Some((sem.acquire_owned().await.unwrap(), sem))
+        });
+        let wft_stream = stream_when_allowed(wft_stream, proceeder);
+        let extracted_wft_stream = WFTExtractor::build(
+            client.clone(),
+            basics.fetching_concurrency,
             wft_stream,
+            UnboundedReceiverStream::new(fetch_rx),
+        );
+        let locals_stream = stream::select(
             UnboundedReceiverStream::new(local_rx),
-            client.clone(),
-            local_activity_request_sink,
+            UnboundedReceiverStream::new(heartbeat_timeout_rx).map(Into::into),
         );
         let (activation_tx, activation_rx) = unbounded_channel();
         let (start_polling_tx, start_polling_rx) = oneshot::channel();
         // We must spawn a task to constantly poll the activation stream, because otherwise
         // activation completions would not cause anything to happen until the next poll.
-        let processing_task = task::spawn(async move {
-            // However, we want to avoid plowing ahead until we've been asked to poll at least once.
-            // This supports activity-only workers.
-            let do_poll = tokio::select! {
-                sp = start_polling_rx => {
-                    sp.is_ok()
+        let tracing_sub = telem_instance.map(|ti| ti.trace_subscriber());
+        let processing_task = thread::spawn(move || {
+            if let Some(ts) = tracing_sub {
+                set_trace_subscriber_for_current_thread(ts);
+            }
+            let rt = tokio::runtime::Builder::new_current_thread()
+                .enable_all()
+                .thread_name("workflow-processing")
+                .build()
+                .unwrap();
+            let local = LocalSet::new();
+            local.block_on(&rt, async move {
+                let mut stream = WFStream::build(
+                    basics,
+                    extracted_wft_stream,
+                    locals_stream,
+                    local_activity_request_sink,
+                );
+                // However, we want to avoid plowing ahead until we've been asked to poll at least
+                // once. This supports activity-only workers.
+                let do_poll = tokio::select! {
+                    sp = start_polling_rx => {
+                        sp.is_ok()
+                    }
+                    _ = shutdown_tok.cancelled() => {
+                        false
+                    }
+                };
+                if !do_poll {
+                    return;
                 }
-                _ = shutdown_tok.cancelled() => {
-                    false
+                while let Some(output) = stream.next().await {
+                    match output {
+                        Ok(o) => {
+                            for fetchreq in o.fetch_histories {
+                                fetch_tx
+                                    .send(fetchreq)
+                                    .expect("Fetch channel must not be dropped");
+                            }
+                            for act in o.activations {
+                                activation_tx
+                                    .send(Ok(act))
+                                    .expect("Activation processor channel not dropped");
+                            }
+                        }
+                        Err(e) => activation_tx
+                            .send(Err(e))
+                            .expect("Activation processor channel not dropped"),
+                    }
                 }
-            };
-            if !do_poll {
-                return;
-            }
-            while let Some(act) = stream.next().await {
-                activation_tx
-                    .send(act)
-                    .expect("Activation processor channel not dropped");
-            }
+            });
         });
         Self {
             task_queue,
@@ -169,12 +257,14 @@ impl Workflows {
             client,
             sticky_attrs,
             activity_tasks_handle,
+            wft_semaphore,
+            local_act_mgr,
         }
     }
-    pub async fn next_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
+    pub(super) async fn next_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
         loop {
-            let r = {
+            let al = {
                 let mut lock = self.activation_stream.lock().await;
                 let (ref mut stream, ref mut beginner) = lock.deref_mut();
                 if let Some(beginner) = beginner.take() {
@@ -182,17 +272,37 @@ impl Workflows {
                 }
                 stream.next().await.unwrap_or(Err(PollWfError::ShutDown))?
             };
-            Span::current().record("run_id", r.run_id());
-            match r {
+            Span::current().record("run_id", al.run_id());
+            match al {
                 ActivationOrAuto::LangActivation(act) | ActivationOrAuto::ReadyForQueries(act) => {
                     debug!(activation=%act, "Sending activation to lang");
                     break Ok(act);
                 }
                 ActivationOrAuto::Autocomplete { run_id } => {
-                    self.activation_completed(WorkflowActivationCompletion {
-                        run_id,
-                        status: Some(workflow_completion::Success::from_variants(vec![]).into()),
-                    })
+                    self.activation_completed(
+                        WorkflowActivationCompletion {
+                            run_id,
+                            status: Some(
+                                workflow_completion::Success::from_variants(vec![]).into(),
+                            ),
+                        },
+                        // `None` needs a concrete type for the generic hook parameter. Which
+                        // type is irrelevant, so name a boxed function we'll never call.
+                        Option::<Box<dyn Fn(&str, usize) + Send>>::None,
+                    )
+                    .await?;
+                }
+                ActivationOrAuto::AutoFail {
+                    run_id,
+                    machines_err,
+                } => {
+                    self.activation_completed(
+                        WorkflowActivationCompletion {
+                            run_id,
+                            status: Some(auto_fail_to_complete_status(machines_err)),
+                        },
+                        Option::<Box<dyn Fn(&str, usize) + Send>>::None,
+                    )
                     .await?;
                 }
             }
@@ -202,10 +312,11 @@ impl Workflows {
     /// Queue an activation completion for processing, returning a future that will resolve with
     /// the outcome of that completion. See [ActivationCompletedOutcome].
     ///
-    /// Returns the most-recently-processed event number for the run
-    pub async fn activation_completed(
+    /// Returns the most-recently-processed event number for the run.
+    pub(super) async fn activation_completed(
         &self,
         completion: WorkflowActivationCompletion,
+        post_activate_hook: Option<impl Fn(&str, usize)>,
     ) -> Result<usize, CompleteWfError> {
         let is_empty_completion = completion.is_empty();
         let completion = validate_completion(completion)?;
@@ -213,7 +324,7 @@ impl Workflows {
         let (tx, rx) = oneshot::channel();
         let was_sent = self.send_local(WFActCompleteMsg {
             completion,
-            response_tx: tx,
+            response_tx: Some(tx),
         });
         if !was_sent {
             if is_empty_completion {
@@ -230,7 +341,7 @@ impl Workflows {
             .await
             .expect("Send half of activation complete response not dropped");
         let mut wft_from_complete = None;
-        let reported_wft_to_server = match completion_outcome.outcome {
+        let wft_report_status = match completion_outcome.outcome {
             ActivationCompleteOutcome::ReportWFTSuccess(report) => match report {
                 ServerCommandsWithWorkflowInfo {
                     task_token,
@@ -239,6 +350,7 @@ impl Workflows {
                             mut commands,
                             query_responses,
                             force_new_wft,
+                            sdk_metadata,
                         },
                 } => {
                     let reserved_act_permits =
@@ -252,6 +364,13 @@ impl Workflows {
                         sticky_attributes: None,
                         return_new_workflow_task: true,
                         force_create_new_workflow_task: force_new_wft,
+                        sdk_metadata,
+                        metering_metadata: MeteringMetadata {
+                            nonfirst_local_activity_execution_attempts: self
+                                .local_act_mgr
+                                .get_nonfirst_attempt_count(&run_id)
+                                as u32,
+                        },
                     };
                     let sticky_attrs = self.sticky_attrs.clone();
                     // Do not return new WFT if we would not cache, because returned new WFTs are
@@ -273,14 +392,14 @@ impl Workflows {
                         Ok(())
                     })
                     .await;
-                    true
+                    WFTReportStatus::Reported
                 }
                 ServerCommandsWithWorkflowInfo {
                     task_token,
                     action: ActivationAction::RespondLegacyQuery { result },
                 } => {
                     self.respond_legacy_query(task_token, *result).await;
-                    true
+                    WFTReportStatus::Reported
                 }
             },
             ActivationCompleteOutcome::ReportWFTFail(outcome) => match outcome {
@@ -292,29 +411,54 @@ impl Workflows {
                             .await
                     })
                     .await;
-                    true
+                    WFTReportStatus::Reported
                 }
                 FailedActivationWFTReport::ReportLegacyQueryFailure(task_token, failure) => {
                     warn!(run_id=%run_id, failure=?failure, "Failing legacy query request");
                     self.respond_legacy_query(task_token, legacy_query_failure(failure))
                         .await;
-                    true
+                    WFTReportStatus::Reported
                 }
             },
-            ActivationCompleteOutcome::DoNothing => false,
+            ActivationCompleteOutcome::WFTFailedDontReport => WFTReportStatus::DropWft,
+            ActivationCompleteOutcome::DoNothing => WFTReportStatus::NotReported,
+        };
+        let maybe_pwft = if let Some(wft) = wft_from_complete {
+            match HistoryPaginator::from_poll(wft, self.client.clone()).await {
+                Ok((paginator, pwft)) => Some((pwft, paginator)),
+                Err(e) => {
+                    self.request_eviction(
+                        &run_id,
+                        format!("Failed to paginate workflow task from completion: {e:?}"),
+                        EvictionReason::Fatal,
+                    );
+                    None
+                }
+            }
+        } else {
+            None
         };
+        if let Some(h) = post_activate_hook {
+            h(&run_id, completion_outcome.most_recently_processed_event);
+        }
         self.post_activation(PostActivationMsg {
             run_id,
-            reported_wft_to_server,
-            wft_from_complete,
+            wft_report_status,
+            wft_from_complete: maybe_pwft,
         });
         Ok(completion_outcome.most_recently_processed_event)
     }
     /// Tell workflow that a local activity has finished with the provided result
-    pub fn notify_of_local_result(&self, run_id: impl Into<String>, resolved: LocalResolution) {
+    pub(super) fn notify_of_local_result(
+        &self,
+        run_id: impl Into<String>,
+        resolved: LocalResolution,
+    ) {
         self.send_local(LocalResolutionMsg {
             run_id: run_id.into(),
             res: resolved,
@@ -322,7 +466,7 @@ impl Workflows {
     }
     /// Request eviction of a workflow
-    pub fn request_eviction(
+    pub(super) fn request_eviction(
         &self,
         run_id: impl Into<String>,
         message: impl Into<String>,
@@ -336,22 +480,39 @@ impl Workflows {
     }
     /// Query the state of workflow management. Can return `None` if workflow state is shut down.
-    pub fn get_state_info(&self) -> impl Future<Output = Option<WorkflowStateInfo>> {
+    pub(super) fn get_state_info(&self) -> impl Future<Output = Option<WorkflowStateInfo>> {
         let (tx, rx) = oneshot::channel();
         self.send_local(GetStateInfoMsg { response_tx: tx });
         async move { rx.await.ok() }
     }
-    pub async fn shutdown(&self) -> Result<(), JoinError> {
+    pub(super) fn available_wft_permits(&self) -> usize {
+        self.wft_semaphore.available_permits()
+    }
+    pub(super) async fn shutdown(&self) -> Result<(), anyhow::Error> {
         let maybe_jh = self.processing_task.lock().await.take();
         if let Some(jh) = maybe_jh {
-            // This acts as a final wake up in case the stream is still alive and wouldn't otherwise
-            // receive another message. It allows it to shut itself down.
-            let _ = self.get_state_info();
-            jh.await
-        } else {
-            Ok(())
+            // This serves to drive the stream if it is still alive and wouldn't otherwise receive
+            // another message. It allows it to shut itself down.
+            let (waker, stop_waker) = abortable(async {
+                let mut interval = tokio::time::interval(Duration::from_millis(10));
+                loop {
+                    interval.tick().await;
+                    let _ = self.get_state_info().await;
+                }
+            });
+            let (_, jh_res) = tokio::join!(
+                waker,
+                spawn_blocking(move || {
+                    let r = jh.join();
+                    stop_waker.abort();
+                    r
+                })
+            );
+            jh_res?.map_err(|e| anyhow!("Error joining workflow processing thread: {e:?}"))?;
         }
+        Ok(())
     }
     /// Must be called after every activation completion has finished
@@ -393,7 +554,11 @@ impl Workflows {
     /// successfully.
     fn send_local(&self, msg: impl Into<LocalInputs>) -> bool {
         let msg = msg.into();
-        let print_err = !matches!(msg, LocalInputs::GetStateInfo(_));
+        let print_err = match &msg {
+            LocalInputs::GetStateInfo(_) => false,
+            LocalInputs::LocalResolution(lr) if lr.res.is_la_cancel_confirmation() => false,
+            _ => true,
+        };
         if let Err(e) = self.local_tx.send(LocalInput {
             input: msg,
             span: Span::current(),
@@ -414,7 +579,7 @@ impl Workflows {
     /// Process eagerly returned activities from WFT completion
     fn handle_eager_activities(
         &self,
-        reserved_act_permits: Vec<OwnedMeteredSemPermit>,
+        reserved_act_permits: Vec<TrackedOwnedMeteredSemPermit>,
         eager_acts: Vec<PollActivityTaskQueueResponse>,
     ) {
         if let Some(at_handle) = self.activity_tasks_handle.as_ref() {
@@ -435,7 +600,7 @@ impl Workflows {
             let with_permits = reserved_act_permits
                 .into_iter()
                 .zip(eager_acts.into_iter())
-                .map(|(permit, resp)| PermittedTqResp { permit, resp });
+                .map(|(permit, resp)| TrackedPermittedTqResp { permit, resp });
             if with_permits.len() > 0 {
                 debug!(
                     "Adding {} activity tasks received from WFT complete",
@@ -458,7 +623,7 @@ impl Workflows {
     fn reserve_activity_slots_for_outgoing_commands(
         &self,
         commands: &mut [Command],
-    ) -> Vec<OwnedMeteredSemPermit> {
+    ) -> Vec<TrackedOwnedMeteredSemPermit> {
         let mut reserved = vec![];
         for cmd in commands {
             if let Some(Attributes::ScheduleActivityTaskCommandAttributes(attrs)) =
@@ -509,186 +674,30 @@ impl Workflows {
     }
 }
-/// Manages access to a specific workflow run, and contains various bookkeeping information that the
-/// [WFStream] may need to access quickly.
-#[derive(derive_more::DebugCustom)]
-#[debug(
-    fmt = "ManagedRunHandle {{ wft: {:?}, activation: {:?}, buffered_resp: {:?} \
-           have_seen_terminal_event: {}, most_recently_processed_event: {}, more_pending_work: {}, \
-           trying_to_evict: {}, last_action_acked: {} }}",
-    wft,
-    activation,
-    buffered_resp,
-    have_seen_terminal_event,
-    most_recently_processed_event_number,
-    more_pending_work,
-    "trying_to_evict.is_some()",
-    last_action_acked
+/// Returned when a cache miss happens and we need to fetch history from the beginning to
+/// replay a run
+#[derive(Debug, derive_more::Display)]
+#[display(
+    fmt = "CacheMissFetchReq(run_id: {})",
+    "original_wft.work.execution.run_id"
 )]
-struct ManagedRunHandle {
-    /// If set, the WFT this run is currently/will be processing.
-    wft: Option<OutstandingTask>,
-    /// An outstanding activation to lang
-    activation: Option<OutstandingActivation>,
-    /// If set, it indicates there is a buffered poll response from the server that applies to this
-    /// run. This can happen when lang takes too long to complete a task and the task times out, for
-    /// example. Upon next completion, the buffered response will be removed and can be made ready
-    /// to be returned from polling
-    buffered_resp: Option<PermittedWFT>,
-    /// True if this machine has seen an event which ends the execution
-    have_seen_terminal_event: bool,
-    /// The most recently processed event id this machine has seen. 0 means it has seen nothing.
-    most_recently_processed_event_number: usize,
-    /// Is set true when the machines indicate that there is additional known work to be processed
-    more_pending_work: bool,
-    /// Is set if an eviction has been requested for this run
-    trying_to_evict: Option<RequestEvictMsg>,
-    /// Set to true if the last action we tried to take to this run has been processed (ie: the
-    /// [RunUpdateResponse] for it has been seen.
-    last_action_acked: bool,
-    /// For sending work to the machines
-    run_actions_tx: UnboundedSender<RunAction>,
-    /// Handle to the task where the actual machines live
-    handle: JoinHandle<()>,
-    /// We track if we have recorded useful debugging values onto a certain span yet, to overcome
-    /// duplicating field values. Remove this once https://github.com/tokio-rs/tracing/issues/2334
-    /// is fixed.
-    recorded_span_ids: HashSet<tracing::Id>,
-    metrics: MetricsContext,
+#[must_use]
+struct CacheMissFetchReq {
+    original_wft: PermittedWFT,
+}
+/// Bubbled up from inside workflow state if we're trying to apply the next workflow task but it
+/// isn't in memory
+#[derive(Debug)]
+#[must_use]
+struct NextPageReq {
+    paginator: HistoryPaginator,
+    span: Span,
 }
-impl ManagedRunHandle {
-    fn new(
-        wfm: WorkflowManager,
-        activations_tx: UnboundedSender<RunUpdateResponse>,
-        local_activity_request_sink: LocalActivityRequestSink,
-        metrics: MetricsContext,
-    ) -> Self {
-        let (run_actions_tx, run_actions_rx) = unbounded_channel();
-        let managed = ManagedRun::new(wfm, activations_tx, local_activity_request_sink);
-        let handle = tokio::task::spawn(managed.run(run_actions_rx));
-        Self {
-            wft: None,
-            activation: None,
-            buffered_resp: None,
-            have_seen_terminal_event: false,
-            most_recently_processed_event_number: 0,
-            more_pending_work: false,
-            trying_to_evict: None,
-            last_action_acked: true,
-            run_actions_tx,
-            handle,
-            recorded_span_ids: Default::default(),
-            metrics,
-        }
-    }
-    fn incoming_wft(&mut self, wft: NewIncomingWFT) {
-        if self.wft.is_some() {
-            error!("Trying to send a new WFT for a run which already has one!");
-        }
-        self.send_run_action(RunActions::NewIncomingWFT(wft));
-    }
-    fn check_more_activations(&mut self) {
-        // No point in checking for more activations if we have not acked the last update, or
-        // if there's already an outstanding activation.
-        if self.last_action_acked && self.activation.is_none() {
-            self.send_run_action(RunActions::CheckMoreWork {
-                want_to_evict: self.trying_to_evict.clone(),
-                has_pending_queries: self
-                    .wft
-                    .as_ref()
-                    .map(|wft| !wft.pending_queries.is_empty())
-                    .unwrap_or_default(),
-                has_wft: self.wft.is_some(),
-            });
-        }
-    }
-    fn send_completion(&mut self, c: RunActivationCompletion) {
-        self.send_run_action(RunActions::ActivationCompletion(c));
-    }
-    fn send_local_resolution(&mut self, r: LocalResolution) {
-        self.send_run_action(RunActions::LocalResolution(r));
-    }
-    fn insert_outstanding_activation(&mut self, act: &ActivationOrAuto) {
-        let act_type = match &act {
-            ActivationOrAuto::LangActivation(act) | ActivationOrAuto::ReadyForQueries(act) => {
-                if act.is_legacy_query() {
-                    OutstandingActivation::LegacyQuery
-                } else {
-                    OutstandingActivation::Normal {
-                        contains_eviction: act.eviction_index().is_some(),
-                        num_jobs: act.jobs.len(),
-                    }
-                }
-            }
-            ActivationOrAuto::Autocomplete { .. } => OutstandingActivation::Autocomplete,
-        };
-        if let Some(old_act) = self.activation {
-            // This is a panic because we have screwed up core logic if this is violated. It must be
-            // upheld.
-            panic!(
-                "Attempted to insert a new outstanding activation {:?}, but there already was \
-                 one outstanding: {:?}",
-                act, old_act
-            );
-        }
-        self.activation = Some(act_type);
-    }
-    fn send_run_action(&mut self, action: RunActions) {
-        self.last_action_acked = false;
-        self.run_actions_tx
-            .send(RunAction {
-                action,
-                trace_span: Span::current(),
-            })
-            .expect("Receive half of run actions not dropped");
-    }
-    /// Returns true if the managed run has any form of pending work
-    /// If `ignore_evicts` is true, pending evictions do not count as pending work.
-    /// If `ignore_buffered` is true, buffered workflow tasks do not count as pending work.
-    fn has_any_pending_work(&self, ignore_evicts: bool, ignore_buffered: bool) -> bool {
-        let evict_work = if ignore_evicts {
-            false
-        } else {
-            self.trying_to_evict.is_some()
-        };
-        let act_work = if ignore_evicts {
-            if let Some(ref act) = self.activation {
-                !act.has_only_eviction()
-            } else {
-                false
-            }
-        } else {
-            self.activation.is_some()
-        };
-        let buffered = if ignore_buffered {
-            false
-        } else {
-            self.buffered_resp.is_some()
-        };
-        self.wft.is_some()
-            || buffered
-            || !self.last_action_acked
-            || self.more_pending_work
-            || act_work
-            || evict_work
-    }
-    /// Returns true if the handle is currently processing a WFT which contains a legacy query.
-    fn pending_work_is_legacy_query(&self) -> bool {
-        // Either we know because there is a pending legacy query, or it's already been drained and
-        // sent as an activation.
-        matches!(self.activation, Some(OutstandingActivation::LegacyQuery))
-            || self
-                .wft
-                .as_ref()
-                .map(|t| t.has_pending_legacy_query())
-                .unwrap_or_default()
-    }
+#[derive(Debug)]
+struct WFStreamOutput {
+    activations: VecDeque<ActivationOrAuto>,
+    fetch_histories: VecDeque<HistoryFetchReq>,
 }
 #[derive(Debug, derive_more::Display)]
@@ -697,9 +706,15 @@ enum ActivationOrAuto {
     /// This type should only be filled with an empty activation which is ready to have queries
     /// inserted into the joblist
     ReadyForQueries(WorkflowActivation),
+    #[display(fmt = "Autocomplete(run_id={run_id})")]
     Autocomplete {
         run_id: String,
     },
+    #[display(fmt = "AutoFail(run_id={run_id})")]
+    AutoFail {
+        run_id: String,
+        machines_err: WFMachinesError,
+    },
 }
 impl ActivationOrAuto {
     pub fn run_id(&self) -> &str {
@@ -707,15 +722,53 @@ impl ActivationOrAuto {
             ActivationOrAuto::LangActivation(act) => &act.run_id,
             ActivationOrAuto::Autocomplete { run_id, .. } => run_id,
             ActivationOrAuto::ReadyForQueries(act) => &act.run_id,
+            ActivationOrAuto::AutoFail { run_id, .. } => run_id,
         }
     }
 }
+/// A processed WFT which has been validated and had a history update extracted from it
 #[derive(derive_more::DebugCustom)]
-#[debug(fmt = "PermittedWft {{ {:?} }}", wft)]
+#[cfg_attr(
+    feature = "save_wf_inputs",
+    derive(serde::Serialize, serde::Deserialize)
+)]
+#[debug(fmt = "PermittedWft({work:?})")]
 pub(crate) struct PermittedWFT {
-    wft: ValidPollWFTQResponse,
-    permit: OwnedMeteredSemPermit,
+    work: PreparedWFT,
+    #[cfg_attr(
+        feature = "save_wf_inputs",
+        serde(skip, default = "UsedMeteredSemPermit::fake_deserialized")
+    )]
+    permit: UsedMeteredSemPermit,
+    #[cfg_attr(
+        feature = "save_wf_inputs",
+        serde(skip, default = "HistoryPaginator::fake_deserialized")
+    )]
+    paginator: HistoryPaginator,
+}
+/// The payload of a workflow task: its identifying information, any queries attached to it, and
+/// the [HistoryUpdate] it carries.
+///
+/// Derives `serde::Serialize` / `serde::Deserialize` when the `save_wf_inputs` feature is on.
+#[derive(Debug)]
+#[cfg_attr(
+    feature = "save_wf_inputs",
+    derive(serde::Serialize, serde::Deserialize)
+)]
+struct PreparedWFT {
+    /// Token of the workflow task.
+    task_token: TaskToken,
+    /// Attempt number of the workflow task.
+    // NOTE(review): presumably copied from the poll response — confirm.
+    attempt: u32,
+    /// The workflow execution this task belongs to.
+    execution: WorkflowExecution,
+    /// Name of the workflow type.
+    workflow_type: String,
+    /// A legacy query attached to the task, if there is one.
+    legacy_query: Option<WorkflowQuery>,
+    /// Query requests attached to the task.
+    query_requests: Vec<QueryWorkflow>,
+    /// The history update for this task. Its first event id decides
+    /// [PreparedWFT::is_incremental].
+    update: HistoryUpdate,
+}
+impl PreparedWFT {
+    /// Reports whether the contained history update is incremental, i.e. it expects a cached
+    /// workflow to already exist.
+    ///
+    /// The update counts as incremental when it has no first event id at all, or when that id is
+    /// greater than 1.
+    pub fn is_incremental(&self) -> bool {
+        match self.update.first_event_id() {
+            None => true,
+            Some(first_id) => first_id > 1,
+        }
+    }
 }
 #[derive(Debug)]
@@ -727,7 +780,7 @@ pub(crate) struct OutstandingTask {
     pub start_time: Instant,
     /// The WFT permit owned by this task, ensures we don't exceed max concurrent WFT, and makes
     /// sure the permit is automatically freed when we delete the task.
-    pub permit: OwnedMeteredSemPermit,
+    pub permit: UsedMeteredSemPermit,
 }
 impl OutstandingTask {
@@ -806,49 +859,80 @@ pub(crate) enum ActivationAction {
         commands: Vec<ProtoCommand>,
         query_responses: Vec<QueryResult>,
         force_new_wft: bool,
+        sdk_metadata: WorkflowTaskCompletedMetadata,
     },
     /// We should respond to a legacy query request
     RespondLegacyQuery { result: Box<QueryResult> },
 }
-#[derive(Debug, Eq, PartialEq, Hash)]
-pub(crate) enum EvictionRequestResult {
-    EvictionRequested(Option<u32>),
+#[derive(Debug)]
+enum EvictionRequestResult {
+    EvictionRequested(Option<u32>, RunUpdateAct),
     NotFound,
     EvictionAlreadyRequested(Option<u32>),
 }
+impl EvictionRequestResult {
+    /// Consumes the result and yields the [RunUpdateAct] it carries.
+    ///
+    /// Only `EvictionRequested` holds one; `NotFound` and `EvictionAlreadyRequested` produce
+    /// `None`.
+    fn into_run_update_resp(self) -> RunUpdateAct {
+        match self {
+            Self::NotFound | Self::EvictionAlreadyRequested(_) => None,
+            Self::EvictionRequested(_, run_update) => run_update,
+        }
+    }
+}
 #[derive(Debug)]
 #[allow(dead_code)] // Not always used in non-test
 pub(crate) struct WorkflowStateInfo {
     pub cached_workflows: usize,
     pub outstanding_wft: usize,
-    pub available_wft_permits: usize,
 }
 #[derive(Debug)]
+#[cfg_attr(
+    feature = "save_wf_inputs",
+    derive(serde::Serialize, serde::Deserialize)
+)]
 struct WFActCompleteMsg {
     completion: ValidatedCompletion,
-    response_tx: oneshot::Sender<ActivationCompleteResult>,
+    #[cfg_attr(feature = "save_wf_inputs", serde(skip))]
+    response_tx: Option<oneshot::Sender<ActivationCompleteResult>>,
 }
 #[derive(Debug)]
+#[cfg_attr(
+    feature = "save_wf_inputs",
+    derive(serde::Serialize, serde::Deserialize)
+)]
+/// Message pairing a [LocalResolution] with the id of the run it is addressed to.
 struct LocalResolutionMsg {
+    /// Id of the run the resolution is for.
     run_id: String,
+    /// The resolution itself.
     res: LocalResolution,
 }
 #[derive(Debug)]
+#[cfg_attr(
+    feature = "save_wf_inputs",
+    derive(serde::Serialize, serde::Deserialize)
+)]
 struct PostActivationMsg {
     run_id: String,
-    reported_wft_to_server: bool,
-    wft_from_complete: Option<ValidPollWFTQResponse>,
+    wft_report_status: WFTReportStatus,
+    wft_from_complete: Option<(PreparedWFT, HistoryPaginator)>,
 }
 #[derive(Debug, Clone)]
+#[cfg_attr(
+    feature = "save_wf_inputs",
+    derive(serde::Serialize, serde::Deserialize)
+)]
+/// Message asking for a run to be evicted, together with why.
 struct RequestEvictMsg {
+    /// Id of the run to evict.
     run_id: String,
+    /// Free-form text accompanying the request.
     message: String,
+    /// The [EvictionReason] for the request.
     reason: EvictionReason,
 }
 #[derive(Debug)]
+/// Message naming a run and carrying a tracing [Span].
+// NOTE(review): going by the name, presumably sent when a workflow task heartbeat times out for
+// the run — confirm against the sender.
+pub(crate) struct HeartbeatTimeoutMsg {
+    /// Id of the run the message concerns.
+    pub(crate) run_id: String,
+    /// Span carried along with the message.
+    pub(crate) span: Span,
+}
+#[derive(Debug)]
+/// Request for a [WorkflowStateInfo]; the reply goes out on `response_tx`.
 struct GetStateInfoMsg {
+    /// One-shot channel on which the [WorkflowStateInfo] is sent back.
     response_tx: oneshot::Sender<WorkflowStateInfo>,
 }
@@ -869,16 +953,24 @@ enum ActivationCompleteOutcome {
     ReportWFTFail(FailedActivationWFTReport),
     /// There's nothing to do right now. EX: The workflow needs to keep replaying.
     DoNothing,
+    /// The workflow task failed, but we shouldn't report it. EX: We have failed 2 or more attempts
+    /// in a row.
+    WFTFailedDontReport,
 }
-#[derive(Debug)]
-struct FulfillableActivationComplete {
-    result: ActivationCompleteResult,
-    resp_chan: oneshot::Sender<ActivationCompleteResult>,
-}
-impl FulfillableActivationComplete {
-    fn fulfill(self) {
-        let _ = self.resp_chan.send(self.result);
-    }
+/// Did we report, or not, completion of a WFT to server?
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(
+    feature = "save_wf_inputs",
+    derive(serde::Serialize, serde::Deserialize)
+)]
+enum WFTReportStatus {
+    /// The WFT completion was reported to server.
+    Reported,
+    /// The WFT completion was not reported when finishing the activation, because there's still
+    /// work to be done. EX: Running LAs.
+    NotReported,
+    /// We didn't report, but we want to clear the outstanding workflow task anyway. See
+    /// [ActivationCompleteOutcome::WFTFailedDontReport]
+    DropWft,
 }
 fn validate_completion(
@@ -908,8 +1000,7 @@ fn validate_completion(
                     reason: format!(
                         "Workflow completion had a legacy query response along with other \
                          commands. This is not allowed and constitutes an error in the \
-                         lang SDK. Commands: {:?}",
-                        commands
+                         lang SDK. Commands: {commands:?}"
                     ),
                     run_id: completion.run_id,
                 });
@@ -918,6 +1009,7 @@ fn validate_completion(
             Ok(ValidatedCompletion::Success {
                 run_id: completion.run_id,
                 commands,
+                used_flags: success.used_internal_flags,
             })
         }
         Some(workflow_activation_completion::Status::Failed(failure)) => {
@@ -934,11 +1026,16 @@ fn validate_completion(
 }
 #[derive(Debug)]
+#[cfg_attr(
+    feature = "save_wf_inputs",
+    derive(serde::Serialize, serde::Deserialize)
+)]
 #[allow(clippy::large_enum_variant)]
 enum ValidatedCompletion {
     Success {
         run_id: String,
         commands: Vec<WFCommand>,
+        used_flags: Vec<u32>,
     },
     Fail {
         run_id: String,
@@ -955,112 +1052,6 @@ impl ValidatedCompletion {
     }
 }
-/// Input to run tasks, sent to [ManagedRun]s via [ManagedRunHandle]s
-#[derive(Debug)]
-struct RunAction {
-    action: RunActions,
-    trace_span: Span,
-}
-#[derive(Debug)]
-#[allow(clippy::large_enum_variant)]
-enum RunActions {
-    NewIncomingWFT(NewIncomingWFT),
-    ActivationCompletion(RunActivationCompletion),
-    CheckMoreWork {
-        want_to_evict: Option<RequestEvictMsg>,
-        has_pending_queries: bool,
-        has_wft: bool,
-    },
-    LocalResolution(LocalResolution),
-    HeartbeatTimeout,
-}
-#[derive(Debug)]
-struct NewIncomingWFT {
-    /// This field is only populated if the machines already exist. Otherwise the machines
-    /// are instantiated with the workflow history.
-    history_update: Option<HistoryUpdate>,
-    /// Wft start time
-    start_time: Instant,
-}
-#[derive(Debug)]
-struct RunActivationCompletion {
-    task_token: TaskToken,
-    start_time: Instant,
-    commands: Vec<WFCommand>,
-    activation_was_eviction: bool,
-    activation_was_only_eviction: bool,
-    has_pending_query: bool,
-    query_responses: Vec<QueryResult>,
-    /// Used to notify the worker when the completion is done processing and the completion can
-    /// unblock. Must always be `Some` when initialized.
-    resp_chan: Option<oneshot::Sender<ActivationCompleteResult>>,
-}
-/// A response from a [ManagedRun] held by a [ManagedRunHandle]
-#[derive(Debug)]
-struct RunUpdateResponse {
-    kind: RunUpdateResponseKind,
-    span: Span,
-}
-#[derive(Debug, derive_more::Display)]
-#[allow(clippy::large_enum_variant)]
-enum RunUpdateResponseKind {
-    Good(GoodRunUpdate),
-    Fail(FailRunUpdate),
-}
-impl RunUpdateResponseKind {
-    pub(crate) fn run_id(&self) -> &str {
-        match self {
-            RunUpdateResponseKind::Good(g) => &g.run_id,
-            RunUpdateResponseKind::Fail(f) => &f.run_id,
-        }
-    }
-}
-#[derive(Debug)]
-struct GoodRunUpdate {
-    run_id: String,
-    outgoing_activation: Option<ActivationOrAuto>,
-    fulfillable_complete: Option<FulfillableActivationComplete>,
-    have_seen_terminal_event: bool,
-    /// Is true if there are more jobs that need to be sent to lang
-    more_pending_work: bool,
-    most_recently_processed_event_number: usize,
-    /// Is true if this update was in response to a new WFT
-    in_response_to_wft: bool,
-}
-impl Display for GoodRunUpdate {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "GoodRunUpdate(run_id: {}, outgoing_activation: {}, more_pending_work: {})",
-            self.run_id,
-            if let Some(og) = self.outgoing_activation.as_ref() {
-                format!("{}", og)
-            } else {
-                "None".to_string()
-            },
-            self.more_pending_work
-        )
-    }
-}
-#[derive(Debug)]
-pub(crate) struct FailRunUpdate {
-    run_id: String,
-    err: WFMachinesError,
-    /// This is populated if the run update failed while processing a completion - and thus we
-    /// must respond down it when handling the failure.
-    completion_resp: Option<oneshot::Sender<ActivationCompleteResult>>,
-}
-impl Display for FailRunUpdate {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "FailRunUpdate(run_id: {}, error: {:?})",
-            self.run_id, self.err
-        )
-    }
-}
 #[derive(Debug)]
 pub struct OutgoingServerCommands {
     pub commands: Vec<ProtoCommand>,
@@ -1068,9 +1059,22 @@ pub struct OutgoingServerCommands {
 }
 #[derive(Debug)]
+#[cfg_attr(
+    feature = "save_wf_inputs",
+    derive(serde::Serialize, serde::Deserialize)
+)]
+/// A resolution of locally-executed work. Local activities are the only kind at present.
 pub(crate) enum LocalResolution {
+    /// Resolution of a local activity.
     LocalActivity(LocalActivityResolution),
 }
+impl LocalResolution {
+    /// Returns true when this is a local activity resolution whose result is
+    /// [LocalActivityExecutionResult::Cancelled].
+    pub fn is_la_cancel_confirmation(&self) -> bool {
+        // The enum has a single variant, so this destructuring cannot fail. Adding a variant
+        // turns it into a compile error, which forces this check to be revisited.
+        let LocalResolution::LocalActivity(la_resolution) = self;
+        matches!(
+            la_resolution.result,
+            LocalActivityExecutionResult::Cancelled(_)
+        )
+    }
+}
 #[derive(thiserror::Error, Debug, derive_more::From)]
 #[error("Lang provided workflow command with empty variant")]
@@ -1079,6 +1083,10 @@ pub struct EmptyWorkflowCommandErr;
 /// [DrivenWorkflow]s respond with these when called, to indicate what they want to do next.
 /// EX: Create a new timer, complete the workflow, etc.
 #[derive(Debug, derive_more::From, derive_more::Display)]
+#[cfg_attr(
+    feature = "save_wf_inputs",
+    derive(serde::Serialize, serde::Deserialize)
+)]
 #[allow(clippy::large_enum_variant)]
 pub enum WFCommand {
     /// Returned when we need to wait for the lang sdk to send us something
@@ -1171,12 +1179,9 @@ pub struct WorkflowStartedInfo {
     retry_policy: Option<RetryPolicy>,
 }
-type LocalActivityRequestSink =
-    Arc<dyn Fn(Vec<LocalActRequest>) -> Vec<LocalActivityResolution> + Send + Sync>;
 /// Wraps outgoing activation job protos with some internal details core might care about
 #[derive(Debug, derive_more::Display)]
-#[display(fmt = "{}", variant)]
+#[display(fmt = "{variant}")]
 struct OutgoingJob {
     variant: workflow_activation_job::Variant,
     /// Since LA resolutions are not distinguished from non-LA resolutions as far as lang is
@@ -1198,3 +1203,74 @@ impl From<OutgoingJob> for WorkflowActivationJob {
         }
     }
 }
+/// Errors thrown inside of workflow machines
+///
+/// Each variant carries a description string that is interpolated into its display message.
+#[derive(thiserror::Error, Debug)]
+pub(crate) enum WFMachinesError {
+    /// A nondeterminism error; displayed as `Nondeterminism error: <description>`.
+    #[error("Nondeterminism error: {0}")]
+    Nondeterminism(String),
+    /// A fatal error; displayed as `Fatal error in workflow machines: <description>`.
+    #[error("Fatal error in workflow machines: {0}")]
+    Fatal(String),
+}
+impl WFMachinesError {
+    /// Maps this error onto the matching [EvictionReason]: `Nondeterminism` errors give
+    /// `EvictionReason::Nondeterminism`, `Fatal` errors give `EvictionReason::Fatal`.
+    pub fn evict_reason(&self) -> EvictionReason {
+        match self {
+            Self::Fatal(_) => EvictionReason::Fatal,
+            Self::Nondeterminism(_) => EvictionReason::Nondeterminism,
+        }
+    }
+}
+impl From<TimestampError> for WFMachinesError {
+    /// Any timestamp decoding error becomes a `Fatal` machines error with a fixed message; the
+    /// details of the original error are discarded.
+    fn from(_: TimestampError) -> Self {
+        WFMachinesError::Fatal(String::from("Could not decode timestamp"))
+    }
+}
+/// Builds the failed activation-completion status that stands in for a [WFMachinesError].
+///
+/// The failure carries a fixed message, puts the error's display text in `source`, and takes
+/// `force_cause` from the error's eviction reason. All other failure fields are left empty.
+fn auto_fail_to_complete_status(err: WFMachinesError) -> workflow_activation_completion::Status {
+    let source = err.to_string();
+    let force_cause = WorkflowTaskFailedCause::from(err.evict_reason()) as i32;
+    let api_failure = temporal_sdk_core_protos::temporal::api::failure::v1::Failure {
+        message: String::from("Error while processing workflow task"),
+        source,
+        stack_trace: String::new(),
+        encoded_attributes: None,
+        cause: None,
+        failure_info: None,
+    };
+    workflow_activation_completion::Status::Failed(Failure {
+        failure: Some(api_failure),
+        force_cause,
+    })
+}
+/// Something that local activity requests can be handed off to.
+///
+/// Implementors must be `Send + Sync + 'static`.
+pub(crate) trait LocalActivityRequestSink: Send + Sync + 'static {
+    /// Takes ownership of a batch of [LocalActRequest]s and returns a (possibly empty) list of
+    /// [LocalActivityResolution]s.
+    // NOTE(review): presumably the returned resolutions are the ones that are available
+    // immediately — confirm against the implementor.
+    fn sink_reqs(&self, reqs: Vec<LocalActRequest>) -> Vec<LocalActivityResolution>;
+}
+/// A [LocalActivityRequestSink] that forwards requests to a [LocalActivityManager].
+///
+/// The `derive_more::Constructor` derive provides `LAReqSink::new(lam, recorder)`.
+#[derive(derive_more::Constructor)]
+pub(super) struct LAReqSink {
+    /// The manager that requests are enqueued on.
+    lam: Arc<LocalActivityManager>,
+    /// If we're recording WF inputs, we also need to store immediate resolutions so they're
+    /// available on replay.
+    #[allow(dead_code)] // sometimes appears unused due to feature flagging
+    recorder: Option<UnboundedSender<Vec<u8>>>,
+}
+impl LocalActivityRequestSink for LAReqSink {
+    /// Enqueues `reqs` on the local activity manager and returns whatever it hands back.
+    ///
+    /// An empty `reqs` returns an empty vec without touching the manager or the recorder. When
+    /// the `save_wf_inputs` feature is enabled, the returned resolutions are also passed to
+    /// `write_req`.
+    fn sink_reqs(&self, reqs: Vec<LocalActRequest>) -> Vec<LocalActivityResolution> {
+        // Nothing to enqueue, and nothing gets recorded for an empty call.
+        if reqs.is_empty() {
+            return vec![];
+        }
+        #[allow(clippy::let_and_return)] // When feature is off clippy doesn't like this
+        let res = self.lam.enqueue(reqs);
+        // We always save when there are any reqs, even if the response might be empty, so that
+        // calls/responses are 1:1
+        #[cfg(feature = "save_wf_inputs")]
+        self.write_req(&res);
+        res
+    }
+}