npm - @temporalio/core-bridge - Versions diffs - 0.16.3 → 0.17.2 - Mend

@temporalio/core-bridge 0.16.3 → 0.17.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/index.node +0 -0
package/package.json +3 -3
package/releases/aarch64-apple-darwin/index.node +0 -0
package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
package/releases/x86_64-apple-darwin/index.node +0 -0
package/releases/x86_64-pc-windows-msvc/index.node +0 -0
package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
package/sdk-core/sdk-core-protos/src/lib.rs +7 -0
package/sdk-core/src/core_tests/queries.rs +9 -2
package/sdk-core/src/core_tests/workflow_tasks.rs +85 -6
package/sdk-core/src/errors.rs +2 -4
package/sdk-core/src/machines/child_workflow_state_machine.rs +1 -10
package/sdk-core/src/machines/workflow_machines.rs +17 -15
package/sdk-core/src/test_help/history_builder.rs +10 -0
package/sdk-core/src/test_help/history_info.rs +21 -1
package/sdk-core/src/test_help/mod.rs +6 -10
package/sdk-core/src/worker/mod.rs +63 -46
package/sdk-core/src/workflow/workflow_tasks/mod.rs +33 -35

package/index.node CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@temporalio/core-bridge",
-  "version": "0.16.3",
+  "version": "0.17.2",
   "description": "Temporal.io SDK Core<>Node bridge",
   "main": "index.node",
   "types": "index.d.ts",
@@ -19,7 +19,7 @@
   "license": "MIT",
   "dependencies": {
     "@opentelemetry/api": "^1.0.3",
-    "@temporalio/common": "^0.16.0",
+    "@temporalio/common": "^0.17.2",
     "arg": "^5.0.1",
     "cargo-cp-artifact": "^0.1.4",
     "which": "^2.0.2"
@@ -40,5 +40,5 @@
   "publishConfig": {
     "access": "public"
   },
-  "gitHead": "af54140a4da7e191c06627c5e06b92a3fd929bcc"
+  "gitHead": "2232465a4f9b0cade28f0c21c2d7856053678728"
 }

package/releases/aarch64-apple-darwin/index.node CHANGED Viewed

Binary file

package/releases/aarch64-unknown-linux-gnu/index.node CHANGED Viewed

Binary file

package/releases/x86_64-apple-darwin/index.node CHANGED Viewed

Binary file

package/releases/x86_64-pc-windows-msvc/index.node CHANGED Viewed

Binary file

package/releases/x86_64-unknown-linux-gnu/index.node CHANGED Viewed

Binary file

package/sdk-core/sdk-core-protos/src/lib.rs CHANGED Viewed

@@ -264,6 +264,7 @@ pub mod coresdk {
     }
     pub mod workflow_completion {
+        use crate::temporal::api::failure;
         tonic::include_proto!("coresdk.workflow_completion");
         impl wf_activation_completion::Status {
@@ -274,6 +275,12 @@ pub mod coresdk {
                 }
             }
         }
+        impl From<failure::v1::Failure> for Failure {
+            fn from(f: failure::v1::Failure) -> Self {
+                Failure { failure: Some(f) }
+            }
+        }
     }
     pub mod child_workflow {

package/sdk-core/src/core_tests/queries.rs CHANGED Viewed

@@ -290,10 +290,17 @@ async fn legacy_query_failure_on_wft_failure() {
     core.shutdown().await;
 }
+#[rstest::rstest]
 #[tokio::test]
-async fn legacy_query_with_full_history_after_complete() {
+async fn legacy_query_after_complete(#[values(false, true)] full_history: bool) {
     let wfid = "fake_wf_id";
-    let t = canned_histories::single_timer_wf_completes("1");
+    let t = if full_history {
+        canned_histories::single_timer_wf_completes("1")
+    } else {
+        let mut t = canned_histories::single_timer("1");
+        t.add_workflow_task_completed();
+        t
+    };
     let query_with_hist_task = {
         let mut pr = hist_to_poll_resp(
             &t,

package/sdk-core/src/core_tests/workflow_tasks.rs CHANGED Viewed

@@ -1619,19 +1619,19 @@ async fn failing_wft_doesnt_eat_permit_forever() {
     t.add_by_type(EventType::WorkflowExecutionStarted);
     t.add_workflow_task_scheduled_and_started();
-    let failures = 5;
-    // One extra response for when we stop failing
-    let resps = (1..=(failures + 1)).map(|_| 1);
     let mock = MockServerGatewayApis::new();
-    let mut mock = single_hist_mock_sg("fake_wf_id", t, resps, mock, true);
+    let mut mock = single_hist_mock_sg("fake_wf_id", t, [1, 1, 1], mock, true);
     mock.worker_cfg(TEST_Q, |cfg| {
         cfg.max_cached_workflows = 2;
         cfg.max_outstanding_workflow_tasks = 2;
     });
+    let outstanding_mock_tasks = mock.outstanding_task_map.clone();
     let core = mock_core(mock);
-    // Spin failing the WFT to verify that we don't get stuck
-    for _ in 1..=failures {
+    let mut run_id = "".to_string();
+    // Fail twice, verifying a permit is eaten. We cannot fail the same run more than twice in a row
+    // because we purposefully time out rather than spamming.
+    for _ in 1..=2 {
         let activation = core.poll_workflow_activation(TEST_Q).await.unwrap();
         // Issue a nonsense completion that will trigger a WFT failure
         core.complete_workflow_activation(WfActivationCompletion::from_cmd(
@@ -1648,12 +1648,91 @@ async fn failing_wft_doesnt_eat_permit_forever() {
                 variant: Some(wf_activation_job::Variant::RemoveFromCache(_)),
             },]
         );
+        run_id = activation.run_id.clone();
         core.complete_workflow_activation(WfActivationCompletion::empty(TEST_Q, activation.run_id))
             .await
             .unwrap();
         assert_eq!(core.outstanding_wfts(TEST_Q), 0);
         assert_eq!(core.available_wft_permits(TEST_Q), 2);
     }
+    // We should be "out of work" because the mock service thinks we didn't complete the last task,
+    // which we didn't, because we don't spam failures. The real server would eventually time out
+    // the task. Mock doesn't understand that, so the WFT permit is released because eventually a
+    // new one will be generated. We manually clear the mock's outstanding task list so the next
+    // poll will work.
+    outstanding_mock_tasks
+        .unwrap()
+        .write()
+        .remove_by_left(&run_id);
+    let activation = core.poll_workflow_activation(TEST_Q).await.unwrap();
+    core.complete_workflow_activation(WfActivationCompletion::from_cmd(
+        TEST_Q,
+        activation.run_id,
+        CompleteWorkflowExecution { result: None }.into(),
+    ))
+    .await
+    .unwrap();
+    core.shutdown().await;
+}
+#[tokio::test]
+async fn cache_miss_doesnt_eat_permit_forever() {
+    let mut t = TestHistoryBuilder::default();
+    t.add_by_type(EventType::WorkflowExecutionStarted);
+    t.add_full_wf_task();
+    t.add_we_signaled("sig", vec![]);
+    t.add_full_wf_task();
+    t.add_workflow_execution_completed();
+    let mut mh = MockPollCfg::from_resp_batches(
+        "fake_wf_id",
+        t,
+        [
+            ResponseType::ToTaskNum(1),
+            ResponseType::OneTask(2),
+            ResponseType::ToTaskNum(1),
+            ResponseType::OneTask(2),
+            ResponseType::ToTaskNum(1),
+            ResponseType::OneTask(2),
+            // Last one to complete successfully
+            ResponseType::ToTaskNum(1),
+        ],
+        MockServerGatewayApis::new(),
+    );
+    mh.num_expected_fails = Some(3);
+    mh.expect_fail_wft_matcher =
+        Box::new(|_, cause, _| matches!(cause, WorkflowTaskFailedCause::ResetStickyTaskQueue));
+    let mut mock = build_mock_pollers(mh);
+    mock.worker_cfg(TEST_Q, |cfg| {
+        cfg.max_outstanding_workflow_tasks = 2;
+    });
+    let core = mock_core(mock);
+    // Spin missing the cache to verify that we don't get stuck
+    for _ in 1..=3 {
+        // Start
+        let activation = core.poll_workflow_activation(TEST_Q).await.unwrap();
+        core.complete_workflow_activation(WfActivationCompletion::empty(TEST_Q, activation.run_id))
+            .await
+            .unwrap();
+        // Evict
+        let activation = core.poll_workflow_activation(TEST_Q).await.unwrap();
+        assert_matches!(
+            activation.jobs.as_slice(),
+            [WfActivationJob {
+                variant: Some(wf_activation_job::Variant::RemoveFromCache(_)),
+            },]
+        );
+        core.complete_workflow_activation(WfActivationCompletion::empty(TEST_Q, activation.run_id))
+            .await
+            .unwrap();
+        assert_eq!(core.outstanding_wfts(TEST_Q), 0);
+        assert_eq!(core.available_wft_permits(TEST_Q), 2);
+        // When we loop back up, the poll will trigger a cache miss, which we should immediately
+        // reply to WFT with failure, and then poll again, which will deliver the from-the-start
+        // history
+    }
     let activation = core.poll_workflow_activation(TEST_Q).await.unwrap();
     core.complete_workflow_activation(WfActivationCompletion::from_cmd(
         TEST_Q,

package/sdk-core/src/errors.rs CHANGED Viewed

@@ -1,6 +1,6 @@
 //! Error types exposed by public APIs
-use crate::{machines::WFMachinesError, task_token::TaskToken, WorkerLookupErr};
+use crate::{machines::WFMachinesError, WorkerLookupErr};
 use temporal_sdk_core_protos::coresdk::{
     activity_result::ActivityResult, workflow_completion::WfActivationCompletion,
 };
@@ -11,9 +11,8 @@ pub(crate) struct WorkflowUpdateError {
     /// Underlying workflow error
     pub source: WFMachinesError,
     /// The run id of the erring workflow
+    #[allow(dead_code)] // Useful in debug output
     pub run_id: String,
-    /// The task token associated with this update, if one existed yet.
-    pub task_token: Option<TaskToken>,
 }
 impl From<WorkflowMissingError> for WorkflowUpdateError {
@@ -21,7 +20,6 @@ impl From<WorkflowMissingError> for WorkflowUpdateError {
         Self {
             source: WFMachinesError::Fatal("Workflow machines missing".to_string()),
             run_id: wme.run_id,
-            task_token: None,
         }
     }
 }

package/sdk-core/src/machines/child_workflow_state_machine.rs CHANGED Viewed

@@ -7,8 +7,7 @@ use std::convert::{TryFrom, TryInto};
 use temporal_sdk_core_protos::{
     coresdk::{
         child_workflow::{
-            self as wfr, child_workflow_result::Status as ChildWorkflowStatus,
-            ChildWorkflowCancellationType, ChildWorkflowResult,
+            self as wfr, child_workflow_result::Status as ChildWorkflowStatus, ChildWorkflowResult,
         },
         common::Payload,
         workflow_activation::{
@@ -115,7 +114,6 @@ impl StartCommandCreated {
             StartEventRecorded::default(),
             SharedState {
                 initiated_event_id,
-                attrs: None, // Drop the attributes to avoid holding large payloads in memory
                 ..state
             },
         )
@@ -303,9 +301,7 @@ pub(super) struct SharedState {
     workflow_id: String,
     run_id: String,
     workflow_type: String,
-    cancellation_type: ChildWorkflowCancellationType,
     cancelled_before_sent: bool,
-    attrs: Option<StartChildWorkflowExecution>,
 }
 /// Creates a new child workflow state machine and a command to start it on the server.
@@ -329,11 +325,6 @@ impl ChildWorkflowMachine {
                 workflow_id: attribs.workflow_id.clone(),
                 workflow_type: attribs.workflow_type.clone(),
                 namespace: attribs.namespace.clone(),
-                cancellation_type: ChildWorkflowCancellationType::from_i32(
-                    attribs.cancellation_type,
-                )
-                .unwrap(),
-                attrs: Some(attribs.clone()),
                 ..Default::default()
             },
         };

package/sdk-core/src/machines/workflow_machines.rs CHANGED Viewed

@@ -61,6 +61,10 @@ pub(crate) struct WorkflowMachines {
     /// Eventually, this number should reach the started id in the latest history update, but
     /// we must incrementally apply the history while communicating with lang.
     next_started_event_id: i64,
+    /// The event id of the most recent event processed. It's possible in some situations (ex legacy
+    /// queries) to receive a history with no new workflow tasks. If the last history we processed
+    /// also had no new tasks, we need a way to know not to apply the same events over again.
+    last_processed_event: i64,
     /// True if the workflow is replaying from history
     pub replaying: bool,
     /// Namespace this workflow exists in
@@ -120,7 +124,6 @@ struct CommandAndMachine {
 #[derive(Debug, Clone, Copy)]
 struct ChangeInfo {
-    deprecated: bool,
     created_command: bool,
 }
@@ -196,6 +199,7 @@ impl WorkflowMachines {
             // In an ideal world one could say ..Default::default() here and it'd still work.
             current_started_event_id: 0,
             next_started_event_id: 0,
+            last_processed_event: 0,
             workflow_start_time: None,
             workflow_end_time: None,
             current_wf_time: None,
@@ -529,11 +533,16 @@ impl WorkflowMachines {
         }
         let last_handled_wft_started_id = self.current_started_event_id;
-        let events = self
-            .last_history_from_server
-            .take_next_wft_sequence(last_handled_wft_started_id)
-            .await
-            .map_err(WFMachinesError::HistoryFetchingError)?;
+        let events = {
+            let mut evts = self
+                .last_history_from_server
+                .take_next_wft_sequence(last_handled_wft_started_id)
+                .await
+                .map_err(WFMachinesError::HistoryFetchingError)?;
+            // Do not re-process events we have already processed
+            evts.retain(|e| e.event_id > self.last_processed_event);
+            evts
+        };
         // We're caught up on reply if there are no new events to process
         // TODO: Probably this is unneeded if we evict whenever history is from non-sticky queue
@@ -564,23 +573,17 @@ impl WorkflowMachines {
         while let Some(event) = history.next() {
             let next_event = history.peek();
-            if event.event_type == EventType::WorkflowTaskStarted as i32 && next_event.is_none() {
-                self.handle_event(event, false)?;
-                break;
-            }
             self.handle_event(event, next_event.is_some())?;
+            self.last_processed_event = event.event_id;
         }
         // Scan through to the next WFT, searching for any patch markers, so that we can
         // pre-resolve them.
         for e in self.last_history_from_server.peek_next_wft_sequence() {
-            if let Some((patch_id, deprecated)) = e.get_changed_marker_details() {
+            if let Some((patch_id, _)) = e.get_changed_marker_details() {
                 self.encountered_change_markers.insert(
                     patch_id.clone(),
                     ChangeInfo {
-                        deprecated,
                         created_command: false,
                     },
                 );
@@ -743,7 +746,6 @@ impl WorkflowMachines {
                             self.encountered_change_markers.insert(
                                 attrs.patch_id,
                                 ChangeInfo {
-                                    deprecated: attrs.deprecated,
                                     created_command: true,
                                 },
                             );

package/sdk-core/src/test_help/history_builder.rs CHANGED Viewed

@@ -342,6 +342,16 @@ impl TestHistoryBuilder {
         HistoryInfo::new_from_history(&self.events.clone().into(), None)
     }
+    pub(crate) fn get_one_wft(
+        &self,
+        from_wft_number: usize,
+    ) -> Result<HistoryInfo, HistoryInfoError> {
+        let mut histinfo =
+            HistoryInfo::new_from_history(&self.events.clone().into(), Some(from_wft_number))?;
+        histinfo.make_incremental();
+        Ok(histinfo)
+    }
     fn build_and_push_event(&mut self, event_type: EventType, attribs: Attributes) {
         self.current_event_id += 1;
         let evt = HistoryEvent {

package/sdk-core/src/test_help/history_info.rs CHANGED Viewed

@@ -39,6 +39,7 @@ impl HistoryInfo {
             return Err(HistoryInfoError::HistoryEndsUnexpectedly);
         }
+        let is_all_hist = to_wf_task_num.is_none();
         let to_wf_task_num = to_wf_task_num.unwrap_or(usize::MAX);
         let mut workflow_task_started_event_id = 0;
         let mut previous_started_event_id = 0;
@@ -83,7 +84,7 @@ impl HistoryInfo {
             }
             if next_event.is_none() {
-                if event.is_final_wf_execution_event() {
+                if event.is_final_wf_execution_event() || is_all_hist {
                     return Ok(Self {
                         previous_started_event_id,
                         workflow_task_started_event_id,
@@ -100,6 +101,18 @@ impl HistoryInfo {
         unreachable!()
     }
+    /// Remove events from the beginning of this history such that it looks like what would've been
+    /// delivered on a sticky queue where the previously started task was the one before the last
+    /// task in this history.
+    pub(crate) fn make_incremental(&mut self) {
+        let last_complete_ix = self
+            .events
+            .iter()
+            .rposition(|he| he.event_type() == EventType::WorkflowTaskCompleted)
+            .expect("Must be a WFT completed event in history");
+        self.events.drain(0..=last_complete_ix);
+    }
     pub(crate) fn events(&self) -> &[HistoryEvent] {
         &self.events
     }
@@ -135,4 +148,11 @@ mod tests {
         let history_info = t.get_history_info(2).unwrap();
         assert_eq!(8, history_info.events.len());
     }
+    #[test]
+    fn incremental_works() {
+        let t = canned_histories::single_timer("timer1");
+        let hi = t.get_one_wft(2).unwrap();
+        dbg!(hi.events);
+    }
 }

package/sdk-core/src/test_help/mod.rs CHANGED Viewed

@@ -53,6 +53,10 @@ pub static NO_MORE_WORK_ERROR_MSG: &str = "No more work to do";
 #[derive(derive_more::From, Debug, Clone, Copy, Eq, PartialEq, Hash)]
 pub enum ResponseType {
     ToTaskNum(usize),
+    /// Returns just the history after the WFT completed of the provided task number - 1, through to
+    /// the next WFT started. Simulating the incremental history for just the provided task number
+    #[from(ignore)]
+    OneTask(usize),
     AllHistory,
 }
@@ -142,6 +146,7 @@ pub struct FakeWfResponses {
 pub struct MocksHolder<SG> {
     sg: SG,
     mock_pollers: HashMap<String, MockWorker>,
+    // bidirectional mapping of run id / task token
     pub outstanding_task_map: Option<Arc<RwLock<BiMap<String, TaskToken>>>>,
 }
@@ -377,16 +382,6 @@ pub fn build_mock_pollers(mut cfg: MockPollCfg) -> MocksHolder<MockServerGateway
             }
         }
-        // TODO: Fix -- or not? Sticky invalidation could make this pointless anyway
-        // Verify response batches only ever return longer histories (IE: Are sorted ascending)
-        // assert!(
-        //     hist.response_batches
-        //         .as_slice()
-        //         .windows(2)
-        //         .all(|w| w[0] <= w[1]),
-        //     "response batches must have increasing wft numbers"
-        // );
         if cfg.enforce_correct_number_of_polls {
             *correct_num_polls.get_or_insert(0) += hist.response_batches.len();
         }
@@ -495,6 +490,7 @@ pub fn hist_to_poll_resp(
     };
     let hist_info = match response_type {
         ResponseType::ToTaskNum(tn) => t.get_history_info(tn).unwrap(),
+        ResponseType::OneTask(tn) => t.get_one_wft(tn).unwrap(),
         ResponseType::AllHistory => t.get_full_history_info().unwrap(),
     };
     let batch = hist_info.events().to_vec();

package/sdk-core/src/worker/mod.rs CHANGED Viewed

@@ -329,20 +329,28 @@ impl Worker {
         completion: WfActivationCompletion,
     ) -> Result<(), CompleteWfError> {
         let wfstatus = completion.status;
-        let did_complete_wft = match wfstatus {
+        let report_outcome = match wfstatus {
             Some(wf_activation_completion::Status::Successful(success)) => {
                 self.wf_activation_success(&completion.run_id, success)
                     .await
             }
             Some(wf_activation_completion::Status::Failed(failure)) => {
-                self.wf_activation_failed(&completion.run_id, failure).await
+                self.wf_activation_failed(
+                    &completion.run_id,
+                    WorkflowTaskFailedCause::Unspecified,
+                    failure,
+                )
+                .await
+            }
+            None => {
+                return Err(CompleteWfError::MalformedWorkflowCompletion {
+                    reason: "Workflow completion had empty status field".to_owned(),
+                    completion: None,
+                })
             }
-            None => Err(CompleteWfError::MalformedWorkflowCompletion {
-                reason: "Workflow completion had empty status field".to_owned(),
-                completion: None,
-            }),
         }?;
-        self.after_workflow_activation(&completion.run_id, did_complete_wft);
+        self.after_workflow_activation(&completion.run_id, report_outcome);
         Ok(())
     }
@@ -488,6 +496,7 @@ impl Worker {
                         }),
                     )
                     .await?;
+                self.return_workflow_task_permit();
                 None
             }
             NewWfTaskOutcome::Evict(e) => {
@@ -508,7 +517,7 @@ impl Worker {
         &self,
         run_id: &str,
         success: workflow_completion::Success,
-    ) -> Result<bool, CompleteWfError> {
+    ) -> Result<WFTReportOutcome, CompleteWfError> {
         // Convert to wf commands
         let cmds = success
             .commands
@@ -552,7 +561,10 @@ impl Worker {
                         .await
                 })
                 .await?;
-                Ok(true)
+                Ok(WFTReportOutcome {
+                    reported_to_server: true,
+                    failed: false,
+                })
             }
             Ok(Some(ServerCommandsWithWorkflowInfo {
                 task_token,
@@ -562,9 +574,15 @@ impl Worker {
                 self.server_gateway
                     .respond_legacy_query(task_token, result)
                     .await?;
-                Ok(true)
+                Ok(WFTReportOutcome {
+                    reported_to_server: true,
+                    failed: false,
+                })
             }
-            Ok(None) => Ok(false),
+            Ok(None) => Ok(WFTReportOutcome {
+                reported_to_server: false,
+                failed: false,
+            }),
             Err(update_err) => {
                 // Automatically fail the workflow task in the event we couldn't update machines
                 let fail_cause = if matches!(&update_err.source, WFMachinesError::Nondeterminism(_))
@@ -573,30 +591,13 @@ impl Worker {
                 } else {
                     WorkflowTaskFailedCause::Unspecified
                 };
-                warn!(run_id, error=?update_err, "Failing workflow task");
-                if let Some(ref tt) = update_err.task_token {
-                    let wft_fail_str = format!("{:?}", update_err);
-                    self.handle_wft_reporting_errs(run_id, || async {
-                        self.server_gateway
-                            .fail_workflow_task(
-                                tt.clone(),
-                                fail_cause,
-                                Some(Failure::application_failure(wft_fail_str.clone(), false)),
-                            )
-                            .await
-                    })
-                    .await?;
-                    // We must evict the workflow since we've failed a WFT
-                    self.request_wf_eviction(
-                        run_id,
-                        format!("Workflow task failure: {}", wft_fail_str),
-                    );
-                    Ok(true)
-                } else {
-                    Ok(false)
-                }
+                let wft_fail_str = format!("{:?}", update_err);
+                self.wf_activation_failed(
+                    run_id,
+                    fail_cause,
+                    Failure::application_failure(wft_fail_str.clone(), false).into(),
+                )
+                .await
             }
         }
     }
@@ -607,35 +608,46 @@ impl Worker {
     async fn wf_activation_failed(
         &self,
         run_id: &str,
+        cause: WorkflowTaskFailedCause,
         failure: workflow_completion::Failure,
-    ) -> Result<bool, CompleteWfError> {
+    ) -> Result<WFTReportOutcome, CompleteWfError> {
         Ok(match self.wft_manager.failed_activation(run_id) {
             FailedActivationOutcome::Report(tt) => {
+                warn!(run_id, failure=?failure, "Failing workflow activation");
                 self.handle_wft_reporting_errs(run_id, || async {
                     self.server_gateway
-                        .fail_workflow_task(
-                            tt,
-                            WorkflowTaskFailedCause::Unspecified,
-                            failure.failure.map(Into::into),
-                        )
+                        .fail_workflow_task(tt, cause, failure.failure.map(Into::into))
                         .await
                 })
                 .await?;
-                true
+                WFTReportOutcome {
+                    reported_to_server: true,
+                    failed: true,
+                }
             }
             FailedActivationOutcome::ReportLegacyQueryFailure(task_token) => {
+                warn!(run_id, failure=?failure, "Failing legacy query request");
                 self.server_gateway
                     .respond_legacy_query(task_token, legacy_query_failure(failure))
                     .await?;
-                true
+                WFTReportOutcome {
+                    reported_to_server: true,
+                    failed: true,
+                }
             }
-            FailedActivationOutcome::NoReport => false,
+            FailedActivationOutcome::NoReport => WFTReportOutcome {
+                reported_to_server: false,
+                failed: true,
+            },
         })
     }
-    fn after_workflow_activation(&self, run_id: &str, did_complete_wft: bool) {
+    fn after_workflow_activation(&self, run_id: &str, report_outcome: WFTReportOutcome) {
         self.wft_manager.after_wft_report(run_id);
-        if did_complete_wft {
+        if report_outcome.reported_to_server || report_outcome.failed {
+            // If we failed the WFT but didn't report anything, we still want to release the WFT
+            // permit since the server will eventually time out the task and we've already evicted
+            // the run.
             self.return_workflow_task_permit();
         }
         self.wft_manager.on_activation_done(run_id);
@@ -717,6 +729,11 @@ impl WorkerConfig {
     }
 }
+struct WFTReportOutcome {
+    reported_to_server: bool,
+    failed: bool,
+}
 #[cfg(test)]
 mod tests {
     use super::*;

package/sdk-core/src/workflow/workflow_tasks/mod.rs CHANGED Viewed

@@ -135,7 +135,7 @@ pub enum ActivationAction {
 }
 macro_rules! machine_mut {
-    ($myself:ident, $run_id:ident, $task_token:ident, $clos:expr) => {{
+    ($myself:ident, $run_id:ident, $clos:expr) => {{
         $myself
             .workflow_machines
             .access($run_id, $clos)
@@ -143,7 +143,6 @@ macro_rules! machine_mut {
             .map_err(|source| WorkflowUpdateError {
                 source,
                 run_id: $run_id.to_owned(),
-                task_token: Some($task_token.clone()),
             })
     }};
 }
@@ -256,6 +255,7 @@ impl WorkflowTaskManager {
         debug!(
             task_token = %&work.task_token,
             history_length = %work.history.events.len(),
+            attempt = %work.attempt,
             "Applying new workflow task from server"
         );
         let task_start_time = Instant::now();
@@ -325,19 +325,20 @@ impl WorkflowTaskManager {
             return Ok(None);
         }
-        let task_token = if let Some(entry) = self.workflow_machines.get_task(run_id) {
-            entry.info.task_token.clone()
-        } else {
-            if !self.activation_has_eviction(run_id) {
-                // Don't bother warning if this was an eviction, since it's normal to issue
-                // eviction activations without an associated workflow task in that case.
-                warn!(
-                    run_id,
-                    "Attempted to complete activation for nonexistent run"
-                );
-            }
-            return Ok(None);
-        };
+        let (task_token, is_leg_query_task) =
+            if let Some(entry) = self.workflow_machines.get_task(run_id) {
+                (entry.info.task_token.clone(), entry.legacy_query.is_some())
+            } else {
+                if !self.activation_has_eviction(run_id) {
+                    // Don't bother warning if this was an eviction, since it's normal to issue
+                    // eviction activations without an associated workflow task in that case.
+                    warn!(
+                        run_id,
+                        "Attempted to complete activation for run without associated workflow task"
+                    );
+                }
+                return Ok(None);
+            };
         // If the only command in the activation is a legacy query response, that means we need
         // to respond differently than a typical activation.
@@ -364,7 +365,6 @@ impl WorkflowTaskManager {
                             return Err(WorkflowUpdateError {
                                 source: WFMachinesError::Fatal("Legacy query activation response included other commands, this is not allowed and constitutes an error in the lang SDK".to_string()),
                                 run_id: run_id.to_string(),
-                                task_token: Some(task_token)
                             });
                         }
                         query_responses.push(qr);
@@ -375,30 +375,32 @@ impl WorkflowTaskManager {
             }
             // Send commands from lang into the machines
-            machine_mut!(self, run_id, task_token, |wfm: &mut WorkflowManager| {
+            machine_mut!(self, run_id, |wfm: &mut WorkflowManager| {
                 wfm.push_commands(commands).boxed()
             })?;
             // Check if the workflow run needs another activation and queue it up if there is one
             // by pushing it into the pending activations list
-            let next_activation = machine_mut!(
-                self,
-                run_id,
-                task_token,
-                move |mgr: &mut WorkflowManager| mgr.get_next_activation().boxed()
-            )?;
+            let next_activation = machine_mut!(self, run_id, move |mgr: &mut WorkflowManager| mgr
+                .get_next_activation()
+                .boxed())?;
             if !next_activation.jobs.is_empty() {
                 self.pending_activations.push(next_activation);
                 let _ = self.pending_activations_notifier.send(true);
             }
             // We want to fetch the outgoing commands only after any new activation has been queued,
             // as doing so may have altered the outgoing commands.
-            let server_cmds =
-                machine_mut!(self, run_id, task_token, |wfm: &mut WorkflowManager| {
-                    async move { Ok(wfm.get_server_commands()) }.boxed()
-                })?;
+            let server_cmds = machine_mut!(self, run_id, |wfm: &mut WorkflowManager| {
+                async move { Ok(wfm.get_server_commands()) }.boxed()
+            })?;
+            let is_query_playback = is_leg_query_task && query_responses.is_empty();
             // We only actually want to send commands back to the server if there are no more
-            // pending activations and we are caught up on replay.
-            if !self.pending_activations.has_pending(run_id) && !server_cmds.replaying {
+            // pending activations and we are caught up on replay. We don't want to complete a wft
+            // if we already saw the final event in the workflow, or if we are playing back for the
+            // express purpose of fulfilling a query
+            if !self.pending_activations.has_pending(run_id)
+                && !server_cmds.replaying
+                && !is_query_playback
+            {
                 Some(ServerCommandsWithWorkflowInfo {
                     task_token,
                     action: ActivationAction::WftComplete {
@@ -448,7 +450,7 @@ impl WorkflowTaskManager {
         } else {
             // Blow up any cached data associated with the workflow
             let should_report = self
-                .request_eviction(run_id, "Activation failed by lang")
+                .request_eviction(run_id, "Activation failed")
                 .map_or(true, |attempt| attempt <= 1);
             if should_report {
                 FailedActivationOutcome::Report(tt)
@@ -507,11 +509,7 @@ impl WorkflowTaskManager {
                 Ok((wft_info, activation))
             }
-            Err(source) => Err(WorkflowUpdateError {
-                source,
-                run_id,
-                task_token: Some(wft_info.task_token),
-            }),
+            Err(source) => Err(WorkflowUpdateError { source, run_id }),
         }
     }