@temporalio/core-bridge 0.20.0 → 0.21.0

This diff compares the contents of the two package versions as published to their public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in the registry.
package/index.d.ts CHANGED
@@ -162,6 +162,7 @@ export declare function newReplayWorker(
   callback: WorkerCallback
 ): void;
 export declare function workerShutdown(worker: Worker, callback: VoidCallback): void;
+export declare function clientClose(client: Client): void;
 export declare function runtimeShutdown(runtime: Runtime, callback: VoidCallback): void;
 export declare function pollLogs(runtime: Runtime, callback: LogsCallback): void;
 export declare function workerPollWorkflowActivation(
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@temporalio/core-bridge",
-  "version": "0.20.0",
+  "version": "0.21.0",
   "description": "Temporal.io SDK Core<>Node bridge",
   "main": "index.js",
   "types": "index.d.ts",
@@ -20,7 +20,7 @@
   "license": "MIT",
   "dependencies": {
     "@opentelemetry/api": "^1.0.3",
-    "@temporalio/internal-non-workflow-common": "^0.20.0",
+    "@temporalio/internal-non-workflow-common": "^0.21.0",
     "arg": "^5.0.1",
     "cargo-cp-artifact": "^0.1.4",
     "which": "^2.0.2"
@@ -43,5 +43,5 @@
   "publishConfig": {
     "access": "public"
   },
-  "gitHead": "e6b7468a00c68efd4baebbf866cf0a28c150bb6b"
+  "gitHead": "eb5901f47e16f8c8fe36c1154d5176c5f3205efc"
 }
@@ -21,7 +21,8 @@ use temporal_sdk_core_protos::{
         history::v1::History,
         query::v1::WorkflowQuery,
         workflowservice::v1::{
-            RespondQueryTaskCompletedResponse, RespondWorkflowTaskCompletedResponse,
+            GetWorkflowExecutionHistoryResponse, RespondQueryTaskCompletedResponse,
+            RespondWorkflowTaskCompletedResponse,
         },
     },
 };
@@ -381,3 +382,108 @@ async fn legacy_query_after_complete(#[values(false, true)] full_history: bool)
 
     core.shutdown().await;
 }
+
+#[tokio::test]
+async fn query_cache_miss_causes_page_fetch_dont_reply_wft_too_early() {
+    let wfid = "fake_wf_id";
+    let query_resp = "response";
+    let t = canned_histories::single_timer("1");
+    let full_hist = t.get_full_history_info().unwrap();
+    let tasks = VecDeque::from(vec![{
+        // Create a partial task
+        let mut pr = hist_to_poll_resp(
+            &t,
+            wfid.to_owned(),
+            ResponseType::OneTask(2),
+            TEST_Q.to_string(),
+        );
+        pr.queries = HashMap::new();
+        pr.queries.insert(
+            "the-query".to_string(),
+            WorkflowQuery {
+                query_type: "query-type".to_string(),
+                query_args: Some(b"hi".into()),
+                header: None,
+            },
+        );
+        pr
+    }]);
+    let mut mock_client = mock_workflow_client();
+    mock_client
+        .expect_get_workflow_execution_history()
+        .returning(move |_, _, _| {
+            Ok(GetWorkflowExecutionHistoryResponse {
+                history: Some(full_hist.clone().into()),
+                ..Default::default()
+            })
+        });
+    mock_client
+        .expect_complete_workflow_task()
+        .times(1)
+        .returning(|resp| {
+            // Verify both the complete command and the query response are sent
+            assert_eq!(resp.commands.len(), 1);
+            assert_eq!(resp.query_responses.len(), 1);
+
+            Ok(RespondWorkflowTaskCompletedResponse::default())
+        });
+
+    let mut mock = MocksHolder::from_client_with_responses(mock_client, tasks, vec![]);
+    mock.worker_cfg(|wc| wc.max_cached_workflows = 10);
+    let core = mock_worker(mock);
+    let task = core.poll_workflow_activation().await.unwrap();
+    // The first task should *only* start the workflow. It should *not* have a query in it, which
+    // was the bug. Query should only appear after we have caught up on replay.
+    assert_matches!(
+        task.jobs.as_slice(),
+        [WorkflowActivationJob {
+            variant: Some(workflow_activation_job::Variant::StartWorkflow(_)),
+        }]
+    );
+    core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
+        task.run_id,
+        start_timer_cmd(1, Duration::from_secs(1)),
+    ))
+    .await
+    .unwrap();
+
+    let task = core.poll_workflow_activation().await.unwrap();
+    assert_matches!(
+        task.jobs.as_slice(),
+        [WorkflowActivationJob {
+            variant: Some(workflow_activation_job::Variant::FireTimer(_)),
+        }]
+    );
+    core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
+        task.run_id,
+        CompleteWorkflowExecution { result: None }.into(),
+    ))
+    .await
+    .unwrap();
+
+    // Now the query shall arrive
+    let task = core.poll_workflow_activation().await.unwrap();
+    assert_matches!(
+        task.jobs[0],
+        WorkflowActivationJob {
+            variant: Some(workflow_activation_job::Variant::QueryWorkflow(_)),
+        }
+    );
+    core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
+        task.run_id,
+        QueryResult {
+            query_id: "the-query".to_string(),
+            variant: Some(
+                QuerySuccess {
+                    response: Some(query_resp.into()),
+                }
+                .into(),
+            ),
+        }
+        .into(),
+    ))
+    .await
+    .unwrap();
+
+    core.shutdown().await;
+}
@@ -35,7 +35,7 @@ use temporal_sdk_core_protos::{
     temporal::api::{
         enums::v1::{EventType, WorkflowTaskFailedCause},
         failure::v1::Failure,
-        history::v1::{history_event, TimerFiredEventAttributes},
+        history::v1::{history_event, History, TimerFiredEventAttributes},
         workflowservice::v1::{
             GetWorkflowExecutionHistoryResponse, RespondWorkflowTaskCompletedResponse,
         },
@@ -1694,3 +1694,46 @@ async fn tasks_from_completion_are_delivered() {
         .unwrap();
     core.shutdown().await;
 }
+
+#[tokio::test]
+async fn evict_missing_wf_during_poll_doesnt_eat_permit() {
+    let wfid = "fake_wf_id";
+    let mut t = TestHistoryBuilder::default();
+    t.add_by_type(EventType::WorkflowExecutionStarted);
+    t.add_full_wf_task();
+    t.add_we_signaled("sig", vec![]);
+    t.add_full_wf_task();
+    t.add_workflow_execution_completed();
+
+    let tasks = [hist_to_poll_resp(
+        &t,
+        wfid.to_owned(),
+        // Use a partial task so that we'll fetch history
+        ResponseType::OneTask(2),
+        TEST_Q.to_string(),
+    )];
+    let mut mock = mock_workflow_client();
+    mock.expect_get_workflow_execution_history()
+        .times(1)
+        .returning(move |_, _, _| {
+            Ok(GetWorkflowExecutionHistoryResponse {
+                // Empty history so we error applying it (no jobs)
+                history: Some(History { events: vec![] }),
+                raw_history: vec![],
+                next_page_token: vec![],
+                archived: false,
+            })
+        });
+    let mut mock = MocksHolder::from_client_with_responses(mock, tasks, []);
+    mock.worker_cfg(|wc| {
+        wc.max_cached_workflows = 1;
+        wc.max_outstanding_workflow_tasks = 1;
+    });
+    let core = mock_worker(mock);
+
+    // Should error because mock is out of work
+    assert_matches!(core.poll_workflow_activation().await, Err(_));
+    assert_eq!(core.available_wft_permits(), 1);
+
+    core.shutdown().await;
+}
@@ -14,7 +14,10 @@ use temporal_sdk_core_protos::{
         activity_task::{activity_task, ActivityCancelReason, ActivityTask, Cancel, Start},
         common::WorkflowExecution,
     },
-    temporal::api::enums::v1::TimeoutType,
+    temporal::api::{
+        enums::v1::TimeoutType,
+        failure::v1::{failure::FailureInfo, ApplicationFailureInfo},
+    },
 };
 use tokio::{
     sync::{
@@ -420,10 +423,13 @@ impl LocalActivityManager {
             LocalActivityExecutionResult::Failed(f) => {
                 if let Some(backoff_dur) = info.la_info.schedule_cmd.retry_policy.should_retry(
                     info.attempt as usize,
-                    &f.failure
-                        .as_ref()
-                        .map(|f| format!("{:?}", f))
-                        .unwrap_or_else(|| "".to_string()),
+                    f.failure.as_ref().map_or("", |f| match &f.failure_info {
+                        Some(FailureInfo::ApplicationFailureInfo(ApplicationFailureInfo {
+                            r#type,
+                            ..
+                        })) => r#type.as_str(),
+                        _ => "",
+                    }),
                 ) {
                     let will_use_timer =
                         backoff_dur > info.la_info.schedule_cmd.local_retry_threshold;
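Note on the change above: `should_retry` previously received a `Debug`-formatted dump of the whole failure, so a name listed in `non_retryable_error_types` (such as "TestError") could never match it; it now receives only the `ApplicationFailureInfo` `type` string. A minimal sketch of the comparison this enables, using simplified stand-in types rather than the `temporal_sdk_core_protos` definitions:

```rust
// Stand-in types for illustration only; the real ones live in temporal_sdk_core_protos.
struct ApplicationFailureInfo {
    r#type: String,
}

struct Failure {
    application_info: Option<ApplicationFailureInfo>,
}

struct RetryPolicy {
    non_retryable_error_types: Vec<String>,
}

impl RetryPolicy {
    /// Returns true if the failure's application error type is listed as non-retryable.
    fn is_non_retryable(&self, failure: Option<&Failure>) -> bool {
        // Extract only the application failure's `type` string. Comparing a Debug dump of
        // the whole failure (the old behavior) would never equal a bare type name.
        let err_type = failure
            .and_then(|f| f.application_info.as_ref())
            .map_or("", |i| i.r#type.as_str());
        self.non_retryable_error_types
            .iter()
            .any(|t| t.as_str() == err_type)
    }
}

fn main() {
    let policy = RetryPolicy {
        non_retryable_error_types: vec!["TestError".to_string()],
    };
    let failure = Failure {
        application_info: Some(ApplicationFailureInfo {
            r#type: "TestError".to_string(),
        }),
    };
    // A failure whose type is listed should not be retried.
    assert!(policy.is_non_retryable(Some(&failure)));
}
```

The `respects_non_retryable_error_types` test added further down exercises the real code path end to end.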
@@ -637,7 +643,9 @@ impl Drop for TimeoutBag {
 mod tests {
     use super::*;
     use crate::protosext::LACloseTimeouts;
-    use temporal_sdk_core_protos::coresdk::common::RetryPolicy;
+    use temporal_sdk_core_protos::{
+        coresdk::common::RetryPolicy, temporal::api::failure::v1::Failure,
+    };
     use tokio::{sync::mpsc::error::TryRecvError, task::yield_now};
 
     impl DispatchOrTimeoutLA {
@@ -785,6 +793,50 @@ mod tests {
         )
     }
 
+    #[tokio::test]
+    async fn respects_non_retryable_error_types() {
+        let lam = LocalActivityManager::test(1);
+        lam.enqueue([NewLocalAct {
+            schedule_cmd: ValidScheduleLA {
+                seq: 1,
+                activity_id: "1".to_string(),
+                attempt: 1,
+                retry_policy: RetryPolicy {
+                    initial_interval: Some(Duration::from_secs(1).into()),
+                    backoff_coefficient: 10.0,
+                    maximum_interval: Some(Duration::from_secs(10).into()),
+                    maximum_attempts: 10,
+                    non_retryable_error_types: vec!["TestError".to_string()],
+                },
+                local_retry_threshold: Duration::from_secs(5),
+                ..Default::default()
+            },
+            workflow_type: "".to_string(),
+            workflow_exec_info: Default::default(),
+            schedule_time: SystemTime::now(),
+        }
+        .into()]);
+
+        let next = lam.next_pending().await.unwrap().unwrap();
+        let tt = TaskToken(next.task_token);
+        let res = lam.complete(
+            &tt,
+            &LocalActivityExecutionResult::Failed(ActFail {
+                failure: Some(Failure {
+                    failure_info: Some(FailureInfo::ApplicationFailureInfo(
+                        ApplicationFailureInfo {
+                            r#type: "TestError".to_string(),
+                            non_retryable: false,
+                            ..Default::default()
+                        },
+                    )),
+                    ..Default::default()
+                }),
+            }),
+        );
+        assert_matches!(res, LACompleteAction::Report(_));
+    }
+
     #[tokio::test]
     async fn can_cancel_during_local_backoff() {
         let lam = LocalActivityManager::test(1);
@@ -65,6 +65,7 @@ use tracing_futures::Instrument;
 
 #[cfg(test)]
 use crate::worker::client::WorkerClient;
+use crate::workflow::workflow_tasks::EvictionRequestResult;
 
 /// A worker polls on a certain task queue
 pub struct Worker {
@@ -530,13 +531,18 @@ impl Worker {
         self.workflows_semaphore.add_permit();
     }
 
+    /// Request a workflow eviction. Returns true if we actually queued up a new eviction request.
     pub(crate) fn request_wf_eviction(
         &self,
         run_id: &str,
         message: impl Into<String>,
         reason: EvictionReason,
-    ) {
-        self.wft_manager.request_eviction(run_id, message, reason);
+    ) -> bool {
+        match self.wft_manager.request_eviction(run_id, message, reason) {
+            EvictionRequestResult::EvictionIssued(_) => true,
+            EvictionRequestResult::NotFound => false,
+            EvictionRequestResult::EvictionAlreadyOutstanding => false,
+        }
     }
 
     /// Sets a function to be called at the end of each activation completion
@@ -675,11 +681,16 @@ impl Worker {
             }
             NewWfTaskOutcome::Evict(e) => {
                 warn!(error=?e, run_id=%we.run_id, "Error while applying poll response to workflow");
-                self.request_wf_eviction(
+                let did_issue_eviction = self.request_wf_eviction(
                     &we.run_id,
                     format!("Error while applying poll response to workflow: {:?}", e),
                     e.evict_reason(),
                 );
+                // If we didn't actually need to issue an eviction, then return the WFT permit.
+                // EX: The workflow we tried to evict wasn't in the cache.
+                if !did_issue_eviction {
+                    self.return_workflow_task_permit();
+                }
                 None
             }
         })
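The hunk above ties eviction requests to permit accounting: the poll path holds a workflow-task permit, and when the eviction turns out to be a no-op (the run was never cached) the permit must be handed back or the worker slowly starves. The `evict_missing_wf_during_poll_doesnt_eat_permit` test earlier in the diff pins that invariant down. A rough sketch of the invariant, using a plain `tokio::sync::Semaphore` as a stand-in for core's permit bookkeeping:

```rust
use std::sync::Arc;
use tokio::sync::Semaphore;

/// Hypothetical stand-in for applying a poll response; it fails when the run is not
/// cached, mirroring the "tried to evict a workflow that wasn't in the cache" case.
fn apply_poll_response(run_is_cached: bool) -> Result<(), &'static str> {
    if run_is_cached {
        Ok(())
    } else {
        Err("workflow not in cache")
    }
}

#[tokio::main]
async fn main() {
    let wft_permits = Arc::new(Semaphore::new(1));

    // Acquire a permit before handling the poll, as the worker does.
    let permit = wft_permits.clone().acquire_owned().await.unwrap();

    if apply_poll_response(false).is_err() {
        // No eviction was issued (nothing was cached), so return the permit
        // right away instead of leaking it on the error path.
        drop(permit);
    }

    // Every permit is available again, matching `available_wft_permits() == 1`.
    assert_eq!(wft_permits.available_permits(), 1);
}
```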
@@ -12,6 +12,7 @@ use std::{
     collections::HashMap,
     fmt::Debug,
     ops::{Deref, DerefMut},
+    sync::Arc,
 };
 use temporal_sdk_core_protos::coresdk::workflow_activation::WorkflowActivation;
 
@@ -22,7 +23,7 @@ pub(crate) struct WorkflowConcurrencyManager {
 }
 
 struct ManagedRun {
-    wfm: Mutex<WorkflowManager>,
+    wfm: Arc<Mutex<WorkflowManager>>,
     wft: Option<OutstandingTask>,
     activation: Option<OutstandingActivation>,
     metrics: MetricsContext,
@@ -36,7 +37,7 @@ struct ManagedRun {
 impl ManagedRun {
     fn new(wfm: WorkflowManager, metrics: MetricsContext) -> Self {
         Self {
-            wfm: Mutex::new(wfm),
+            wfm: Arc::new(Mutex::new(wfm)),
             wft: None,
             activation: None,
             metrics,
@@ -266,16 +267,19 @@ impl WorkflowConcurrencyManager {
         F: for<'a> FnOnce(&'a mut WorkflowManager) -> BoxFuture<Result<Fout>>,
         Fout: Send + Debug,
     {
-        let readlock = self.runs.read();
-        let m = readlock
-            .get(run_id)
-            .ok_or_else(|| WFMachinesError::Fatal("Missing workflow machines".to_string()))?;
-        // This holds a non-async mutex across an await point which is technically a no-no, but
-        // we never access the machines for the same run simultaneously anyway. This should all
-        // get fixed with a generally different approach which moves the runs inside workers.
-        let mut wfm_mutex = m.wfm.lock();
-        let res = mutator(&mut wfm_mutex).await;
+        // TODO: Slightly less than ideal. We must avoid holding the read lock on the overall
+        // machine map while async-ly mutating the inner machine. So, we clone the inner ArcMutex.
+        // We should restructure things to avoid the top-level lock on the map.
+
+        let wfm = {
+            let readlock = self.runs.read();
+            let m = readlock
+                .get(run_id)
+                .ok_or_else(|| WFMachinesError::Fatal("Missing workflow machines".to_string()))?;
+            m.wfm.clone()
+        };
 
+        let res = mutator(&mut wfm.lock()).await;
         res
     }
 
@@ -321,6 +325,8 @@ impl WorkflowConcurrencyManager {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::test_help::canned_histories;
+    use tokio::sync::Barrier;
 
     // We test mostly error paths here since the happy paths are well covered by the tests of the
     // core sdk itself, and setting up the fake data is onerous here. If we make the concurrency
@@ -342,4 +348,57 @@ mod tests {
         // Should whine that the machines have nothing to do (history empty)
         assert_matches!(res.unwrap_err(), WFMachinesError::Fatal { .. });
     }
+
+    /// This test makes sure that if we're stuck on an await within the machine mutator we don't
+    /// cause a deadlock if a write happens during that. This test will hang without proper
+    /// implementation.
+    #[tokio::test]
+    async fn aba_deadlock_prevented() {
+        let run_id = "some_run_id";
+        let timer_hist = canned_histories::single_timer("t");
+        let access_barr: &'static Barrier = Box::leak(Box::new(Barrier::new(2)));
+        let wft = timer_hist.get_history_info(1).unwrap();
+
+        let mgr = WorkflowConcurrencyManager::new();
+        mgr.create_or_update(
+            run_id,
+            wft.clone().into(),
+            "fake_wf_id",
+            "fake_namespace",
+            "fake_wf_type",
+            &Default::default(),
+        )
+        .await
+        .unwrap();
+        // Perform access which blocks
+        let access_fut = mgr.access(run_id, |_wfm| {
+            async {
+                // Wait to make sure access has started
+                access_barr.wait().await;
+                // Wait to make sure write has finished
+                access_barr.wait().await;
+                Ok(())
+            }
+            .boxed()
+        });
+        let write_fut = async {
+            // Wait to make sure access has started
+            access_barr.wait().await;
+            // Now try writing
+            mgr.create_or_update(
+                "different_run_id",
+                wft.clone().into(),
+                "fake_wf_id",
+                "fake_namespace",
+                "fake_wf_type",
+                &Default::default(),
+            )
+            .await
+            .unwrap();
+            // Indicate write has finished
+            access_barr.wait().await;
+        };
+        let (r1, _) = tokio::join!(access_fut, write_fut);
+        r1.unwrap();
+    }
 }
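The `aba_deadlock_prevented` test relies on the `Arc<Mutex<WorkflowManager>>` change earlier in this file: the map's read lock is now confined to the block that clones out the per-run Arc, so a concurrent `create_or_update` (which needs the write lock) can proceed while the mutator future is awaited. A minimal sketch of that locking pattern with stand-in types (std locks here, where the real code uses `parking_lot`):

```rust
use std::{
    collections::HashMap,
    sync::{Arc, Mutex, RwLock},
};

struct RunState {
    history_length: usize,
}

struct Manager {
    runs: RwLock<HashMap<String, Arc<Mutex<RunState>>>>,
}

impl Manager {
    async fn access(&self, run_id: &str) -> Result<usize, String> {
        // Scope the read lock: clone the Arc and release the map immediately.
        let run = {
            let map = self.runs.read().unwrap();
            map.get(run_id)
                .cloned()
                .ok_or_else(|| format!("missing run {run_id}"))?
        };
        // The map lock is already released here, so concurrent inserts (a new run
        // being created, say) cannot deadlock against this await point.
        tokio::task::yield_now().await;
        let mut state = run.lock().unwrap();
        state.history_length += 1;
        Ok(state.history_length)
    }
}

#[tokio::main]
async fn main() {
    let mgr = Manager {
        runs: RwLock::new(HashMap::from([(
            "run-1".to_string(),
            Arc::new(Mutex::new(RunState { history_length: 0 })),
        )])),
    };
    assert_eq!(mgr.access("run-1").await.unwrap(), 1);
}
```

As the TODO in the hunk notes, the per-run lock is still held across the await in sdk-core; only the map-wide lock is scoped.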
@@ -57,7 +57,7 @@ pub struct WorkflowTaskManager {
     pending_activations: PendingActivations,
     /// Holds activations which are purely query activations needed to respond to legacy queries.
     /// Activations may only be added here for runs which do not have other pending activations.
-    pending_legacy_queries: SegQueue<WorkflowActivation>,
+    pending_queries: SegQueue<WorkflowActivation>,
     /// Holds poll wft responses from the server that need to be applied
     ready_buffered_wft: SegQueue<ValidPollWFTQResponse>,
     /// Used to wake blocked workflow task polling
@@ -74,9 +74,8 @@ pub struct WorkflowTaskManager {
 #[derive(Clone, Debug)]
 pub(crate) struct OutstandingTask {
     pub info: WorkflowTaskInfo,
-    /// If set the outstanding task has query from the old `query` field which must be fulfilled
-    /// upon finishing replay
-    pub legacy_query: Option<QueryWorkflow>,
+    /// Set if the outstanding task has quer(ies) which must be fulfilled upon finishing replay
+    pub pending_queries: Vec<QueryWorkflow>,
     start_time: Instant,
 }
 
@@ -150,6 +149,13 @@ pub(crate) enum ActivationAction {
     RespondLegacyQuery { result: QueryResult },
 }
 
+#[derive(Debug, Eq, PartialEq, Hash)]
+pub(crate) enum EvictionRequestResult {
+    EvictionIssued(Option<u32>),
+    NotFound,
+    EvictionAlreadyOutstanding,
+}
+
 macro_rules! machine_mut {
     ($myself:ident, $run_id:ident, $clos:expr) => {{
         $myself
@@ -172,7 +178,7 @@ impl WorkflowTaskManager {
         Self {
             workflow_machines: WorkflowConcurrencyManager::new(),
             pending_activations: Default::default(),
-            pending_legacy_queries: Default::default(),
+            pending_queries: Default::default(),
             ready_buffered_wft: Default::default(),
             pending_activations_notifier,
             cache_manager: Mutex::new(WorkflowCacheManager::new(eviction_policy, metrics.clone())),
@@ -181,8 +187,8 @@ impl WorkflowTaskManager {
     }
 
     pub(crate) fn next_pending_activation(&self) -> Option<WorkflowActivation> {
-        // Dispatch pending legacy queries first
-        if let leg_q @ Some(_) = self.pending_legacy_queries.pop() {
+        // Dispatch pending queries first
+        if let leg_q @ Some(_) = self.pending_queries.pop() {
            return leg_q;
         }
         // It is important that we do not issue pending activations for any workflows which already
@@ -247,7 +253,7 @@ impl WorkflowTaskManager {
         run_id: &str,
         message: impl Into<String>,
         reason: EvictionReason,
-    ) -> Option<u32> {
+    ) -> EvictionRequestResult {
         if self.workflow_machines.exists(run_id) {
             if !self.activation_has_eviction(run_id) {
                 let message = message.into();
@@ -256,13 +262,17 @@ impl WorkflowTaskManager {
                 self.pending_activations
                     .notify_needs_eviction(run_id, message, reason);
                 self.pending_activations_notifier.notify_waiters();
+                EvictionRequestResult::EvictionIssued(
+                    self.workflow_machines
+                        .get_task(run_id)
+                        .map(|wt| wt.info.attempt),
+                )
+            } else {
+                EvictionRequestResult::EvictionAlreadyOutstanding
             }
-            self.workflow_machines
-                .get_task(run_id)
-                .map(|wt| wt.info.attempt)
         } else {
             warn!(%run_id, "Eviction requested for unknown run");
-            None
+            EvictionRequestResult::NotFound
         }
     }
 
@@ -304,9 +314,11 @@ impl WorkflowTaskManager {
             return NewWfTaskOutcome::TaskBuffered;
         };
 
+        let start_event_id = work.history.events.first().map(|e| e.event_id);
         debug!(
             task_token = %&work.task_token,
             history_length = %work.history.events.len(),
+            start_event_id = ?start_event_id,
             attempt = %work.attempt,
             run_id = %work.workflow_execution.run_id,
             "Applying new workflow task from server"
@@ -320,33 +332,45 @@ impl WorkflowTaskManager {
             .take()
             .map(|q| query_to_job(LEGACY_QUERY_ID.to_string(), q));
 
-        let (info, mut next_activation) =
+        let (info, mut next_activation, mut pending_queries) =
             match self.instantiate_or_update_workflow(work, client).await {
-                Ok((info, next_activation)) => (info, next_activation),
+                Ok(res) => res,
                 Err(e) => {
                     return NewWfTaskOutcome::Evict(e);
                 }
             };
 
+        if !pending_queries.is_empty() && legacy_query.is_some() {
+            error!(
+                "Server issued both normal and legacy queries. This should not happen. Please \
+                 file a bug report."
+            );
+            return NewWfTaskOutcome::Evict(WorkflowUpdateError {
+                source: WFMachinesError::Fatal(
+                    "Server issued both normal and legacy query".to_string(),
+                ),
+                run_id: next_activation.run_id,
+            });
+        }
+
         // Immediately dispatch query activation if no other jobs
-        let legacy_query = if next_activation.jobs.is_empty() {
-            if let Some(lq) = legacy_query {
+        if let Some(lq) = legacy_query {
+            if next_activation.jobs.is_empty() {
                 debug!("Dispatching legacy query {}", &lq);
                 next_activation
                     .jobs
                     .push(workflow_activation_job::Variant::QueryWorkflow(lq).into());
+            } else {
+                pending_queries.push(lq);
             }
-            None
-        } else {
-            legacy_query
-        };
+        }
 
         self.workflow_machines
             .insert_wft(
                 &next_activation.run_id,
                 OutstandingTask {
                     info,
-                    legacy_query,
+                    pending_queries,
                     start_time: task_start_time,
                 },
             )
@@ -388,11 +412,11 @@ impl WorkflowTaskManager {
             return Ok(None);
         }
 
-        let (task_token, is_leg_query_task, start_time) =
+        let (task_token, has_pending_query, start_time) =
             if let Some(entry) = self.workflow_machines.get_task(run_id) {
                 (
                     entry.info.task_token.clone(),
-                    entry.legacy_query.is_some(),
+                    !entry.pending_queries.is_empty(),
                     entry.start_time,
                 )
             } else {
@@ -493,7 +517,7 @@ impl WorkflowTaskManager {
         let must_heartbeat = self
             .wait_for_local_acts_or_heartbeat(run_id, wft_heartbeat_deadline)
             .await;
-        let is_query_playback = is_leg_query_task && query_responses.is_empty();
+        let is_query_playback = has_pending_query && query_responses.is_empty();
 
         // We only actually want to send commands back to the server if there are no more
         // pending activations and we are caught up on replay. We don't want to complete a wft
@@ -559,9 +583,10 @@ impl WorkflowTaskManager {
             FailedActivationOutcome::ReportLegacyQueryFailure(tt)
         } else {
             // Blow up any cached data associated with the workflow
-            let should_report = self
-                .request_eviction(run_id, failstr, reason)
-                .map_or(true, |attempt| attempt <= 1);
+            let should_report = match self.request_eviction(run_id, failstr, reason) {
+                EvictionRequestResult::EvictionIssued(Some(attempt)) => attempt <= 1,
+                _ => false,
+            };
             if should_report {
                 FailedActivationOutcome::Report(tt)
             } else {
@@ -578,7 +603,8 @@ impl WorkflowTaskManager {
         &self,
         poll_wf_resp: ValidPollWFTQResponse,
         client: Arc<WorkerClientBag>,
-    ) -> Result<(WorkflowTaskInfo, WorkflowActivation), WorkflowUpdateError> {
+    ) -> Result<(WorkflowTaskInfo, WorkflowActivation, Vec<QueryWorkflow>), WorkflowUpdateError>
+    {
         let run_id = poll_wf_resp.workflow_execution.run_id.clone();
 
         let wft_info = WorkflowTaskInfo {
@@ -593,10 +619,12 @@ impl WorkflowTaskManager {
             .map(|ev| ev.event_id > 1)
             .unwrap_or_default();
 
+        let mut did_miss_cache = false;
         let page_token = if !self.workflow_machines.exists(&run_id) && poll_resp_is_incremental {
             debug!(run_id=?run_id, "Workflow task has partial history, but workflow is not in \
                    cache. Will fetch history");
             self.metrics.sticky_cache_miss();
+            did_miss_cache = true;
             NextPageToken::FetchFromStart
         } else {
             poll_wf_resp.next_page_token.into()
@@ -625,16 +653,26 @@ impl WorkflowTaskManager {
         .await
         {
             Ok(mut activation) => {
-                // If there are in-poll queries, insert jobs for those queries into the activation
+                // If there are in-poll queries, insert jobs for those queries into the activation,
+                // but only if we hit the cache. If we didn't, those queries will need to be dealt
+                // with once replay is over
+                let mut pending_queries = vec![];
                 if !poll_wf_resp.query_requests.is_empty() {
-                    let query_jobs = poll_wf_resp
-                        .query_requests
-                        .into_iter()
-                        .map(|q| workflow_activation_job::Variant::QueryWorkflow(q).into());
-                    activation.jobs.extend(query_jobs);
+                    if !did_miss_cache {
+                        let query_jobs = poll_wf_resp
+                            .query_requests
+                            .into_iter()
+                            .map(|q| workflow_activation_job::Variant::QueryWorkflow(q).into());
+                        activation.jobs.extend(query_jobs);
+                    } else {
+                        poll_wf_resp
+                            .query_requests
+                            .into_iter()
+                            .for_each(|q| pending_queries.push(q));
+                    }
                 }
 
-                Ok((wft_info, activation))
+                Ok((wft_info, activation, pending_queries))
             }
             Err(source) => Err(WorkflowUpdateError { source, run_id }),
         }
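This hunk carries the core of the cache-miss fix: queries delivered alongside a workflow task are attached to the activation only when the run was already cached; on a miss they are parked in `pending_queries` until replay catches up (the `query_cache_miss_causes_page_fetch_dont_reply_wft_too_early` test near the top of this diff covers it). A hypothetical distillation of that routing decision, with stand-in types rather than the sdk-core ones:

```rust
// Stand-in activation type for illustration; sdk-core uses protobuf-generated jobs.
#[derive(Default)]
struct Activation {
    jobs: Vec<String>,
}

/// Attach in-poll queries to the current activation only when the run was cached;
/// otherwise defer them until replay has caught up.
fn route_queries(
    did_miss_cache: bool,
    query_requests: Vec<String>,
    activation: &mut Activation,
    pending_queries: &mut Vec<String>,
) {
    if did_miss_cache {
        // Answering now would complete the workflow task before replay is done.
        pending_queries.extend(query_requests);
    } else {
        activation.jobs.extend(query_requests);
    }
}

fn main() {
    let mut activation = Activation::default();
    let mut pending = Vec::new();
    route_queries(true, vec!["the-query".into()], &mut activation, &mut pending);
    // On a cache miss the query is deferred, not dispatched.
    assert!(activation.jobs.is_empty());
    assert_eq!(pending, vec!["the-query".to_string()]);
}
```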
@@ -661,16 +699,18 @@ impl WorkflowTaskManager {
         // removed from the outstanding tasks map
         let retme = if !self.pending_activations.has_pending(run_id) {
             if !just_evicted {
-                // Check if there was a legacy query which must be fulfilled, and if there is create
-                // a new pending activation for it.
+                // Check if there was a pending query which must be fulfilled, and if there is
+                // create a new pending activation for it.
                 if let Some(ref mut ot) = &mut *self
                     .workflow_machines
                     .get_task_mut(run_id)
                     .expect("Machine must exist")
                 {
-                    if let Some(query) = ot.legacy_query.take() {
-                        let na = create_query_activation(run_id.to_string(), [query]);
-                        self.pending_legacy_queries.push(na);
+                    if !ot.pending_queries.is_empty() {
+                        for query in ot.pending_queries.drain(..) {
+                            let na = create_query_activation(run_id.to_string(), [query]);
+                            self.pending_queries.push(na);
+                        }
                         self.pending_activations_notifier.notify_waiters();
                         return false;
                     }
@@ -389,7 +389,7 @@ impl ActivityHalf {
         tokio::spawn(ACT_CANCEL_TOK.scope(ct, async move {
             let mut inputs = start.input;
             let arg = inputs.pop().unwrap_or_default();
-            let output = (&act_fn.act_func)(arg).await;
+            let output = (act_fn.act_func)(arg).await;
             let result = match output {
                 Ok(res) => ActivityExecutionResult::ok(res),
                 Err(err) => match err.downcast::<ActivityCancelledError>() {
@@ -1,5 +1,5 @@
 use std::time::Duration;
-use temporal_client::{WorkflowClientTrait, WorkflowOptions};
+use temporal_client::WorkflowOptions;
 use temporal_sdk::{WfContext, WfExitValue, WorkflowResult};
 use temporal_sdk_core_protos::coresdk::workflow_commands::ContinueAsNewWorkflowExecution;
 use temporal_sdk_core_test_utils::CoreWfStarter;
@@ -33,13 +31,31 @@ async fn continue_as_new_happy_path() {
         )
         .await
         .unwrap();
+    // The four additional runs
+    worker.incr_expected_run_count(4);
     worker.run_until_done().await.unwrap();
+}
 
-    // Terminate the continued workflow
-    starter
-        .get_client()
-        .await
-        .terminate_workflow_execution(wf_name.to_owned(), None)
-        .await
-        .unwrap();
+#[tokio::test]
+async fn continue_as_new_multiple_concurrent() {
+    let wf_name = "continue_as_new_multiple_concurrent";
+    let mut starter = CoreWfStarter::new(wf_name);
+    starter.max_cached_workflows(3).max_wft(3);
+    let mut worker = starter.worker().await;
+    worker.register_wf(wf_name.to_string(), continue_as_new_wf);
+
+    let wf_names = (1..=20).map(|i| format!("{}-{}", wf_name, i));
+    for name in wf_names.clone() {
+        worker
+            .submit_wf(
+                name.to_string(),
+                wf_name.to_string(),
+                vec![[1].into()],
+                WorkflowOptions::default(),
+            )
+            .await
+            .unwrap();
+    }
+    worker.incr_expected_run_count(20 * 4);
+    worker.run_until_done().await.unwrap();
 }
package/src/errors.rs CHANGED
@@ -10,6 +10,8 @@ pub static SHUTDOWN_ERROR: OnceCell<Root<JsFunction>> = OnceCell::new();
 pub static NO_WORKER_ERROR: OnceCell<Root<JsFunction>> = OnceCell::new();
 /// Something unexpected happened, considered fatal
 pub static UNEXPECTED_ERROR: OnceCell<Root<JsFunction>> = OnceCell::new();
+/// Used in different parts of the project to signal that something unexpected has happened
+pub static ILLEGAL_STATE_ERROR: OnceCell<Root<JsFunction>> = OnceCell::new();
 
 static ALREADY_REGISTERED_ERRORS: OnceCell<bool> = OnceCell::new();
 
@@ -70,9 +72,9 @@ pub fn register_errors(mut cx: FunctionContext) -> JsResult<JsUndefined> {
     let res = ALREADY_REGISTERED_ERRORS.set(true);
     if res.is_err() {
         // Don't do anything if errors are already registered
-        return Ok(cx.undefined())
+        return Ok(cx.undefined());
     }
-
+
     let mapping = cx.argument::<JsObject>(0)?;
     let shutdown_error = mapping
         .get(&mut cx, "ShutdownError")?
@@ -90,11 +92,16 @@ pub fn register_errors(mut cx: FunctionContext) -> JsResult<JsUndefined> {
         .get(&mut cx, "UnexpectedError")?
         .downcast_or_throw::<JsFunction, FunctionContext>(&mut cx)?
         .root(&mut cx);
+    let illegal_state_error = mapping
+        .get(&mut cx, "IllegalStateError")?
+        .downcast_or_throw::<JsFunction, FunctionContext>(&mut cx)?
+        .root(&mut cx);
 
     TRANSPORT_ERROR.get_or_try_init(|| Ok(transport_error))?;
     SHUTDOWN_ERROR.get_or_try_init(|| Ok(shutdown_error))?;
     NO_WORKER_ERROR.get_or_try_init(|| Ok(no_worker_error))?;
     UNEXPECTED_ERROR.get_or_try_init(|| Ok(unexpected_error))?;
+    ILLEGAL_STATE_ERROR.get_or_try_init(|| Ok(illegal_state_error))?;
 
     Ok(cx.undefined())
 }
package/src/lib.rs CHANGED
@@ -8,6 +8,7 @@ use once_cell::sync::OnceCell;
 use opentelemetry::trace::{FutureExt, SpanContext, TraceContextExt};
 use prost::Message;
 use std::{
+    cell::RefCell,
     fmt::Display,
     future::Future,
     sync::Arc,
@@ -135,7 +136,7 @@ struct Client {
     core_client: Arc<RawClient>,
 }
 
-type BoxedClient = JsBox<Client>;
+type BoxedClient = JsBox<RefCell<Option<Client>>>;
 impl Finalize for Client {}
 
 /// Worker struct, hold a reference for the channel sender responsible for sending requests from
@@ -291,10 +292,10 @@ fn start_bridge_loop(event_queue: Arc<EventQueue>, receiver: &mut UnboundedRecei
             }
             Ok(client) => {
                 send_result(event_queue.clone(), callback, |cx| {
-                    Ok(cx.boxed(Client {
+                    Ok(cx.boxed(RefCell::new(Some(Client {
                         runtime,
                         core_client: Arc::new(client),
-                    }))
+                    }))))
                 });
             }
         }
@@ -590,15 +591,23 @@ fn worker_new(mut cx: FunctionContext) -> JsResult<JsUndefined> {
     let callback = cx.argument::<JsFunction>(2)?;
 
     let config = worker_options.as_worker_config(&mut cx)?;
-
-    let request = Request::InitWorker {
-        client: client.core_client.clone(),
-        runtime: client.runtime.clone(),
-        config,
-        callback: callback.root(&mut cx),
-    };
-    if let Err(err) = client.runtime.sender.send(request) {
-        callback_with_unexpected_error(&mut cx, callback, err)?;
+    match &*client.borrow() {
+        None => {
+            callback_with_error(&mut cx, callback, move |cx| {
+                UNEXPECTED_ERROR.from_string(cx, "Tried to use closed Client".to_string())
+            })?;
+        }
+        Some(client) => {
+            let request = Request::InitWorker {
+                client: client.core_client.clone(),
+                runtime: client.runtime.clone(),
+                config,
+                callback: callback.root(&mut cx),
+            };
+            if let Err(err) = client.runtime.sender.send(request) {
+                callback_with_unexpected_error(&mut cx, callback, err)?;
+            };
+        }
     };
 
     Ok(cx.undefined())
@@ -783,13 +792,26 @@ fn worker_record_activity_heartbeat(mut cx: FunctionContext) -> JsResult<JsUndef
 fn worker_shutdown(mut cx: FunctionContext) -> JsResult<JsUndefined> {
     let worker = cx.argument::<BoxedWorker>(0)?;
     let callback = cx.argument::<JsFunction>(1)?;
-    match worker.runtime.sender.send(Request::ShutdownWorker {
+    if let Err(err) = worker.runtime.sender.send(Request::ShutdownWorker {
         worker: worker.core_worker.clone(),
         callback: callback.root(&mut cx),
     }) {
-        Err(err) => cx.throw_error(format!("{}", err)),
-        _ => Ok(cx.undefined()),
-    }
+        UNEXPECTED_ERROR
+            .from_error(&mut cx, err)
+            .and_then(|err| cx.throw(err))?;
+    };
+    Ok(cx.undefined())
+}
+
+/// Drop a reference to a Client, once all references are dropped, the Client will be closed.
+fn client_close(mut cx: FunctionContext) -> JsResult<JsUndefined> {
+    let client = cx.argument::<BoxedClient>(0)?;
+    if client.replace(None).is_none() {
+        ILLEGAL_STATE_ERROR
+            .from_error(&mut cx, "Client already closed")
+            .and_then(|err| cx.throw(err))?;
+    };
+    Ok(cx.undefined())
 }
 
 /// Convert Rust SystemTime into a JS array with 2 numbers (seconds, nanos)
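`client_close` above works because the boxed handle is now a `RefCell<Option<Client>>`: closing takes the value out, and any later use (or a second close) observes `None` and surfaces an error instead of touching a dropped client. A minimal sketch of that handle pattern in plain Rust, with no Neon types involved:

```rust
use std::cell::RefCell;

struct Client {
    address: String,
}

// Plain stand-in for the JsBox<RefCell<Option<Client>>> handle the bridge boxes for JS.
type ClientHandle = RefCell<Option<Client>>;

fn client_close(handle: &ClientHandle) -> Result<(), String> {
    // `replace(None)` both invalidates the handle and reports whether it was already
    // closed, mirroring the ILLEGAL_STATE_ERROR branch in lib.rs.
    match handle.replace(None) {
        Some(_client) => Ok(()), // dropped here; underlying resources are released
        None => Err("Client already closed".to_string()),
    }
}

fn with_client<R>(handle: &ClientHandle, f: impl FnOnce(&Client) -> R) -> Result<R, String> {
    match &*handle.borrow() {
        Some(client) => Ok(f(client)),
        None => Err("Tried to use closed Client".to_string()),
    }
}

fn main() {
    let handle: ClientHandle = RefCell::new(Some(Client {
        address: "localhost:7233".to_string(),
    }));
    assert!(with_client(&handle, |c| c.address.clone()).is_ok());
    assert!(client_close(&handle).is_ok());
    // Using or closing again now fails cleanly instead of reaching a freed client.
    assert!(with_client(&handle, |c| c.address.clone()).is_err());
    assert!(client_close(&handle).is_err());
}
```

In the bridge itself the same check backs both `worker_new` ("Tried to use closed Client") and `client_close` ("Client already closed").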
@@ -824,6 +846,7 @@ fn main(mut cx: ModuleContext) -> NeonResult<()> {
     cx.export_function("newWorker", worker_new)?;
     cx.export_function("newReplayWorker", replay_worker_new)?;
     cx.export_function("workerShutdown", worker_shutdown)?;
+    cx.export_function("clientClose", client_close)?;
     cx.export_function("runtimeShutdown", runtime_shutdown)?;
     cx.export_function("pollLogs", poll_logs)?;
     cx.export_function(