npm - @temporalio/core-bridge - Versions diffs - 0.20.2 → 0.22.0 - Mend

@temporalio/core-bridge 0.20.2 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

package/Cargo.lock +137 -127
package/index.d.ts +7 -2
package/package.json +3 -3
package/releases/aarch64-apple-darwin/index.node +0 -0
package/releases/x86_64-apple-darwin/index.node +0 -0
package/releases/x86_64-pc-windows-msvc/index.node +0 -0
package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
package/sdk-core/.buildkite/docker/docker-compose.yaml +5 -4
package/sdk-core/client/Cargo.toml +1 -0
package/sdk-core/client/src/lib.rs +52 -9
package/sdk-core/client/src/raw.rs +9 -1
package/sdk-core/client/src/retry.rs +12 -1
package/sdk-core/client/src/workflow_handle/mod.rs +183 -0
package/sdk-core/core/src/abstractions.rs +10 -3
package/sdk-core/core/src/core_tests/child_workflows.rs +7 -9
package/sdk-core/core/src/core_tests/determinism.rs +8 -19
package/sdk-core/core/src/core_tests/local_activities.rs +22 -32
package/sdk-core/core/src/core_tests/queries.rs +272 -5
package/sdk-core/core/src/core_tests/workers.rs +4 -34
package/sdk-core/core/src/core_tests/workflow_tasks.rs +197 -41
package/sdk-core/core/src/pending_activations.rs +11 -0
package/sdk-core/core/src/telemetry/mod.rs +1 -1
package/sdk-core/core/src/test_help/mod.rs +57 -7
package/sdk-core/core/src/worker/mod.rs +64 -15
package/sdk-core/core/src/workflow/machines/mod.rs +1 -1
package/sdk-core/core/src/workflow/machines/timer_state_machine.rs +2 -2
package/sdk-core/core/src/workflow/machines/workflow_machines.rs +14 -3
package/sdk-core/core/src/workflow/mod.rs +5 -2
package/sdk-core/core/src/workflow/workflow_tasks/cache_manager.rs +47 -2
package/sdk-core/core/src/workflow/workflow_tasks/concurrency_manager.rs +16 -2
package/sdk-core/core/src/workflow/workflow_tasks/mod.rs +252 -125
package/sdk-core/core-api/src/worker.rs +9 -0
package/sdk-core/sdk/Cargo.toml +1 -0
package/sdk-core/sdk/src/activity_context.rs +223 -0
package/sdk-core/sdk/src/interceptors.rs +8 -2
package/sdk-core/sdk/src/lib.rs +167 -122
package/sdk-core/sdk-core-protos/src/history_info.rs +3 -7
package/sdk-core/test-utils/Cargo.toml +1 -0
package/sdk-core/test-utils/src/lib.rs +78 -37
package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +11 -4
package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +0 -1
package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +0 -3
package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +33 -17
package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +10 -1
package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +0 -1
package/sdk-core/tests/integ_tests/workflow_tests.rs +71 -3
package/sdk-core/tests/load_tests.rs +80 -6
package/src/errors.rs +9 -2
package/src/lib.rs +39 -16
package/releases/aarch64-unknown-linux-gnu/index.node +0 -0

package/sdk-core/core/src/core_tests/workflow_tasks.rs CHANGED Viewed

@@ -71,7 +71,7 @@ fn single_activity_failure_setup(hist_batches: &'static [usize]) -> Worker {
 #[case::incremental(single_timer_setup(&[1, 2]), NonSticky)]
 #[case::replay(single_timer_setup(&[2]), NonSticky)]
 #[case::incremental_evict(single_timer_setup(&[1, 2]), AfterEveryReply)]
-#[case::replay_evict(single_timer_setup(&[2, 2]), AfterEveryReply)]
+#[case::replay_evict(single_timer_setup(&[2]), AfterEveryReply)]
 #[tokio::test]
 async fn single_timer(#[case] worker: Worker, #[case] evict: WorkflowCachingPolicy) {
     poll_and_reply(
@@ -1114,14 +1114,9 @@ async fn complete_after_eviction() {
     let eviction_activation = core.poll_workflow_activation().await.unwrap();
     assert_matches!(
         eviction_activation.jobs.as_slice(),
-        [
-            WorkflowActivationJob {
-                variant: Some(workflow_activation_job::Variant::FireTimer(_)),
-            },
-            WorkflowActivationJob {
-                variant: Some(workflow_activation_job::Variant::RemoveFromCache(_)),
-            }
-        ]
+        [WorkflowActivationJob {
+            variant: Some(workflow_activation_job::Variant::FireTimer(_)),
+        },]
     );
     // Complete the activation containing the eviction, the way we normally would have
     core.complete_workflow_activation(WorkflowActivationCompletion::from_cmds(
@@ -1130,6 +1125,13 @@ async fn complete_after_eviction() {
     ))
     .await
     .unwrap();
+    let eviction = core.poll_workflow_activation().await.unwrap();
+    assert_matches!(
+        eviction.jobs.as_slice(),
+        [WorkflowActivationJob {
+            variant: Some(workflow_activation_job::Variant::RemoveFromCache(_)),
+        }]
+    );
     core.shutdown().await;
 }
@@ -1260,38 +1262,6 @@ async fn buffered_work_drained_on_shutdown() {
     });
 }
-#[tokio::test]
-async fn buffering_tasks_doesnt_count_toward_outstanding_max() {
-    let wfid = "fake_wf_id";
-    let t = canned_histories::single_timer("1");
-    let mock = mock_workflow_client();
-    let mut tasks = VecDeque::new();
-    // A way bigger task list than allowed outstanding tasks
-    tasks.extend(
-        std::iter::repeat(hist_to_poll_resp(
-            &t,
-            wfid.to_owned(),
-            2.into(),
-            TEST_Q.to_string(),
-        ))
-        .take(20),
-    );
-    let mut mock = MocksHolder::from_client_with_responses(mock, tasks, []);
-    mock.worker_cfg(|wc| {
-        wc.max_cached_workflows = 10;
-        wc.max_outstanding_workflow_tasks = 5;
-    });
-    let core = mock_worker(mock);
-    // Poll for first WFT
-    core.poll_workflow_activation().await.unwrap();
-    // This will error out when the mock runs out of responses. Otherwise it would hang when we
-    // hit the max
-    assert_matches!(
-        core.poll_workflow_activation().await.unwrap_err(),
-        PollWfError::TonicError(_)
-    );
-}
 #[tokio::test]
 async fn fail_wft_then_recover() {
     let t = canned_histories::long_sequential_timers(1);
@@ -1737,3 +1707,189 @@ async fn evict_missing_wf_during_poll_doesnt_eat_permit() {
     core.shutdown().await;
 }
+#[tokio::test]
+async fn poll_faster_than_complete_wont_overflow_cache() {
+    // Make workflow tasks for 5 different runs
+    let tasks: Vec<_> = (1..=5)
+        .map(|i| {
+            hist_to_poll_resp(
+                // New hist each time for new run ids
+                &canned_histories::single_timer("1"),
+                format!("wf-{}", i),
+                ResponseType::ToTaskNum(1),
+                TEST_Q.to_string(),
+            )
+        })
+        .collect();
+    let mut mock = mock_workflow_client();
+    mock.expect_complete_workflow_task()
+        .times(3)
+        .returning(|_| Ok(Default::default()));
+    let mut mock = MocksHolder::from_client_with_responses(mock, tasks, []);
+    mock.worker_cfg(|wc| {
+        wc.max_cached_workflows = 3;
+        wc.max_outstanding_workflow_tasks = 3;
+    });
+    let core = mock_worker(mock);
+    // Poll 4 times, completing once, such that max tasks are never exceeded
+    let p1 = core.poll_workflow_activation().await.unwrap();
+    let p2 = core.poll_workflow_activation().await.unwrap();
+    let p3 = core.poll_workflow_activation().await.unwrap();
+    for (i, p_res) in [&p1, &p2, &p3].into_iter().enumerate() {
+        assert_matches!(
+            &p_res.jobs[0].variant,
+            Some(workflow_activation_job::Variant::StartWorkflow(sw))
+            if sw.workflow_id == format!("wf-{}", i + 1)
+        );
+    }
+    // Complete first task to free a wft slot. Cache size is at 3
+    core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
+        p1.run_id,
+        start_timer_cmd(1, Duration::from_secs(1)),
+    ))
+    .await
+    .unwrap();
+    // Now we're at cache limit. We will poll for a task, discover it is for a new run, issue
+    // an eviction, and buffer the new run task. However, the run we're trying to evict has pending
+    // activations! Thus, we must complete them first before this poll will unblock, and then it
+    // will unblock with the eviction.
+    let p4 = async {
+        let p4 = core.poll_workflow_activation().await.unwrap();
+        assert_matches!(
+            &p4.jobs.as_slice(),
+            [WorkflowActivationJob {
+                variant: Some(workflow_activation_job::Variant::RemoveFromCache(_)),
+            }]
+        );
+        p4
+    };
+    let p2_pending_completer = async {
+        // Sleep needed because otherwise the complete unblocks waiting for the cache to free a slot
+        // before we have a chance to actually... wait for it.
+        tokio::time::sleep(Duration::from_millis(100)).await;
+        core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
+            p2.run_id,
+            start_timer_cmd(1, Duration::from_secs(1)),
+        ))
+        .await
+        .unwrap();
+    };
+    let (p4, _) = tokio::join!(p4, p2_pending_completer);
+    assert_eq!(core.cached_workflows(), 3);
+    // This poll should also block until the eviction is actually completed
+    let blocking_poll = async {
+        let res = core.poll_workflow_activation().await.unwrap();
+        assert_matches!(
+            &res.jobs[0].variant,
+            Some(workflow_activation_job::Variant::StartWorkflow(sw))
+            if sw.workflow_id == format!("wf-{}", 4)
+        );
+        res
+    };
+    let complete_evict = async {
+        core.complete_workflow_activation(WorkflowActivationCompletion::empty(p4.run_id))
+            .await
+            .unwrap();
+    };
+    let (_p5, _) = tokio::join!(blocking_poll, complete_evict);
+    assert_eq!(core.cached_workflows(), 3);
+    // The next poll will get and buffer a task for a new run, and generate an eviction for p3 but
+    // that eviction cannot be obtained until we complete the existing outstanding task.
+    let p6 = async {
+        let p6 = core.poll_workflow_activation().await.unwrap();
+        assert_matches!(
+            p6.jobs.as_slice(),
+            [WorkflowActivationJob {
+                variant: Some(workflow_activation_job::Variant::RemoveFromCache(_)),
+            }]
+        );
+        p6
+    };
+    let completer = async {
+        core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
+            p3.run_id,
+            start_timer_cmd(1, Duration::from_secs(1)),
+        ))
+        .await
+        .unwrap();
+    };
+    let (p6, _) = tokio::join!(p6, completer);
+    let complete_evict = async {
+        core.complete_workflow_activation(WorkflowActivationCompletion::empty(p6.run_id))
+            .await
+            .unwrap();
+    };
+    let blocking_poll = async {
+        // This poll will also block until the last eviction goes through, and when it does it'll
+        // produce the final start workflow task
+        let res = core.poll_workflow_activation().await.unwrap();
+        assert_matches!(
+            &res.jobs[0].variant,
+            Some(workflow_activation_job::Variant::StartWorkflow(sw))
+            if sw.workflow_id == "wf-5"
+        );
+    };
+    tokio::join!(blocking_poll, complete_evict);
+    // p5 outstanding and final poll outstanding -- hence one permit available
+    assert_eq!(core.available_wft_permits(), 1);
+    assert_eq!(core.cached_workflows(), 3);
+}
+#[tokio::test]
+async fn eviction_waits_until_replay_finished() {
+    let wfid = "fake_wf_id";
+    let t = canned_histories::long_sequential_timers(3);
+    let mock = mock_workflow_client();
+    let mock = single_hist_mock_sg(wfid, t, &[3], mock, true);
+    let core = mock_worker(mock);
+    let activation = core.poll_workflow_activation().await.unwrap();
+    // Immediately request eviction after getting start workflow
+    core.request_workflow_eviction(&activation.run_id);
+    core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
+        activation.run_id,
+        start_timer_cmd(1, Duration::from_secs(1)),
+    ))
+    .await
+    .unwrap();
+    let t1_fired = core.poll_workflow_activation().await.unwrap();
+    assert_matches!(
+        t1_fired.jobs.as_slice(),
+        [WorkflowActivationJob {
+            variant: Some(workflow_activation_job::Variant::FireTimer(_)),
+        }]
+    );
+    core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
+        t1_fired.run_id,
+        start_timer_cmd(2, Duration::from_secs(1)),
+    ))
+    .await
+    .unwrap();
+    let t2_fired = core.poll_workflow_activation().await.unwrap();
+    assert_matches!(
+        t2_fired.jobs.as_slice(),
+        [WorkflowActivationJob {
+            variant: Some(workflow_activation_job::Variant::FireTimer(_)),
+        }]
+    );
+    core.complete_workflow_activation(WorkflowActivationCompletion::from_cmds(
+        t2_fired.run_id,
+        vec![CompleteWorkflowExecution { result: None }.into()],
+    ))
+    .await
+    .unwrap();
+    // The first two WFTs were replay, and now that we've caught up, the eviction will be sent
+    let eviction = core.poll_workflow_activation().await.unwrap();
+    assert_matches!(
+        eviction.jobs.as_slice(),
+        [WorkflowActivationJob {
+            variant: Some(workflow_activation_job::Variant::RemoveFromCache(_)),
+        }]
+    );
+    core.shutdown().await;
+}

package/sdk-core/core/src/pending_activations.rs CHANGED Viewed

@@ -22,6 +22,7 @@ struct PaInner {
     queue: VecDeque<ActivationKey>,
 }
+#[derive(Debug)]
 pub struct PendingActInfo {
     pub needs_eviction: Option<RemoveFromCache>,
     pub run_id: String,
@@ -41,6 +42,7 @@ impl PendingActivations {
             inner.queue.push_back(key);
         };
     }
     pub fn notify_needs_eviction(&self, run_id: &str, message: String, reason: EvictionReason) {
         let mut inner = self.inner.write();
@@ -106,6 +108,15 @@ impl PendingActivations {
             inner.activations.remove(k);
         }
     }
+    /// Returns true if any pending activation contains an eviction
+    pub fn is_some_eviction(&self) -> bool {
+        self.inner
+            .read()
+            .activations
+            .values()
+            .any(|act| act.needs_eviction.is_some())
+    }
 }
 #[cfg(test)]

package/sdk-core/core/src/telemetry/mod.rs CHANGED Viewed

@@ -271,7 +271,7 @@ pub(crate) fn test_telem_console() {
 pub(crate) fn test_telem_collector() {
     telemetry_init(&TelemetryOptions {
         otel_collector_url: Some("grpc://localhost:4317".parse().unwrap()),
-        tracing_filter: "temporal_sdk_core=DEBUG".to_string(),
+        tracing_filter: "temporal_sdk_core=DEBUG,temporal_sdk=DEBUG".to_string(),
         log_forwarding_level: LevelFilter::Off,
         prometheus_export_bind_address: None,
         totally_disable: false,

package/sdk-core/core/src/test_help/mod.rs CHANGED Viewed

@@ -16,6 +16,7 @@ use std::{
     collections::{BTreeMap, HashMap, HashSet, VecDeque},
     ops::RangeFull,
     sync::Arc,
+    time::Duration,
 };
 use temporal_sdk_core_api::Worker as WorkerTrait;
 use temporal_sdk_core_protos::{
@@ -34,6 +35,8 @@ use temporal_sdk_core_protos::{
         },
     },
 };
+use temporal_sdk_core_test_utils::TestWorker;
+use tokio::sync::Notify;
 pub const TEST_Q: &str = "q";
 pub static NO_MORE_WORK_ERROR_MSG: &str = "No more work to do";
@@ -103,13 +106,26 @@ pub(crate) fn mock_worker(mocks: MocksHolder) -> Worker {
     )
 }
+pub(crate) fn mock_sdk(poll_cfg: MockPollCfg) -> TestWorker {
+    mock_sdk_cfg(poll_cfg, |_| {})
+}
+pub(crate) fn mock_sdk_cfg(
+    mut poll_cfg: MockPollCfg,
+    mutator: impl FnOnce(&mut WorkerConfig),
+) -> TestWorker {
+    poll_cfg.using_rust_sdk = true;
+    let mut mock = build_mock_pollers(poll_cfg);
+    mock.worker_cfg(mutator);
+    let core = mock_worker(mock);
+    TestWorker::new(Arc::new(core), TEST_Q.to_string())
+}
 pub struct FakeWfResponses {
     pub wf_id: String,
     pub hist: TestHistoryBuilder,
     pub response_batches: Vec<ResponseType>,
 }
-// TODO: Rename to mock TQ or something?
 pub struct MocksHolder {
     client_bag: WorkerClientBag,
     mock_worker: MockWorker,
@@ -264,6 +280,10 @@ pub(crate) struct MockPollCfg {
     /// All calls to fail WFTs must match this predicate
     pub expect_fail_wft_matcher:
         Box<dyn Fn(&TaskToken, &WorkflowTaskFailedCause, &Option<Failure>) -> bool + Send>,
+    /// If being used with the Rust SDK, this is set true. It ensures pollers will not error out
+    /// early with no work, since we cannot know the exact number of times polling will happen.
+    /// Instead, they will just block forever.
+    pub using_rust_sdk: bool,
 }
 impl MockPollCfg {
@@ -278,6 +298,7 @@ impl MockPollCfg {
             num_expected_fails,
             mock_client: mock_workflow_client(),
             expect_fail_wft_matcher: Box::new(|_, _, _| true),
+            using_rust_sdk: false,
         }
     }
     pub fn from_resp_batches(
@@ -296,6 +317,7 @@ impl MockPollCfg {
             num_expected_fails: None,
             mock_client,
             expect_fail_wft_matcher: Box::new(|_, _, _| true),
+            using_rust_sdk: false,
         }
     }
 }
@@ -320,7 +342,7 @@ pub(crate) fn build_mock_pollers(mut cfg: MockPollCfg) -> MocksHolder {
             }
         }
-        if cfg.enforce_correct_number_of_polls {
+        if cfg.enforce_correct_number_of_polls && !cfg.using_rust_sdk {
             *correct_num_polls.get_or_insert(0) += hist.response_batches.len();
         }
@@ -346,14 +368,17 @@ pub(crate) fn build_mock_pollers(mut cfg: MockPollCfg) -> MocksHolder {
         task_q_resps.insert(hist.wf_id, tasks);
     }
-    let mut mock_poller = mock_poller();
+    let mut mock_poller = mock_manual_poller();
     // The poller will return history from any workflow runs that do not have currently
     // outstanding tasks.
     let outstanding = outstanding_wf_task_tokens.clone();
+    let outstanding_wakeup_orig = Arc::new(Notify::new());
+    let outstanding_wakeup = outstanding_wakeup_orig.clone();
     mock_poller
         .expect_poll()
         .times(correct_num_polls.map_or_else(|| RangeFull.into(), Into::<TimesRange>::into))
         .returning(move || {
+            let mut resp = None;
             for (_, tasks) in task_q_resps.iter_mut() {
                 // Must extract run id from a workflow task associated with this workflow
                 // TODO: Case where run id changes for same workflow id is not handled here
@@ -364,19 +389,40 @@ pub(crate) fn build_mock_pollers(mut cfg: MockPollCfg) -> MocksHolder {
                         outstanding
                             .write()
                             .insert(rid, TaskToken(t.task_token.clone()));
-                        return Some(Ok(t));
+                        resp = Some(Ok(t));
+                        break;
                     }
                 }
             }
-            Some(Err(tonic::Status::cancelled(NO_MORE_WORK_ERROR_MSG)))
+            let outstanding_wakeup = outstanding_wakeup.clone();
+            async move {
+                if resp.is_some() {
+                    return resp;
+                }
+                if cfg.using_rust_sdk {
+                    // Simulate poll timeout, or just send an empty response and then try again
+                    // if we're told a new one might be ready.
+                    tokio::select! {
+                        _ = outstanding_wakeup.notified() => {}
+                        _ = tokio::time::sleep(Duration::from_secs(60)) => {}
+                    };
+                    Some(Ok(Default::default()))
+                } else {
+                    Some(Err(tonic::Status::cancelled(NO_MORE_WORK_ERROR_MSG)))
+                }
+            }
+            .boxed()
         });
     let mock_worker = MockWorker::new(Box::from(mock_poller));
     let outstanding = outstanding_wf_task_tokens.clone();
+    let outstanding_wakeup = outstanding_wakeup_orig.clone();
     cfg.mock_client
         .expect_complete_workflow_task()
         .returning(move |comp| {
             outstanding.write().remove_by_right(&comp.task_token);
+            outstanding_wakeup.notify_one();
             Ok(RespondWorkflowTaskCompletedResponse::default())
         });
     let outstanding = outstanding_wf_task_tokens.clone();
@@ -389,6 +435,7 @@ pub(crate) fn build_mock_pollers(mut cfg: MockPollCfg) -> MocksHolder {
         )
         .returning(move |tt, _, _| {
             outstanding.write().remove_by_right(&tt);
+            outstanding_wakeup_orig.notify_one();
             Ok(Default::default())
         });
@@ -498,10 +545,13 @@ pub(crate) async fn poll_and_reply_clears_outstanding_evicts<'a>(
                 }
             };
+            let ends_execution = reply.has_execution_ending();
             worker.complete_workflow_activation(reply).await.unwrap();
-            // Restart assertions from the beginning if it was an eviction
-            if contains_eviction.is_some() {
+            // Restart assertions from the beginning if it was an eviction (and workflow execution
+            // isn't over)
+            if contains_eviction.is_some() && !ends_execution {
                 continue 'outer;
             }

package/sdk-core/core/src/worker/mod.rs CHANGED Viewed

@@ -40,7 +40,7 @@ use crate::{
 };
 use activities::{LocalInFlightActInfo, WorkerActivityTasks};
 use futures::{Future, TryFutureExt};
-use std::{convert::TryInto, sync::Arc};
+use std::{convert::TryInto, future, sync::Arc};
 use temporal_client::WorkflowTaskCompletion;
 use temporal_sdk_core_protos::{
     coresdk::{
@@ -102,7 +102,6 @@ pub struct Worker {
 #[async_trait::async_trait]
 impl WorkerTrait for Worker {
-    #[instrument(level = "debug", skip(self), fields(run_id))]
     async fn poll_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
         self.next_workflow_activation().await
     }
@@ -120,8 +119,6 @@ impl WorkerTrait for Worker {
         }
     }
-    #[instrument(level = "debug", skip(self, completion),
-    fields(completion=%&completion, run_id=%completion.run_id))]
     async fn complete_workflow_activation(
         &self,
         completion: WorkflowActivationCompletion,
@@ -165,7 +162,6 @@ impl WorkerTrait for Worker {
     }
     /// Begins the shutdown process, tells pollers they should stop. Is idempotent.
-    // TODO: will be in trait after Roey's shutdown refactor
     fn initiate_shutdown(&self) {
         self.shutdown_token.cancel();
         // First, we want to stop polling of both activity and workflow tasks
@@ -173,6 +169,7 @@ impl WorkerTrait for Worker {
             atm.notify_shutdown();
         }
         self.wf_task_source.stop_pollers();
+        info!("Initiated shutdown");
     }
     async fn shutdown(&self) {
@@ -258,6 +255,11 @@ impl Worker {
         Self::new(config, None, Arc::new(client.into()), Default::default())
     }
+    /// Returns number of currently cached workflows
+    pub fn cached_workflows(&self) -> usize {
+        self.wft_manager.cached_workflows()
+    }
     pub(crate) fn new_with_pollers(
         config: WorkerConfig,
         sticky_queue_name: Option<String>,
@@ -313,7 +315,6 @@ impl Worker {
     /// completed
     pub(crate) async fn shutdown(&self) {
         self.initiate_shutdown();
-        info!("Initiated shutdown");
         // Next we need to wait for all local activities to finish so no more workflow task
         // heartbeats will be generated
         self.local_act_mgr.shutdown_and_wait_all_finished().await;
@@ -436,6 +437,8 @@ impl Worker {
             Ok(())
         }
     }
+    #[instrument(level = "debug", skip(self), fields(run_id))]
     pub(crate) async fn next_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
         // The poll needs to be in a loop because we can't guarantee tail call optimization in Rust
         // (simply) and we really, really need that for long-poll retries.
@@ -448,6 +451,21 @@ impl Worker {
                 return Ok(pa);
             }
+            if self.config.max_cached_workflows > 0 {
+                if let Some(cache_cap_fut) = self.wft_manager.wait_for_cache_capacity() {
+                    tokio::select! {
+                        biased;
+                        // We must loop up if there's a new pending activation, since those are for
+                        // already-cached workflows and may include evictions, which will change
+                        // whether we are still waiting or not.
+                        _ = self.pending_activations_notify.notified() => {
+                            continue
+                        },
+                        _ = cache_cap_fut => {}
+                    };
+                }
+            }
             // Apply any buffered poll responses from the server. Must come after pending
             // activations, since there may be an eviction etc for whatever run is popped here.
             if let Some(buff_wft) = self.wft_manager.next_buffered_poll() {
@@ -481,6 +499,8 @@ impl Worker {
         }
     }
+    #[instrument(level = "debug", skip(self, completion),
+    fields(completion=%&completion, run_id=%completion.run_id))]
     pub(crate) async fn complete_workflow_activation(
         &self,
         completion: WorkflowActivationCompletion,
@@ -539,9 +559,9 @@ impl Worker {
         reason: EvictionReason,
     ) -> bool {
         match self.wft_manager.request_eviction(run_id, message, reason) {
-            EvictionRequestResult::EvictionIssued(_) => true,
+            EvictionRequestResult::EvictionRequested(_) => true,
             EvictionRequestResult::NotFound => false,
-            EvictionRequestResult::EvictionAlreadyOutstanding => false,
+            EvictionRequestResult::EvictionAlreadyRequested(_) => false,
         }
     }
@@ -572,7 +592,17 @@ impl Worker {
     async fn workflow_poll_or_wfts_drained(
         &self,
     ) -> Result<Option<ValidPollWFTQResponse>, PollWfError> {
+        let mut shutdown_seen = false;
         loop {
+            // If we've already seen shutdown once it's important we don't freak out and
+            // restart the loop constantly while waiting for poll to finish shutting down.
+            let shutdown_restarter = async {
+                if shutdown_seen {
+                    future::pending::<()>().await;
+                } else {
+                    self.shutdown_token.cancelled().await;
+                };
+            };
             tokio::select! {
                 biased;
@@ -585,7 +615,9 @@ impl Worker {
                     }
                     return r
                 },
-                _ = self.shutdown_token.cancelled() => {},
+                _ = shutdown_restarter => {
+                    shutdown_seen = true;
+                },
             }
         }
     }
@@ -593,8 +625,8 @@ impl Worker {
     /// Wait until not at the outstanding workflow task limit, and then poll this worker's task
     /// queue for new workflow tasks.
     ///
-    /// Returns `Ok(None)` in the event of a poll timeout, or if there was some gRPC error that
-    /// callers can't do anything about.
+    /// Returns `Ok(None)` in the event of a poll timeout, if there was some gRPC error that
+    /// callers can't do anything about, or any other reason to restart the poll loop.
     async fn workflow_poll(&self) -> Result<Option<ValidPollWFTQResponse>, PollWfError> {
         // We can't say we're shut down if there are outstanding LAs, as they could end up WFT
         // heartbeating which is a "new" workflow task that we need to accept and process as long as
@@ -642,6 +674,20 @@ impl Worker {
         // Only permanently take a permit in the event the poll finished completely
         sem.forget();
+        let work = if self.config.max_cached_workflows > 0 {
+            // Add the workflow to cache management. We do not even attempt insert if cache
+            // size is zero because we do not want to generate eviction requests for
+            // workflows which may immediately generate pending activations.
+            if let Some(ready_to_work) = self.wft_manager.add_new_run_to_cache(work).await {
+                ready_to_work
+            } else {
+                return Ok(None);
+            }
+        } else {
+            work
+        };
         Ok(Some(work))
     }
@@ -664,9 +710,9 @@ impl Worker {
                 Some(a)
             }
             NewWfTaskOutcome::TaskBuffered => {
-                // If the task was buffered, it's not actually outstanding, so we can
-                // immediately return a permit.
-                self.return_workflow_task_permit();
+                // Though the task is not outstanding in the lang sense, it is outstanding from the
+                // server perspective. We used to return a permit here, but that doesn't actually
+                // make much sense.
                 None
             }
             NewWfTaskOutcome::Autocomplete | NewWfTaskOutcome::LocalActsOutstanding => {
@@ -743,7 +789,7 @@ impl Worker {
                     commands,
                     query_responses,
                     sticky_attributes: None,
-                    return_new_workflow_task: force_new_wft,
+                    return_new_workflow_task: true,
                     force_create_new_workflow_task: force_new_wft,
                 };
                 let sticky_attrs = self.get_sticky_attrs();
@@ -950,6 +996,7 @@ impl Worker {
     }
 }
+#[derive(Debug, Copy, Clone)]
 struct WFTReportOutcome {
     reported_to_server: bool,
     failed: bool,
@@ -986,6 +1033,7 @@ mod tests {
         let cfg = test_worker_cfg()
             .max_outstanding_workflow_tasks(5_usize)
+            .max_cached_workflows(5_usize)
             .build()
             .unwrap();
         let worker = Worker::new_test(cfg, mock_client);
@@ -1018,6 +1066,7 @@ mod tests {
         let cfg = test_worker_cfg()
             .max_outstanding_workflow_tasks(5_usize)
+            .max_cached_workflows(5_usize)
             .build()
             .unwrap();
         let worker = Worker::new_test(cfg, mock_client);