@temporalio/core-bridge 0.14.0 → 0.16.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/Cargo.lock +162 -38
  2. package/Cargo.toml +3 -3
  3. package/index.d.ts +14 -1
  4. package/index.node +0 -0
  5. package/package.json +8 -5
  6. package/releases/aarch64-apple-darwin/index.node +0 -0
  7. package/releases/{x86_64-pc-windows-gnu → aarch64-unknown-linux-gnu}/index.node +0 -0
  8. package/releases/x86_64-apple-darwin/index.node +0 -0
  9. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  10. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  11. package/scripts/build.js +77 -34
  12. package/sdk-core/.buildkite/docker/Dockerfile +1 -1
  13. package/sdk-core/Cargo.toml +6 -5
  14. package/sdk-core/fsm/Cargo.toml +1 -1
  15. package/sdk-core/fsm/rustfsm_procmacro/Cargo.toml +2 -2
  16. package/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +8 -9
  17. package/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +13 -7
  18. package/sdk-core/fsm/rustfsm_trait/Cargo.toml +2 -2
  19. package/sdk-core/fsm/rustfsm_trait/src/lib.rs +1 -1
  20. package/sdk-core/protos/local/workflow_activation.proto +6 -3
  21. package/sdk-core/sdk-core-protos/Cargo.toml +4 -4
  22. package/sdk-core/sdk-core-protos/src/lib.rs +38 -50
  23. package/sdk-core/src/core_tests/activity_tasks.rs +5 -5
  24. package/sdk-core/src/core_tests/child_workflows.rs +55 -29
  25. package/sdk-core/src/core_tests/determinism.rs +19 -9
  26. package/sdk-core/src/core_tests/mod.rs +3 -3
  27. package/sdk-core/src/core_tests/retry.rs +14 -8
  28. package/sdk-core/src/core_tests/workers.rs +1 -1
  29. package/sdk-core/src/core_tests/workflow_tasks.rs +347 -4
  30. package/sdk-core/src/errors.rs +27 -44
  31. package/sdk-core/src/lib.rs +13 -3
  32. package/sdk-core/src/machines/activity_state_machine.rs +44 -5
  33. package/sdk-core/src/machines/child_workflow_state_machine.rs +31 -11
  34. package/sdk-core/src/machines/complete_workflow_state_machine.rs +1 -1
  35. package/sdk-core/src/machines/continue_as_new_workflow_state_machine.rs +1 -1
  36. package/sdk-core/src/machines/mod.rs +18 -23
  37. package/sdk-core/src/machines/patch_state_machine.rs +8 -8
  38. package/sdk-core/src/machines/signal_external_state_machine.rs +22 -1
  39. package/sdk-core/src/machines/timer_state_machine.rs +21 -3
  40. package/sdk-core/src/machines/transition_coverage.rs +3 -3
  41. package/sdk-core/src/machines/workflow_machines.rs +11 -11
  42. package/sdk-core/src/pending_activations.rs +27 -22
  43. package/sdk-core/src/pollers/gateway.rs +15 -7
  44. package/sdk-core/src/pollers/poll_buffer.rs +6 -5
  45. package/sdk-core/src/pollers/retry.rs +153 -120
  46. package/sdk-core/src/prototype_rust_sdk/workflow_context.rs +61 -46
  47. package/sdk-core/src/prototype_rust_sdk/workflow_future.rs +13 -12
  48. package/sdk-core/src/prototype_rust_sdk.rs +17 -23
  49. package/sdk-core/src/telemetry/metrics.rs +2 -4
  50. package/sdk-core/src/telemetry/mod.rs +6 -7
  51. package/sdk-core/src/test_help/canned_histories.rs +17 -93
  52. package/sdk-core/src/test_help/history_builder.rs +61 -2
  53. package/sdk-core/src/test_help/history_info.rs +21 -2
  54. package/sdk-core/src/test_help/mod.rs +26 -34
  55. package/sdk-core/src/worker/activities/activity_heartbeat_manager.rs +246 -138
  56. package/sdk-core/src/worker/activities.rs +46 -45
  57. package/sdk-core/src/worker/config.rs +11 -0
  58. package/sdk-core/src/worker/dispatcher.rs +5 -5
  59. package/sdk-core/src/worker/mod.rs +86 -56
  60. package/sdk-core/src/workflow/driven_workflow.rs +3 -3
  61. package/sdk-core/src/workflow/history_update.rs +1 -1
  62. package/sdk-core/src/workflow/mod.rs +2 -1
  63. package/sdk-core/src/workflow/workflow_tasks/cache_manager.rs +13 -17
  64. package/sdk-core/src/workflow/workflow_tasks/concurrency_manager.rs +10 -18
  65. package/sdk-core/src/workflow/workflow_tasks/mod.rs +72 -57
  66. package/sdk-core/test_utils/Cargo.toml +1 -1
  67. package/sdk-core/test_utils/src/lib.rs +2 -2
  68. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +61 -1
  69. package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +2 -2
  70. package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +49 -0
  71. package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +2 -2
  72. package/sdk-core/tests/integ_tests/workflow_tests.rs +1 -0
  73. package/src/conversions.rs +17 -0
  74. package/src/errors.rs +0 -7
  75. package/src/lib.rs +0 -20
@@ -45,14 +45,16 @@ async fn long_poll_non_retryable_errors() {
45
45
  let mut mock_gateway = MockServerGatewayApis::new();
46
46
  mock_gateway
47
47
  .expect_poll_workflow_task()
48
- .returning(move |_| Err(Status::new(code, "non-retryable failure")))
48
+ .returning(move |_, _| Err(Status::new(code, "non-retryable failure")))
49
49
  .times(1);
50
50
  mock_gateway
51
51
  .expect_poll_activity_task()
52
52
  .returning(move |_| Err(Status::new(code, "non-retryable failure")))
53
53
  .times(1);
54
54
  let retry_gateway = RetryGateway::new(mock_gateway, Default::default());
55
- let result = retry_gateway.poll_workflow_task("tq".to_string()).await;
55
+ let result = retry_gateway
56
+ .poll_workflow_task("tq".to_string(), false)
57
+ .await;
56
58
  assert!(result.is_err());
57
59
  let result = retry_gateway.poll_activity_task("tq".to_string()).await;
58
60
  assert!(result.is_err());
@@ -86,11 +88,11 @@ async fn long_poll_retries_forever() {
86
88
  let mut mock_gateway = MockServerGatewayApis::new();
87
89
  mock_gateway
88
90
  .expect_poll_workflow_task()
89
- .returning(move |_| Err(Status::new(Code::Unknown, "retryable failure")))
91
+ .returning(move |_, _| Err(Status::new(Code::Unknown, "retryable failure")))
90
92
  .times(50);
91
93
  mock_gateway
92
94
  .expect_poll_workflow_task()
93
- .returning(|_| Ok(Default::default()))
95
+ .returning(|_, _| Ok(Default::default()))
94
96
  .times(1);
95
97
  mock_gateway
96
98
  .expect_poll_activity_task()
@@ -103,7 +105,9 @@ async fn long_poll_retries_forever() {
103
105
 
104
106
  let retry_gateway = RetryGateway::new(mock_gateway, Default::default());
105
107
 
106
- let result = retry_gateway.poll_workflow_task("tq".to_string()).await;
108
+ let result = retry_gateway
109
+ .poll_workflow_task("tq".to_string(), false)
110
+ .await;
107
111
  assert!(result.is_ok());
108
112
  let result = retry_gateway.poll_activity_task("tq".to_string()).await;
109
113
  assert!(result.is_ok());
@@ -116,11 +120,11 @@ async fn long_poll_retries_deadline_exceeded() {
116
120
  let mut mock_gateway = MockServerGatewayApis::new();
117
121
  mock_gateway
118
122
  .expect_poll_workflow_task()
119
- .returning(move |_| Err(Status::new(code, "retryable failure")))
123
+ .returning(move |_, _| Err(Status::new(code, "retryable failure")))
120
124
  .times(5);
121
125
  mock_gateway
122
126
  .expect_poll_workflow_task()
123
- .returning(|_| Ok(Default::default()))
127
+ .returning(|_, _| Ok(Default::default()))
124
128
  .times(1);
125
129
  mock_gateway
126
130
  .expect_poll_activity_task()
@@ -133,7 +137,9 @@ async fn long_poll_retries_deadline_exceeded() {
133
137
 
134
138
  let retry_gateway = RetryGateway::new(mock_gateway, Default::default());
135
139
 
136
- let result = retry_gateway.poll_workflow_task("tq".to_string()).await;
140
+ let result = retry_gateway
141
+ .poll_workflow_task("tq".to_string(), false)
142
+ .await;
137
143
  assert!(result.is_ok());
138
144
  let result = retry_gateway.poll_activity_task("tq".to_string()).await;
139
145
  assert!(result.is_ok());
@@ -179,7 +179,7 @@ async fn after_shutdown_of_worker_get_shutdown_err() {
179
179
  let res = core.poll_workflow_activation(TEST_Q).await.unwrap();
180
180
  assert_matches!(
181
181
  res.jobs[0].variant,
182
- Some(wf_activation_job::Variant::RemoveFromCache(true))
182
+ Some(wf_activation_job::Variant::RemoveFromCache(_))
183
183
  );
184
184
  core.complete_workflow_activation(WfActivationCompletion::empty(TEST_Q, run_id.clone()))
185
185
  .await
@@ -6,7 +6,7 @@ use crate::{
6
6
  build_fake_core, build_mock_pollers, build_multihist_mock_sg, canned_histories,
7
7
  gen_assert_and_fail, gen_assert_and_reply, hist_to_poll_resp, mock_core, poll_and_reply,
8
8
  poll_and_reply_clears_outstanding_evicts, single_hist_mock_sg, FakeWfResponses,
9
- MockPollCfg, MocksHolder, ResponseType, TestHistoryBuilder, TEST_Q,
9
+ MockPollCfg, MocksHolder, ResponseType, TestHistoryBuilder, NO_MORE_WORK_ERROR_MSG, TEST_Q,
10
10
  },
11
11
  workflow::WorkflowCachingPolicy::{self, AfterEveryReply, NonSticky},
12
12
  Core, CoreSDK, WfActivationCompletion,
@@ -30,7 +30,7 @@ use temporal_sdk_core_protos::{
30
30
  },
31
31
  },
32
32
  temporal::api::{
33
- enums::v1::EventType,
33
+ enums::v1::{EventType, WorkflowTaskFailedCause},
34
34
  failure::v1::Failure,
35
35
  history::v1::{history_event, TimerFiredEventAttributes},
36
36
  workflowservice::v1::RespondWorkflowTaskCompletedResponse,
@@ -642,9 +642,9 @@ async fn workflow_update_random_seed_on_workflow_reset() {
642
642
  UpdateRandomSeed{randomness_seed})),
643
643
  }] => {
644
644
  assert_ne!(randomness_seed_from_start.load(Ordering::SeqCst),
645
- *randomness_seed)
645
+ *randomness_seed);
646
646
  }
647
- )
647
+ );
648
648
  },
649
649
  vec![CompleteWorkflowExecution { result: None }.into()],
650
650
  ),
@@ -1391,3 +1391,346 @@ async fn buffering_tasks_doesnt_count_toward_outstanding_max() {
1391
1391
  PollWfError::TonicError(_)
1392
1392
  );
1393
1393
  }
1394
+
1395
+ #[tokio::test]
1396
+ async fn fail_wft_then_recover() {
1397
+ let t = canned_histories::long_sequential_timers(1);
1398
+ let mut mh = MockPollCfg::from_resp_batches(
1399
+ "fake_wf_id",
1400
+ t,
1401
+ // We need to deliver all of history twice because of eviction
1402
+ [ResponseType::AllHistory, ResponseType::AllHistory],
1403
+ MockServerGatewayApis::new(),
1404
+ );
1405
+ mh.num_expected_fails = Some(1);
1406
+ mh.expect_fail_wft_matcher =
1407
+ Box::new(|_, cause, _| matches!(cause, WorkflowTaskFailedCause::NonDeterministicError));
1408
+ let mut mock = build_mock_pollers(mh);
1409
+ mock.worker_cfg(TEST_Q, |wc| {
1410
+ wc.max_cached_workflows = 2;
1411
+ });
1412
+ let core = mock_core(mock);
1413
+
1414
+ let act = core.poll_workflow_activation(TEST_Q).await.unwrap();
1415
+ // Start an activity instead of a timer, triggering nondeterminism error
1416
+ core.complete_workflow_activation(WfActivationCompletion::from_cmds(
1417
+ TEST_Q,
1418
+ act.run_id.clone(),
1419
+ vec![ScheduleActivity {
1420
+ activity_id: "fake_activity".to_string(),
1421
+ ..Default::default()
1422
+ }
1423
+ .into()],
1424
+ ))
1425
+ .await
1426
+ .unwrap();
1427
+ // We must handle an eviction now
1428
+ let evict_act = core.poll_workflow_activation(TEST_Q).await.unwrap();
1429
+ assert_eq!(evict_act.run_id, act.run_id);
1430
+ assert_matches!(
1431
+ evict_act.jobs.as_slice(),
1432
+ [WfActivationJob {
1433
+ variant: Some(wf_activation_job::Variant::RemoveFromCache(_)),
1434
+ }]
1435
+ );
1436
+ core.complete_workflow_activation(WfActivationCompletion::empty(TEST_Q, evict_act.run_id))
1437
+ .await
1438
+ .unwrap();
1439
+
1440
+ // Workflow starting over, this time issue the right command
1441
+ let act = core.poll_workflow_activation(TEST_Q).await.unwrap();
1442
+ core.complete_workflow_activation(WfActivationCompletion::from_cmds(
1443
+ TEST_Q,
1444
+ act.run_id,
1445
+ vec![StartTimer {
1446
+ seq: 1,
1447
+ ..Default::default()
1448
+ }
1449
+ .into()],
1450
+ ))
1451
+ .await
1452
+ .unwrap();
1453
+ let act = core.poll_workflow_activation(TEST_Q).await.unwrap();
1454
+ assert_matches!(
1455
+ act.jobs.as_slice(),
1456
+ [WfActivationJob {
1457
+ variant: Some(wf_activation_job::Variant::FireTimer(_)),
1458
+ },]
1459
+ );
1460
+ core.complete_workflow_activation(WfActivationCompletion::from_cmds(
1461
+ TEST_Q,
1462
+ act.run_id,
1463
+ vec![CompleteWorkflowExecution { result: None }.into()],
1464
+ ))
1465
+ .await
1466
+ .unwrap();
1467
+ core.shutdown().await;
1468
+ }
1469
+
1470
+ #[tokio::test]
1471
+ async fn poll_response_triggers_wf_error() {
1472
+ let mut t = TestHistoryBuilder::default();
1473
+ t.add_by_type(EventType::WorkflowExecutionStarted);
1474
+ // Add this nonsense event here to make applying the poll response fail
1475
+ t.add_external_signal_completed(100);
1476
+ t.add_full_wf_task();
1477
+ t.add_workflow_execution_completed();
1478
+
1479
+ let mut mh = MockPollCfg::from_resp_batches(
1480
+ "fake_wf_id",
1481
+ t,
1482
+ [ResponseType::AllHistory],
1483
+ MockServerGatewayApis::new(),
1484
+ );
1485
+ // Since applying the poll response immediately generates an error core will start polling again
1486
+ // Rather than panic on bad expectation we want to return the magic "no more work" error
1487
+ mh.enforce_correct_number_of_polls = false;
1488
+ let mock = build_mock_pollers(mh);
1489
+ let core = mock_core(mock);
1490
+ // Poll for first WFT, which is immediately an eviction
1491
+ let act = core.poll_workflow_activation(TEST_Q).await;
1492
+ assert_matches!(act, Err(PollWfError::TonicError(err))
1493
+ if err.message() == NO_MORE_WORK_ERROR_MSG);
1494
+ }
1495
+
1496
+ // Verifies we can handle multiple wft timeouts in a row if lang is being very slow in responding
1497
+ #[tokio::test]
1498
+ async fn lang_slower_than_wft_timeouts() {
1499
+ let wfid = "fake_wf_id";
1500
+ let mut t = TestHistoryBuilder::default();
1501
+ t.add_by_type(EventType::WorkflowExecutionStarted);
1502
+ t.add_workflow_task_scheduled_and_started();
1503
+ t.add_workflow_task_timed_out();
1504
+ t.add_full_wf_task();
1505
+ t.add_workflow_execution_completed();
1506
+
1507
+ let tasks = [
1508
+ hist_to_poll_resp(&t, wfid.to_owned(), 1.into(), TEST_Q.to_string()),
1509
+ hist_to_poll_resp(&t, wfid.to_owned(), 1.into(), TEST_Q.to_string()),
1510
+ hist_to_poll_resp(&t, wfid.to_owned(), 1.into(), TEST_Q.to_string()),
1511
+ ];
1512
+ let mut mock = MockServerGatewayApis::new();
1513
+ mock.expect_complete_workflow_task()
1514
+ .times(1)
1515
+ .returning(|_| Err(tonic::Status::not_found("Workflow task not found.")));
1516
+ mock.expect_complete_workflow_task()
1517
+ .times(1)
1518
+ .returning(|_| Ok(Default::default()));
1519
+ let mut mock = MocksHolder::from_gateway_with_responses(mock, tasks, []);
1520
+ mock.worker_cfg(TEST_Q, |wc| {
1521
+ wc.max_cached_workflows = 2;
1522
+ });
1523
+ let core = mock_core(mock);
1524
+
1525
+ let wf_task = core.poll_workflow_activation(TEST_Q).await.unwrap();
1526
+ let poll_until_no_work = core.poll_workflow_activation(TEST_Q).await;
1527
+ assert_matches!(poll_until_no_work, Err(PollWfError::TonicError(err))
1528
+ if err.message() == NO_MORE_WORK_ERROR_MSG);
1529
+ // This completion runs into a workflow task not found error, since it's completing a stale
1530
+ // task.
1531
+ core.complete_workflow_activation(WfActivationCompletion::empty(TEST_Q, wf_task.run_id))
1532
+ .await
1533
+ .unwrap();
1534
+ // Now we should get an eviction
1535
+ let wf_task = core.poll_workflow_activation(TEST_Q).await.unwrap();
1536
+ assert_matches!(
1537
+ wf_task.jobs.as_slice(),
1538
+ [WfActivationJob {
1539
+ variant: Some(wf_activation_job::Variant::RemoveFromCache(_)),
1540
+ }]
1541
+ );
1542
+ core.complete_workflow_activation(WfActivationCompletion::empty(TEST_Q, wf_task.run_id))
1543
+ .await
1544
+ .unwrap();
1545
+ // The last WFT buffered should be applied now
1546
+ let start_again = core.poll_workflow_activation(TEST_Q).await.unwrap();
1547
+ assert_matches!(
1548
+ start_again.jobs[0].variant,
1549
+ Some(wf_activation_job::Variant::StartWorkflow(_))
1550
+ );
1551
+ core.complete_workflow_activation(WfActivationCompletion::from_cmds(
1552
+ TEST_Q,
1553
+ start_again.run_id,
1554
+ vec![CompleteWorkflowExecution { result: None }.into()],
1555
+ ))
1556
+ .await
1557
+ .unwrap();
1558
+ core.shutdown().await;
1559
+ }
1560
+
1561
+ #[tokio::test]
1562
+ async fn tries_cancel_of_completed_activity() {
1563
+ let mut t = TestHistoryBuilder::default();
1564
+ t.add_by_type(EventType::WorkflowExecutionStarted);
1565
+ t.add_full_wf_task();
1566
+ let scheduled_event_id = t.add_activity_task_scheduled("1");
1567
+ t.add_we_signaled("sig", vec![]);
1568
+ let started_event_id = t.add_activity_task_started(scheduled_event_id);
1569
+ t.add_activity_task_completed(scheduled_event_id, started_event_id, Default::default());
1570
+ t.add_workflow_task_scheduled_and_started();
1571
+
1572
+ let mock = MockServerGatewayApis::new();
1573
+ let mut mock = single_hist_mock_sg("fake_wf_id", t, &[1, 2], mock, true);
1574
+ mock.worker_cfg(TEST_Q, |cfg| cfg.max_cached_workflows = 1);
1575
+ let core = mock_core(mock);
1576
+
1577
+ let activation = core.poll_workflow_activation(TEST_Q).await.unwrap();
1578
+ core.complete_workflow_activation(WfActivationCompletion::from_cmd(
1579
+ TEST_Q,
1580
+ activation.run_id,
1581
+ ScheduleActivity {
1582
+ seq: 1,
1583
+ activity_id: "1".to_string(),
1584
+ ..Default::default()
1585
+ }
1586
+ .into(),
1587
+ ))
1588
+ .await
1589
+ .unwrap();
1590
+ let activation = core.poll_workflow_activation(TEST_Q).await.unwrap();
1591
+ assert_matches!(
1592
+ activation.jobs.as_slice(),
1593
+ [
1594
+ WfActivationJob {
1595
+ variant: Some(wf_activation_job::Variant::SignalWorkflow(_)),
1596
+ },
1597
+ WfActivationJob {
1598
+ variant: Some(wf_activation_job::Variant::ResolveActivity(_)),
1599
+ }
1600
+ ]
1601
+ );
1602
+ core.complete_workflow_activation(WfActivationCompletion::from_cmds(
1603
+ TEST_Q,
1604
+ activation.run_id,
1605
+ vec![
1606
+ RequestCancelActivity { seq: 1 }.into(),
1607
+ CompleteWorkflowExecution { result: None }.into(),
1608
+ ],
1609
+ ))
1610
+ .await
1611
+ .unwrap();
1612
+
1613
+ core.shutdown().await;
1614
+ }
1615
+
1616
+ #[tokio::test]
1617
+ async fn failing_wft_doesnt_eat_permit_forever() {
1618
+ let mut t = TestHistoryBuilder::default();
1619
+ t.add_by_type(EventType::WorkflowExecutionStarted);
1620
+ t.add_workflow_task_scheduled_and_started();
1621
+
1622
+ let failures = 5;
1623
+ // One extra response for when we stop failing
1624
+ let resps = (1..=(failures + 1)).map(|_| 1);
1625
+ let mock = MockServerGatewayApis::new();
1626
+ let mut mock = single_hist_mock_sg("fake_wf_id", t, resps, mock, true);
1627
+ mock.worker_cfg(TEST_Q, |cfg| {
1628
+ cfg.max_cached_workflows = 2;
1629
+ cfg.max_outstanding_workflow_tasks = 2;
1630
+ });
1631
+ let core = mock_core(mock);
1632
+
1633
+ // Spin failing the WFT to verify that we don't get stuck
1634
+ for _ in 1..=failures {
1635
+ let activation = core.poll_workflow_activation(TEST_Q).await.unwrap();
1636
+ // Issue a nonsense completion that will trigger a WFT failure
1637
+ core.complete_workflow_activation(WfActivationCompletion::from_cmd(
1638
+ TEST_Q,
1639
+ activation.run_id,
1640
+ RequestCancelActivity { seq: 1 }.into(),
1641
+ ))
1642
+ .await
1643
+ .unwrap();
1644
+ let activation = core.poll_workflow_activation(TEST_Q).await.unwrap();
1645
+ assert_matches!(
1646
+ activation.jobs.as_slice(),
1647
+ [WfActivationJob {
1648
+ variant: Some(wf_activation_job::Variant::RemoveFromCache(_)),
1649
+ },]
1650
+ );
1651
+ core.complete_workflow_activation(WfActivationCompletion::empty(TEST_Q, activation.run_id))
1652
+ .await
1653
+ .unwrap();
1654
+ assert_eq!(core.outstanding_wfts(TEST_Q), 0);
1655
+ assert_eq!(core.available_wft_permits(TEST_Q), 2);
1656
+ }
1657
+ let activation = core.poll_workflow_activation(TEST_Q).await.unwrap();
1658
+ core.complete_workflow_activation(WfActivationCompletion::from_cmd(
1659
+ TEST_Q,
1660
+ activation.run_id,
1661
+ CompleteWorkflowExecution { result: None }.into(),
1662
+ ))
1663
+ .await
1664
+ .unwrap();
1665
+
1666
+ core.shutdown().await;
1667
+ }
1668
+
1669
+ #[tokio::test]
1670
+ async fn cache_miss_doesnt_eat_permit_forever() {
1671
+ let mut t = TestHistoryBuilder::default();
1672
+ t.add_by_type(EventType::WorkflowExecutionStarted);
1673
+ t.add_full_wf_task();
1674
+ t.add_we_signaled("sig", vec![]);
1675
+ t.add_full_wf_task();
1676
+ t.add_workflow_execution_completed();
1677
+
1678
+ let mut mh = MockPollCfg::from_resp_batches(
1679
+ "fake_wf_id",
1680
+ t,
1681
+ [
1682
+ ResponseType::ToTaskNum(1),
1683
+ ResponseType::OneTask(2),
1684
+ ResponseType::ToTaskNum(1),
1685
+ ResponseType::OneTask(2),
1686
+ ResponseType::ToTaskNum(1),
1687
+ ResponseType::OneTask(2),
1688
+ // Last one to complete successfully
1689
+ ResponseType::ToTaskNum(1),
1690
+ ],
1691
+ MockServerGatewayApis::new(),
1692
+ );
1693
+ mh.num_expected_fails = Some(3);
1694
+ mh.expect_fail_wft_matcher =
1695
+ Box::new(|_, cause, _| matches!(cause, WorkflowTaskFailedCause::ResetStickyTaskQueue));
1696
+ let mut mock = build_mock_pollers(mh);
1697
+ mock.worker_cfg(TEST_Q, |cfg| {
1698
+ cfg.max_outstanding_workflow_tasks = 2;
1699
+ });
1700
+ let core = mock_core(mock);
1701
+
1702
+ // Spin missing the cache to verify that we don't get stuck
1703
+ for _ in 1..=3 {
1704
+ // Start
1705
+ let activation = core.poll_workflow_activation(TEST_Q).await.unwrap();
1706
+ core.complete_workflow_activation(WfActivationCompletion::empty(TEST_Q, activation.run_id))
1707
+ .await
1708
+ .unwrap();
1709
+ // Evict
1710
+ let activation = core.poll_workflow_activation(TEST_Q).await.unwrap();
1711
+ assert_matches!(
1712
+ activation.jobs.as_slice(),
1713
+ [WfActivationJob {
1714
+ variant: Some(wf_activation_job::Variant::RemoveFromCache(_)),
1715
+ },]
1716
+ );
1717
+ core.complete_workflow_activation(WfActivationCompletion::empty(TEST_Q, activation.run_id))
1718
+ .await
1719
+ .unwrap();
1720
+ assert_eq!(core.outstanding_wfts(TEST_Q), 0);
1721
+ assert_eq!(core.available_wft_permits(TEST_Q), 2);
1722
+ // When we loop back up, the poll will trigger a cache miss, which we should immediately
1723
+ // reply to WFT with failure, and then poll again, which will deliver the from-the-start
1724
+ // history
1725
+ }
1726
+ let activation = core.poll_workflow_activation(TEST_Q).await.unwrap();
1727
+ core.complete_workflow_activation(WfActivationCompletion::from_cmd(
1728
+ TEST_Q,
1729
+ activation.run_id,
1730
+ CompleteWorkflowExecution { result: None }.into(),
1731
+ ))
1732
+ .await
1733
+ .unwrap();
1734
+
1735
+ core.shutdown().await;
1736
+ }
@@ -16,6 +16,23 @@ pub(crate) struct WorkflowUpdateError {
16
16
  pub task_token: Option<TaskToken>,
17
17
  }
18
18
 
19
+ impl From<WorkflowMissingError> for WorkflowUpdateError {
20
+ fn from(wme: WorkflowMissingError) -> Self {
21
+ Self {
22
+ source: WFMachinesError::Fatal("Workflow machines missing".to_string()),
23
+ run_id: wme.run_id,
24
+ task_token: None,
25
+ }
26
+ }
27
+ }
28
+
29
+ /// The workflow machines were expected to be in the cache but were not
30
+ #[derive(Debug)]
31
+ pub(crate) struct WorkflowMissingError {
32
+ /// The run id of the erring workflow
33
+ pub run_id: String,
34
+ }
35
+
19
36
  /// Errors thrown during initialization of [crate::Core]
20
37
  #[derive(thiserror::Error, Debug)]
21
38
  pub enum CoreInitError {
@@ -33,15 +50,6 @@ pub enum CoreInitError {
33
50
  /// Errors thrown by [crate::Core::poll_workflow_activation]
34
51
  #[derive(thiserror::Error, Debug)]
35
52
  pub enum PollWfError {
36
- /// There was an error specific to a workflow instance. The cached workflow should be deleted
37
- /// from lang side.
38
- #[error("There was an error with the workflow instance with run id ({run_id}): {source:?}")]
39
- WorkflowUpdateError {
40
- /// Underlying workflow error
41
- source: anyhow::Error,
42
- /// The run id of the erring workflow
43
- run_id: String,
44
- },
45
53
  /// [crate::Core::shutdown] was called, and there are no more replay tasks to be handled. Lang
46
54
  /// must call [crate::Core::complete_workflow_activation] for any remaining tasks, and then may
47
55
  /// exit.
@@ -61,20 +69,11 @@ pub enum PollWfError {
61
69
  NoWorkerForQueue(String),
62
70
  }
63
71
 
64
- impl From<WorkflowUpdateError> for PollWfError {
65
- fn from(e: WorkflowUpdateError) -> Self {
66
- Self::WorkflowUpdateError {
67
- source: e.source.into(),
68
- run_id: e.run_id,
69
- }
70
- }
71
- }
72
-
73
72
  impl From<WorkerLookupErr> for PollWfError {
74
73
  fn from(e: WorkerLookupErr) -> Self {
75
74
  match e {
76
- WorkerLookupErr::Shutdown(_) => PollWfError::ShutDown,
77
- WorkerLookupErr::NoWorker(s) => PollWfError::NoWorkerForQueue(s),
75
+ WorkerLookupErr::Shutdown(_) => Self::ShutDown,
76
+ WorkerLookupErr::NoWorker(s) => Self::NoWorkerForQueue(s),
78
77
  }
79
78
  }
80
79
  }
@@ -98,8 +97,8 @@ pub enum PollActivityError {
98
97
  impl From<WorkerLookupErr> for PollActivityError {
99
98
  fn from(e: WorkerLookupErr) -> Self {
100
99
  match e {
101
- WorkerLookupErr::Shutdown(_) => PollActivityError::ShutDown,
102
- WorkerLookupErr::NoWorker(s) => PollActivityError::NoWorkerForQueue(s),
100
+ WorkerLookupErr::Shutdown(_) => Self::ShutDown,
101
+ WorkerLookupErr::NoWorker(s) => Self::NoWorkerForQueue(s),
103
102
  }
104
103
  }
105
104
  }
@@ -116,15 +115,6 @@ pub enum CompleteWfError {
116
115
  /// The completion, which may not be included to avoid unnecessary copies.
117
116
  completion: Option<WfActivationCompletion>,
118
117
  },
119
- /// There was an error specific to a workflow instance. The cached workflow should be deleted
120
- /// from lang side.
121
- #[error("There was an error with the workflow instance with run id ({run_id}): {source:?}")]
122
- WorkflowUpdateError {
123
- /// Underlying workflow error
124
- source: anyhow::Error,
125
- /// The run id of the erring workflow
126
- run_id: String,
127
- },
128
118
  /// There is no worker registered for the queue being polled
129
119
  #[error("No worker registered for queue: {0}")]
130
120
  NoWorkerForQueue(String),
@@ -134,20 +124,12 @@ pub enum CompleteWfError {
134
124
  TonicError(#[from] tonic::Status),
135
125
  }
136
126
 
137
- impl From<WorkflowUpdateError> for CompleteWfError {
138
- fn from(e: WorkflowUpdateError) -> Self {
139
- Self::WorkflowUpdateError {
140
- source: e.source.into(),
141
- run_id: e.run_id,
142
- }
143
- }
144
- }
145
-
146
127
  impl From<WorkerLookupErr> for CompleteWfError {
147
128
  fn from(e: WorkerLookupErr) -> Self {
148
129
  match e {
149
- WorkerLookupErr::Shutdown(s) => CompleteWfError::NoWorkerForQueue(s),
150
- WorkerLookupErr::NoWorker(s) => CompleteWfError::NoWorkerForQueue(s),
130
+ WorkerLookupErr::Shutdown(s) | WorkerLookupErr::NoWorker(s) => {
131
+ Self::NoWorkerForQueue(s)
132
+ }
151
133
  }
152
134
  }
153
135
  }
@@ -175,8 +157,9 @@ pub enum CompleteActivityError {
175
157
  impl From<WorkerLookupErr> for CompleteActivityError {
176
158
  fn from(e: WorkerLookupErr) -> Self {
177
159
  match e {
178
- WorkerLookupErr::Shutdown(s) => CompleteActivityError::NoWorkerForQueue(s),
179
- WorkerLookupErr::NoWorker(s) => CompleteActivityError::NoWorkerForQueue(s),
160
+ WorkerLookupErr::Shutdown(s) | WorkerLookupErr::NoWorker(s) => {
161
+ Self::NoWorkerForQueue(s)
162
+ }
180
163
  }
181
164
  }
182
165
  }
@@ -202,7 +202,7 @@ pub struct CoreInitOptions {
202
202
 
203
203
  /// Initializes an instance of the core sdk and establishes a connection to the temporal server.
204
204
  ///
205
- /// Note: Also creates a tokio runtime that will be used for all client-server interactions.
205
+ /// Note: Also creates a tokio runtime that will be used for all client-server interactions.
206
206
  ///
207
207
  /// # Panics
208
208
  /// * Will panic if called from within an async context, as it will construct a runtime and you
@@ -272,7 +272,8 @@ impl Core for CoreSDK {
272
272
  }
273
273
  }
274
274
 
275
- #[instrument(level = "debug", skip(self, completion), fields(completion=%&completion))]
275
+ #[instrument(level = "debug", skip(self, completion),
276
+ fields(completion=%&completion, run_id=%completion.run_id))]
276
277
  async fn complete_workflow_activation(
277
278
  &self,
278
279
  completion: WfActivationCompletion,
@@ -308,7 +309,7 @@ impl Core for CoreSDK {
308
309
 
309
310
  fn request_workflow_eviction(&self, task_queue: &str, run_id: &str) {
310
311
  if let Ok(w) = self.worker(task_queue) {
311
- w.request_wf_eviction(run_id);
312
+ w.request_wf_eviction(run_id, "Eviction explicitly requested by lang");
312
313
  }
313
314
  }
314
315
 
@@ -366,6 +367,15 @@ impl CoreSDK {
366
367
  self.workers.set_worker_for_task_queue(tq, worker).unwrap();
367
368
  }
368
369
 
370
+ #[cfg(test)]
371
+ pub(crate) fn outstanding_wfts(&self, tq: &str) -> usize {
372
+ self.worker(tq).unwrap().outstanding_workflow_tasks()
373
+ }
374
+ #[cfg(test)]
375
+ pub(crate) fn available_wft_permits(&self, tq: &str) -> usize {
376
+ self.worker(tq).unwrap().available_wft_permits()
377
+ }
378
+
369
379
  fn get_sticky_q_name_for_worker(&self, config: &WorkerConfig) -> Option<String> {
370
380
  if config.max_cached_workflows > 0 {
371
381
  Some(format!(