@temporalio/core-bridge 1.11.1 → 1.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/Cargo.lock +86 -88
  2. package/lib/index.d.ts +3 -0
  3. package/lib/index.js.map +1 -1
  4. package/package.json +3 -3
  5. package/releases/aarch64-apple-darwin/index.node +0 -0
  6. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
  7. package/releases/x86_64-apple-darwin/index.node +0 -0
  8. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  9. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  10. package/sdk-core/.github/workflows/per-pr.yml +7 -1
  11. package/sdk-core/Cargo.toml +1 -1
  12. package/sdk-core/client/Cargo.toml +3 -3
  13. package/sdk-core/client/src/lib.rs +1 -1
  14. package/sdk-core/client/src/metrics.rs +2 -2
  15. package/sdk-core/client/src/raw.rs +39 -13
  16. package/sdk-core/client/src/retry.rs +108 -62
  17. package/sdk-core/client/src/workflow_handle/mod.rs +1 -2
  18. package/sdk-core/core/Cargo.toml +4 -5
  19. package/sdk-core/core/src/abstractions.rs +2 -3
  20. package/sdk-core/core/src/core_tests/activity_tasks.rs +1 -1
  21. package/sdk-core/core/src/core_tests/local_activities.rs +2 -2
  22. package/sdk-core/core/src/core_tests/queries.rs +8 -4
  23. package/sdk-core/core/src/core_tests/updates.rs +2 -2
  24. package/sdk-core/core/src/core_tests/workflow_cancels.rs +3 -3
  25. package/sdk-core/core/src/core_tests/workflow_tasks.rs +55 -54
  26. package/sdk-core/core/src/ephemeral_server/mod.rs +5 -3
  27. package/sdk-core/core/src/protosext/mod.rs +3 -0
  28. package/sdk-core/core/src/telemetry/mod.rs +0 -8
  29. package/sdk-core/core/src/telemetry/otel.rs +7 -3
  30. package/sdk-core/core/src/worker/activities/activity_task_poller_stream.rs +11 -0
  31. package/sdk-core/core/src/worker/activities.rs +1 -1
  32. package/sdk-core/core/src/worker/mod.rs +6 -6
  33. package/sdk-core/core/src/worker/slot_provider.rs +4 -3
  34. package/sdk-core/core/src/worker/tuner/resource_based.rs +1 -1
  35. package/sdk-core/core/src/worker/workflow/driven_workflow.rs +28 -2
  36. package/sdk-core/core/src/worker/workflow/history_update.rs +2 -2
  37. package/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +8 -5
  38. package/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +1 -1
  39. package/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +1 -1
  40. package/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +7 -7
  41. package/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +10 -15
  42. package/sdk-core/core/src/worker/workflow/machines/mod.rs +1 -1
  43. package/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +3 -2
  44. package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +1 -1
  45. package/sdk-core/core/src/worker/workflow/machines/update_state_machine.rs +4 -4
  46. package/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +30 -20
  47. package/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +2 -2
  48. package/sdk-core/core/src/worker/workflow/managed_run.rs +20 -4
  49. package/sdk-core/core/src/worker/workflow/mod.rs +33 -29
  50. package/sdk-core/core/src/worker/workflow/workflow_stream.rs +2 -2
  51. package/sdk-core/core-api/src/telemetry.rs +1 -0
  52. package/sdk-core/docker/docker-compose-telem.yaml +4 -4
  53. package/sdk-core/etc/otel-collector-config.yaml +12 -9
  54. package/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +2 -2
  55. package/sdk-core/sdk/src/lib.rs +30 -3
  56. package/sdk-core/sdk/src/workflow_context.rs +15 -2
  57. package/sdk-core/sdk/src/workflow_future.rs +28 -8
  58. package/sdk-core/sdk-core-protos/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +31 -12
  59. package/sdk-core/sdk-core-protos/src/lib.rs +104 -63
  60. package/sdk-core/test-utils/src/lib.rs +4 -3
  61. package/sdk-core/tests/integ_tests/client_tests.rs +36 -7
  62. package/sdk-core/tests/integ_tests/heartbeat_tests.rs +1 -1
  63. package/sdk-core/tests/integ_tests/metrics_tests.rs +50 -4
  64. package/sdk-core/tests/integ_tests/queries_tests.rs +95 -62
  65. package/sdk-core/tests/integ_tests/update_tests.rs +16 -9
  66. package/sdk-core/tests/integ_tests/visibility_tests.rs +1 -1
  67. package/sdk-core/tests/integ_tests/worker_tests.rs +82 -1
  68. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +46 -8
  69. package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +81 -2
  70. package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +139 -4
  71. package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +43 -28
  72. package/sdk-core/tests/integ_tests/workflow_tests.rs +2 -1
  73. package/sdk-core/tests/main.rs +28 -19
  74. package/sdk-core/tests/runner.rs +7 -2
  75. package/ts/index.ts +3 -0
@@ -1,11 +1,16 @@
1
1
  use assert_matches::assert_matches;
2
- use std::{net::SocketAddr, sync::Arc, time::Duration};
2
+ use std::{env, net::SocketAddr, sync::Arc, time::Duration};
3
3
  use temporal_client::{WorkflowClientTrait, WorkflowOptions, WorkflowService};
4
- use temporal_sdk_core::{init_worker, telemetry::start_prometheus_metric_exporter, CoreRuntime};
4
+ use temporal_sdk_core::{
5
+ init_worker,
6
+ telemetry::{build_otlp_metric_exporter, start_prometheus_metric_exporter},
7
+ CoreRuntime,
8
+ };
5
9
  use temporal_sdk_core_api::{
6
10
  telemetry::{
7
11
  metrics::{CoreMeter, MetricAttributes, MetricParameters},
8
- PrometheusExporterOptionsBuilder, TelemetryOptions,
12
+ OtelCollectorOptionsBuilder, PrometheusExporterOptionsBuilder, TelemetryOptions,
13
+ TelemetryOptionsBuilder,
9
14
  },
10
15
  worker::WorkerConfigBuilder,
11
16
  Worker,
@@ -30,9 +35,10 @@ use temporal_sdk_core_protos::{
30
35
  },
31
36
  };
32
37
  use temporal_sdk_core_test_utils::{
33
- get_integ_server_options, get_integ_telem_options, CoreWfStarter, NAMESPACE,
38
+ get_integ_server_options, get_integ_telem_options, CoreWfStarter, NAMESPACE, OTEL_URL_ENV_VAR,
34
39
  };
35
40
  use tokio::{join, sync::Barrier, task::AbortHandle};
41
+ use url::Url;
36
42
 
37
43
  static ANY_PORT: &str = "127.0.0.1:0";
38
44
 
@@ -376,6 +382,7 @@ async fn query_of_closed_workflow_doesnt_tick_terminal_metric(
376
382
  .complete_workflow_activation(WorkflowActivationCompletion::fail(
377
383
  task.run_id,
378
384
  "whatever".into(),
385
+ None,
379
386
  ))
380
387
  .await
381
388
  .unwrap();
@@ -574,3 +581,42 @@ async fn request_fail_codes() {
574
581
  assert!(matching_line.contains("status_code=\"INVALID_ARGUMENT\""));
575
582
  assert!(matching_line.contains("} 1"));
576
583
  }
584
+
585
+ // OTel collector shutdown hangs in a single-threaded Tokio environment. In the past, we had
586
+ // a dedicated runtime just for telemetry, which was meant to address problems like this.
587
+ // In reality, users are unlikely to run a single-threaded runtime.
588
+ #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
589
+ async fn request_fail_codes_otel() {
590
+ let exporter = if let Some(url) = env::var(OTEL_URL_ENV_VAR)
591
+ .ok()
592
+ .map(|x| x.parse::<Url>().unwrap())
593
+ {
594
+ let opts = OtelCollectorOptionsBuilder::default()
595
+ .url(url)
596
+ .build()
597
+ .unwrap();
598
+ build_otlp_metric_exporter(opts).unwrap()
599
+ } else {
600
+ // skip
601
+ return;
602
+ };
603
+ let mut telemopts = TelemetryOptionsBuilder::default();
604
+ let exporter = Arc::new(exporter);
605
+ telemopts.metrics(exporter as Arc<dyn CoreMeter>);
606
+
607
+ let rt = CoreRuntime::new_assume_tokio(telemopts.build().unwrap()).unwrap();
608
+ let opts = get_integ_server_options();
609
+ let mut client = opts
610
+ .connect(NAMESPACE, rt.telemetry().get_temporal_metric_meter())
611
+ .await
612
+ .unwrap();
613
+
614
+ for _ in 0..10 {
615
+ // Describe namespace w/ invalid argument (unset namespace field)
616
+ WorkflowService::describe_namespace(&mut client, DescribeNamespaceRequest::default())
617
+ .await
618
+ .unwrap_err();
619
+
620
+ tokio::time::sleep(Duration::from_secs(1)).await;
621
+ }
622
+ }
@@ -12,7 +12,7 @@ use temporal_sdk_core_protos::{
12
12
  temporal::api::{failure::v1::Failure, query::v1::WorkflowQuery},
13
13
  };
14
14
  use temporal_sdk_core_test_utils::{
15
- drain_pollers_and_shutdown, init_core_and_create_wf, WorkerTestHelpers,
15
+ drain_pollers_and_shutdown, init_core_and_create_wf, CoreWfStarter, WorkerTestHelpers,
16
16
  };
17
17
  use tokio::join;
18
18
 
@@ -163,7 +163,7 @@ async fn query_after_execution_complete(#[case] do_evict: bool) {
163
163
  if matches!(
164
164
  task.jobs.as_slice(),
165
165
  [WorkflowActivationJob {
166
- variant: Some(workflow_activation_job::Variant::StartWorkflow(_)),
166
+ variant: Some(workflow_activation_job::Variant::InitializeWorkflow(_)),
167
167
  }]
168
168
  ) {
169
169
  core.complete_timer(&task.run_id, 1, Duration::from_millis(500))
@@ -215,30 +215,15 @@ async fn query_after_execution_complete(#[case] do_evict: bool) {
215
215
  #[tokio::test]
216
216
  async fn fail_legacy_query() {
217
217
  let query_err = "oh no broken";
218
- let mut starter = init_core_and_create_wf("fail_legacy_query").await;
218
+ let mut starter = CoreWfStarter::new("fail_legacy_query");
219
219
  let core = starter.get_worker().await;
220
+ starter.workflow_options.task_timeout = Some(Duration::from_secs(1));
221
+ starter.start_wf().await;
220
222
  let workflow_id = starter.get_task_queue().to_string();
221
223
  let task = core.poll_workflow_activation().await.unwrap();
222
- let t1_resp = vec![
223
- StartTimer {
224
- seq: 1,
225
- start_to_fire_timeout: Some(prost_dur!(from_millis(500))),
226
- }
227
- .into(),
228
- StartTimer {
229
- seq: 2,
230
- start_to_fire_timeout: Some(prost_dur!(from_secs(3))),
231
- }
232
- .into(),
233
- ];
234
- core.complete_workflow_activation(WorkflowActivationCompletion::from_cmds(
235
- task.run_id.clone(),
236
- t1_resp.clone(),
237
- ))
238
- .await
239
- .unwrap();
240
- tokio::time::sleep(Duration::from_secs(1)).await;
241
- // Query after timer should have fired and there should be new WFT
224
+ // Queries are *always* legacy on closed workflows, so that's the easiest way to ensure that
225
+ // path is used.
226
+ core.complete_execution(&task.run_id).await;
242
227
  let query_fut = async {
243
228
  starter
244
229
  .get_client()
@@ -255,62 +240,26 @@ async fn fail_legacy_query() {
255
240
  .await
256
241
  .unwrap_err()
257
242
  };
258
- let workflow_completions_future = async {
259
- // Give query a beat to get going
260
- tokio::time::sleep(Duration::from_millis(400)).await;
261
- // This poll *should* have the `queries` field populated, but doesn't, seemingly due to
262
- // a server bug. So, complete the WF task of the first timer firing with empty commands
263
- let task = core.poll_workflow_activation().await.unwrap();
264
- assert_matches!(
265
- task.jobs.as_slice(),
266
- [WorkflowActivationJob {
267
- variant: Some(workflow_activation_job::Variant::FireTimer(_)),
268
- }]
269
- );
270
- core.complete_workflow_activation(WorkflowActivationCompletion::from_cmds(
271
- task.run_id,
272
- vec![],
273
- ))
274
- .await
275
- .unwrap();
243
+ let query_responder = async {
276
244
  let task = core.poll_workflow_activation().await.unwrap();
277
- // Poll again, and we end up getting a `query` field query response
278
245
  assert_matches!(
279
246
  task.jobs.as_slice(),
280
247
  [WorkflowActivationJob {
281
248
  variant: Some(workflow_activation_job::Variant::QueryWorkflow(q)),
282
249
  }] => q
283
250
  );
284
- // Fail this task
285
251
  core.complete_workflow_activation(WorkflowActivationCompletion::fail(
286
252
  task.run_id,
287
253
  Failure {
288
254
  message: query_err.to_string(),
289
255
  ..Default::default()
290
256
  },
257
+ None,
291
258
  ))
292
259
  .await
293
260
  .unwrap();
294
- // Finish the workflow (handling cache removal)
295
- let task = core.poll_workflow_activation().await.unwrap();
296
- core.complete_workflow_activation(WorkflowActivationCompletion::empty(task.run_id))
297
- .await
298
- .unwrap();
299
- let task = core.poll_workflow_activation().await.unwrap();
300
- core.complete_workflow_activation(WorkflowActivationCompletion::from_cmds(
301
- task.run_id,
302
- t1_resp.clone(),
303
- ))
304
- .await
305
- .unwrap();
306
- let task = core.poll_workflow_activation().await.unwrap();
307
- core.complete_workflow_activation(WorkflowActivationCompletion::empty(task.run_id))
308
- .await
309
- .unwrap();
310
- let task = core.poll_workflow_activation().await.unwrap();
311
- core.complete_execution(&task.run_id).await;
312
261
  };
313
- let (q_resp, _) = join!(query_fut, workflow_completions_future);
262
+ let (q_resp, _) = join!(query_fut, query_responder);
314
263
  // Ensure query response is a failure and has the right message
315
264
  assert_eq!(q_resp.message(), query_err);
316
265
  }
@@ -483,3 +432,87 @@ async fn queries_handled_before_next_wft() {
483
432
  join!(join_all(query_futs), complete_fut);
484
433
  drain_pollers_and_shutdown(&core).await;
485
434
  }
435
+
436
+ #[tokio::test]
437
+ async fn query_should_not_be_sent_if_wft_about_to_fail() {
438
+ let mut starter =
439
+ init_core_and_create_wf("query_should_not_be_sent_if_wft_about_to_fail").await;
440
+ let core = starter.get_worker().await;
441
+ let workflow_id = starter.get_task_queue().to_string();
442
+ let client = starter.get_client().await;
443
+ // query straight away
444
+ let query_fut = client.query_workflow_execution(
445
+ workflow_id.to_string(),
446
+ "".to_string(),
447
+ WorkflowQuery {
448
+ query_type: "myquery".to_string(),
449
+ ..Default::default()
450
+ },
451
+ );
452
+ // Poll for the task and respond with a task failure
453
+ let poll_and_fail_fut = async {
454
+ let task = core.poll_workflow_activation().await.unwrap();
455
+ assert_matches!(
456
+ task.jobs.as_slice(),
457
+ [WorkflowActivationJob {
458
+ variant: Some(workflow_activation_job::Variant::InitializeWorkflow(_)),
459
+ }]
460
+ );
461
+ core.complete_workflow_activation(WorkflowActivationCompletion::fail(
462
+ task.run_id,
463
+ Failure {
464
+ message: "oh no".to_string(),
465
+ ..Default::default()
466
+ },
467
+ None,
468
+ ))
469
+ .await
470
+ .unwrap();
471
+ let task = core.poll_workflow_activation().await.unwrap();
472
+ // Should *not* get a query here. If the bug wasn't fixed, this job would have a query.
473
+ assert_matches!(
474
+ task.jobs.as_slice(),
475
+ [WorkflowActivationJob {
476
+ variant: Some(workflow_activation_job::Variant::RemoveFromCache(_)),
477
+ }]
478
+ );
479
+ core.complete_workflow_activation(WorkflowActivationCompletion::empty(task.run_id))
480
+ .await
481
+ .unwrap();
482
+
483
+ // We can still service the query by trying again
484
+ let task = core.poll_workflow_activation().await.unwrap();
485
+ assert_matches!(
486
+ task.jobs.as_slice(),
487
+ [WorkflowActivationJob {
488
+ variant: Some(workflow_activation_job::Variant::InitializeWorkflow(_)),
489
+ }]
490
+ );
491
+ core.complete_execution(&task.run_id).await;
492
+ let task = core.poll_workflow_activation().await.unwrap();
493
+ let qid = assert_matches!(
494
+ task.jobs.as_slice(),
495
+ [WorkflowActivationJob {
496
+ variant: Some(workflow_activation_job::Variant::QueryWorkflow(q)),
497
+ }] => &q.query_id
498
+ );
499
+ core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
500
+ task.run_id,
501
+ QueryResult {
502
+ query_id: qid.to_string(),
503
+ variant: Some(
504
+ QuerySuccess {
505
+ response: Some("done".into()),
506
+ }
507
+ .into(),
508
+ ),
509
+ }
510
+ .into(),
511
+ ))
512
+ .await
513
+ .unwrap();
514
+ };
515
+ let (qres, _) = join!(query_fut, poll_and_fail_fut);
516
+ let qres = qres.unwrap().query_result.unwrap();
517
+ assert_eq!(qres.payloads[0].data, b"done");
518
+ }
@@ -78,10 +78,9 @@ async fn update_workflow(#[values(FailUpdate::Yes, FailUpdate::No)] will_fail: F
78
78
  .unwrap();
79
79
  let with_id = HistoryForReplay::new(history, workflow_id.to_string());
80
80
  let replay_worker = init_core_replay_preloaded(workflow_id, [with_id]);
81
- handle_update(will_fail, CompleteWorkflow::Yes, replay_worker.as_ref()).await;
81
+ handle_update(will_fail, CompleteWorkflow::Yes, replay_worker.as_ref(), 1).await;
82
82
  }
83
83
 
84
- #[rstest::rstest]
85
84
  #[tokio::test]
86
85
  async fn reapplied_updates_due_to_reset() {
87
86
  let mut starter = init_core_and_create_wf("update_workflow").await;
@@ -121,7 +120,9 @@ async fn reapplied_updates_due_to_reset() {
121
120
  .into_inner();
122
121
 
123
122
  // Accept and complete the reapplied update
124
- handle_update(FailUpdate::No, CompleteWorkflow::No, core.as_ref()).await;
123
+ // Index here is 2 because there will be start workflow & update random seed (from the reset)
124
+ // first.
125
+ handle_update(FailUpdate::No, CompleteWorkflow::No, core.as_ref(), 2).await;
125
126
 
126
127
  // Send a second update and complete the workflow
127
128
  let post_reset_run_id = send_and_handle_update(
@@ -149,12 +150,19 @@ async fn reapplied_updates_due_to_reset() {
149
150
  // We now recapitulate the actions that the worker took on first execution above, pretending
150
151
  // that we always followed the post-reset history.
151
152
  // First, we handled the post-reset reapplied update and did not complete the workflow.
152
- handle_update(FailUpdate::No, CompleteWorkflow::No, replay_worker.as_ref()).await;
153
+ handle_update(
154
+ FailUpdate::No,
155
+ CompleteWorkflow::No,
156
+ replay_worker.as_ref(),
157
+ 2,
158
+ )
159
+ .await;
153
160
  // Then the client sent a second update; we handled it and completed the workflow.
154
161
  handle_update(
155
162
  FailUpdate::No,
156
163
  CompleteWorkflow::Yes,
157
164
  replay_worker.as_ref(),
165
+ 0,
158
166
  )
159
167
  .await;
160
168
 
@@ -199,7 +207,7 @@ async fn send_and_handle_update(
199
207
  };
200
208
 
201
209
  // Accept update, complete update and complete workflow
202
- let processing_task = handle_update(fail_update, complete_workflow, core);
210
+ let processing_task = handle_update(fail_update, complete_workflow, core, 0);
203
211
  let (ur, _) = join!(update_task, processing_task);
204
212
 
205
213
  let v = ur.outcome.unwrap().value.unwrap();
@@ -218,12 +226,11 @@ async fn handle_update(
218
226
  fail_update: FailUpdate,
219
227
  complete_workflow: CompleteWorkflow,
220
228
  core: &dyn Worker,
229
+ update_job_index: usize,
221
230
  ) {
222
231
  let act = core.poll_workflow_activation().await.unwrap();
223
- // On replay, the first activation has update & start workflow, but on first execution, it does
224
- // not - can happen if update is waiting on some condition.
225
232
  let pid = assert_matches!(
226
- &act.jobs[0],
233
+ &act.jobs[update_job_index],
227
234
  WorkflowActivationJob {
228
235
  variant: Some(workflow_activation_job::Variant::DoUpdate(d)),
229
236
  } => &d.protocol_instance_id
@@ -573,7 +580,7 @@ async fn update_speculative_wft() {
573
580
  assert_matches!(
574
581
  res.jobs.as_slice(),
575
582
  [WorkflowActivationJob {
576
- variant: Some(workflow_activation_job::Variant::StartWorkflow(_)),
583
+ variant: Some(workflow_activation_job::Variant::InitializeWorkflow(_)),
577
584
  }]
578
585
  );
579
586
  core.complete_workflow_activation(WorkflowActivationCompletion::empty(res.run_id))
@@ -29,7 +29,7 @@ async fn client_list_open_closed_workflow_executions() {
29
29
  assert_matches!(
30
30
  task.jobs.as_slice(),
31
31
  [WorkflowActivationJob {
32
- variant: Some(workflow_activation_job::Variant::StartWorkflow(_)),
32
+ variant: Some(workflow_activation_job::Variant::InitializeWorkflow(_)),
33
33
  }]
34
34
  );
35
35
 
@@ -1,7 +1,20 @@
1
+ use std::cell::Cell;
2
+ use std::sync::Arc;
3
+
1
4
  use assert_matches::assert_matches;
5
+ use temporal_client::WorkflowOptions;
6
+ use temporal_sdk::interceptors::WorkerInterceptor;
2
7
  use temporal_sdk_core::{init_worker, CoreRuntime};
3
8
  use temporal_sdk_core_api::{errors::WorkerValidationError, worker::WorkerConfigBuilder, Worker};
4
- use temporal_sdk_core_test_utils::{get_integ_server_options, get_integ_telem_options};
9
+ use temporal_sdk_core_protos::coresdk::workflow_completion::{
10
+ workflow_activation_completion::Status, Failure, WorkflowActivationCompletion,
11
+ };
12
+ use temporal_sdk_core_protos::temporal::api::failure::v1::Failure as InnerFailure;
13
+ use temporal_sdk_core_test_utils::{
14
+ drain_pollers_and_shutdown, get_integ_server_options, get_integ_telem_options, CoreWfStarter,
15
+ };
16
+ use tokio::sync::Notify;
17
+ use uuid::Uuid;
5
18
 
6
19
  #[tokio::test]
7
20
  async fn worker_validation_fails_on_nonexistent_namespace() {
@@ -30,3 +43,71 @@ async fn worker_validation_fails_on_nonexistent_namespace() {
30
43
  Err(WorkerValidationError::NamespaceDescribeError { .. })
31
44
  );
32
45
  }
46
+
47
+ #[tokio::test]
48
+ async fn worker_handles_unknown_workflow_types_gracefully() {
49
+ let wf_type = "worker_handles_unknown_workflow_types_gracefully";
50
+ let mut starter = CoreWfStarter::new(wf_type);
51
+ let mut worker = starter.worker().await;
52
+
53
+ let run_id = worker
54
+ .submit_wf(
55
+ format!("wce-{}", Uuid::new_v4()),
56
+ "unregistered".to_string(),
57
+ vec![],
58
+ WorkflowOptions::default(),
59
+ )
60
+ .await
61
+ .unwrap();
62
+
63
+ struct GracefulAsserter {
64
+ notify: Arc<Notify>,
65
+ run_id: String,
66
+ unregistered_failure_seen: Cell<bool>,
67
+ }
68
+ #[async_trait::async_trait(?Send)]
69
+ impl WorkerInterceptor for GracefulAsserter {
70
+ async fn on_workflow_activation_completion(
71
+ &self,
72
+ completion: &WorkflowActivationCompletion,
73
+ ) {
74
+ if matches!(
75
+ completion,
76
+ WorkflowActivationCompletion {
77
+ status: Some(Status::Failed(Failure {
78
+ failure: Some(InnerFailure { message, .. }),
79
+ ..
80
+ })),
81
+ run_id,
82
+ } if message == "Workflow type unregistered not found" && *run_id == self.run_id
83
+ ) {
84
+ self.unregistered_failure_seen.set(true);
85
+ }
86
+ // If we've seen the failure, and the completion is a success for the same run, we're done
87
+ if matches!(
88
+ completion,
89
+ WorkflowActivationCompletion {
90
+ status: Some(Status::Successful(..)),
91
+ run_id,
92
+ } if self.unregistered_failure_seen.get() && *run_id == self.run_id
93
+ ) {
94
+ // Shutdown the worker
95
+ self.notify.notify_one();
96
+ }
97
+ }
98
+ fn on_shutdown(&self, _: &temporal_sdk::Worker) {}
99
+ }
100
+
101
+ let inner = worker.inner_mut();
102
+ let notify = Arc::new(Notify::new());
103
+ inner.set_worker_interceptor(GracefulAsserter {
104
+ notify: notify.clone(),
105
+ run_id,
106
+ unregistered_failure_seen: Cell::new(false),
107
+ });
108
+ tokio::join!(async { inner.run().await.unwrap() }, async move {
109
+ notify.notified().await;
110
+ let worker = starter.get_worker().await.clone();
111
+ drain_pollers_and_shutdown(&worker).await;
112
+ });
113
+ }
@@ -9,7 +9,7 @@ use std::{
9
9
  use temporal_client::{WfClientExt, WorkflowClientTrait, WorkflowExecutionResult, WorkflowOptions};
10
10
  use temporal_sdk::{
11
11
  ActContext, ActExitValue, ActivityError, ActivityOptions, CancellableFuture, WfContext,
12
- WorkflowResult,
12
+ WfExitValue, WorkflowResult,
13
13
  };
14
14
  use temporal_sdk_core_protos::{
15
15
  coresdk::{
@@ -52,7 +52,7 @@ pub(crate) async fn one_activity_wf(ctx: WfContext) -> WorkflowResult<()> {
52
52
  }
53
53
 
54
54
  #[tokio::test]
55
- async fn one_activity() {
55
+ async fn one_activity_only() {
56
56
  let wf_name = "one_activity";
57
57
  let mut starter = CoreWfStarter::new(wf_name);
58
58
  let mut worker = starter.worker().await;
@@ -129,7 +129,7 @@ async fn activity_workflow() {
129
129
  status: Some(
130
130
  act_res::Status::Completed(activity_result::Success{result: Some(r)})),
131
131
  ..
132
- })}
132
+ }), ..}
133
133
  )),
134
134
  },
135
135
  ] => {
@@ -182,7 +182,7 @@ async fn activity_non_retryable_failure() {
182
182
  ResolveActivity {seq, result: Some(ActivityResolution{
183
183
  status: Some(act_res::Status::Failed(activity_result::Failure{
184
184
  failure: Some(f),
185
- }))})}
185
+ }))}),..}
186
186
  )),
187
187
  },
188
188
  ] => {
@@ -249,7 +249,7 @@ async fn activity_non_retryable_failure_with_error() {
249
249
  ResolveActivity {seq, result: Some(ActivityResolution{
250
250
  status: Some(act_res::Status::Failed(activity_result::Failure{
251
251
  failure: Some(f),
252
- }))})}
252
+ }))}),..}
253
253
  )),
254
254
  },
255
255
  ] => {
@@ -328,7 +328,7 @@ async fn activity_retry() {
328
328
  WorkflowActivationJob {
329
329
  variant: Some(workflow_activation_job::Variant::ResolveActivity(
330
330
  ResolveActivity {seq, result: Some(ActivityResolution{
331
- status: Some(act_res::Status::Completed(activity_result::Success{result: Some(r)}))})}
331
+ status: Some(act_res::Status::Completed(activity_result::Success{result: Some(r)}))}),..}
332
332
  )),
333
333
  },
334
334
  ] => {
@@ -526,7 +526,7 @@ async fn started_activity_timeout() {
526
526
  )
527
527
  ),
528
528
  ..
529
- })
529
+ }), ..
530
530
  }
531
531
  )),
532
532
  },
@@ -714,7 +714,7 @@ async fn async_activity_completion_workflow() {
714
714
  variant: Some(workflow_activation_job::Variant::ResolveActivity(
715
715
  ResolveActivity {seq, result: Some(ActivityResolution {
716
716
  status: Some(act_res::Status::Completed(activity_result::Success{result: Some(r)})),
717
- ..})}
717
+ ..}), ..}
718
718
  )),
719
719
  },
720
720
  ] => {
@@ -1061,3 +1061,41 @@ async fn activity_can_be_cancelled_by_local_timeout() {
1061
1061
  worker.run_until_done().await.unwrap();
1062
1062
  assert!(WAS_CANCELLED.load(Ordering::Relaxed));
1063
1063
  }
1064
+
1065
+ #[tokio::test]
1066
+ #[ignore] // Runs forever, used to manually attempt to repro spurious activity completion rpc errs
1067
+ async fn long_activity_timeout_repro() {
1068
+ let wf_name = "long_activity_timeout_repro";
1069
+ let mut starter = CoreWfStarter::new(wf_name);
1070
+ starter
1071
+ .worker_config
1072
+ .local_timeout_buffer_for_activities(Duration::from_secs(0));
1073
+ let mut worker = starter.worker().await;
1074
+ worker.register_wf(wf_name.to_owned(), |ctx: WfContext| async move {
1075
+ let mut iter = 1;
1076
+ loop {
1077
+ let res = ctx
1078
+ .activity(ActivityOptions {
1079
+ activity_type: "echo_activity".to_string(),
1080
+ start_to_close_timeout: Some(Duration::from_secs(1)),
1081
+ input: "hi!".as_json_payload().expect("serializes fine"),
1082
+ retry_policy: Some(RetryPolicy {
1083
+ maximum_attempts: 1,
1084
+ ..Default::default()
1085
+ }),
1086
+ ..Default::default()
1087
+ })
1088
+ .await;
1089
+ assert!(res.completed_ok());
1090
+ ctx.timer(Duration::from_secs(60 * 3)).await;
1091
+ iter += 1;
1092
+ if iter > 5000 {
1093
+ return Ok(WfExitValue::<()>::continue_as_new(Default::default()));
1094
+ }
1095
+ }
1096
+ });
1097
+ worker.register_activity("echo_activity", echo);
1098
+
1099
+ starter.start_with_worker(wf_name, &mut worker).await;
1100
+ worker.run_until_done().await.unwrap();
1101
+ }
@@ -7,7 +7,7 @@ use std::{
7
7
  };
8
8
  use temporal_client::WorkflowOptions;
9
9
  use temporal_sdk::{
10
- interceptors::WorkerInterceptor, ActContext, ActivityError, CancellableFuture,
10
+ interceptors::WorkerInterceptor, ActContext, ActivityError, ActivityOptions, CancellableFuture,
11
11
  LocalActivityOptions, WfContext, WorkflowResult,
12
12
  };
13
13
  use temporal_sdk_core::replay::HistoryForReplay;
@@ -47,8 +47,13 @@ async fn one_local_activity() {
47
47
  worker.register_wf(wf_name.to_owned(), one_local_activity_wf);
48
48
  worker.register_activity("echo_activity", echo);
49
49
 
50
- starter.start_with_worker(wf_name, &mut worker).await;
50
+ let run_id = starter.start_with_worker(wf_name, &mut worker).await;
51
51
  worker.run_until_done().await.unwrap();
52
+ let tq = starter.get_task_queue().to_string();
53
+ starter
54
+ .fetch_history_and_replay(tq, run_id, worker.inner_mut())
55
+ .await
56
+ .unwrap();
52
57
  }
53
58
 
54
59
  pub(crate) async fn local_act_concurrent_with_timer_wf(ctx: WfContext) -> WorkflowResult<()> {
@@ -663,3 +668,77 @@ async fn third_weird_la_nondeterminism_repro() {
663
668
  });
664
669
  worker.run().await.unwrap();
665
670
  }
671
+
672
+ /// This test demonstrates why it's important to send LA resolutions last within a job.
673
+ /// If we were to (during replay) scan ahead, see the marker, and resolve the LA before the
674
+ /// activity cancellation, that would be wrong because, during execution, the LA resolution is
675
+ /// always going to take _longer_ than the instantaneous cancel effect.
676
+ ///
677
+ /// This effect applies regardless of how you choose to interleave cancellations and LAs. Ultimately
678
+ /// all cancellations will happen at once (in the order they are submitted) while the LA executions
679
+ /// are queued (because this all happens synchronously in the workflow machines). If you were to
680
+ /// _wait_ on an LA, and then cancel something else, and then run another LA, such that all commands
681
+ /// happened in the same workflow task, it would _still_ be fine to sort LA jobs last _within_ the
682
+ /// 2 activations that would necessarily entail (because, once you wait on the LA result, control
683
+ /// will be yielded and it will take another activation to unblock that LA).
684
+ #[tokio::test]
685
+ async fn la_resolve_same_time_as_other_cancel() {
686
+ let wf_name = "la_resolve_same_time_as_other_cancel";
687
+ let mut starter = CoreWfStarter::new(wf_name);
688
+ // The activity won't get a chance to receive the cancel so make sure we still exit fast
689
+ starter
690
+ .worker_config
691
+ .graceful_shutdown_period(Duration::from_millis(100));
692
+ let mut worker = starter.worker().await;
693
+
694
+ worker.register_wf(wf_name.to_owned(), |ctx: WfContext| async move {
695
+ let normal_act = ctx.activity(ActivityOptions {
696
+ activity_type: "delay".to_string(),
697
+ input: 9000.as_json_payload().expect("serializes fine"),
698
+ cancellation_type: ActivityCancellationType::TryCancel,
699
+ start_to_close_timeout: Some(Duration::from_secs(9000)),
700
+ ..Default::default()
701
+ });
702
+ // Make new task
703
+ ctx.timer(Duration::from_millis(1)).await;
704
+
705
+ // Start LA and cancel the activity at the same time
706
+ let local_act = ctx.local_activity(LocalActivityOptions {
707
+ activity_type: "delay".to_string(),
708
+ input: 100.as_json_payload().expect("serializes fine"),
709
+ ..Default::default()
710
+ });
711
+ normal_act.cancel(&ctx);
712
+ // Race them, starting a timer if LA completes first
713
+ tokio::select! {
714
+ biased;
715
+ _ = normal_act => {},
716
+ _ = local_act => {
717
+ ctx.timer(Duration::from_millis(1)).await;
718
+ },
719
+ }
720
+ Ok(().into())
721
+ });
722
+ worker.register_activity("delay", |ctx: ActContext, wait_time: u64| async move {
723
+ tokio::select! {
724
+ _ = tokio::time::sleep(Duration::from_millis(wait_time)) => {}
725
+ _ = ctx.cancelled() => {}
726
+ }
727
+ Ok(())
728
+ });
729
+
730
+ let run_id = worker
731
+ .submit_wf(
732
+ wf_name.to_owned(),
733
+ wf_name.to_owned(),
734
+ vec![],
735
+ WorkflowOptions::default(),
736
+ )
737
+ .await
738
+ .unwrap();
739
+ worker.run_until_done().await.unwrap();
740
+ starter
741
+ .fetch_history_and_replay(wf_name, run_id, worker.inner_mut())
742
+ .await
743
+ .unwrap();
744
+ }