@temporalio/core-bridge 1.4.4 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. package/Cargo.lock +327 -419
  2. package/Cargo.toml +1 -1
  3. package/index.js +25 -2
  4. package/lib/errors.d.ts +22 -0
  5. package/lib/errors.js +65 -0
  6. package/lib/errors.js.map +1 -0
  7. package/lib/index.d.ts +440 -0
  8. package/lib/index.js +8 -0
  9. package/lib/index.js.map +1 -0
  10. package/package.json +11 -5
  11. package/releases/aarch64-apple-darwin/index.node +0 -0
  12. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
  13. package/releases/x86_64-apple-darwin/index.node +0 -0
  14. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  15. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  16. package/sdk-core/.buildkite/docker/Dockerfile +1 -1
  17. package/sdk-core/.buildkite/docker/docker-compose.yaml +2 -2
  18. package/sdk-core/bridge-ffi/Cargo.toml +1 -1
  19. package/sdk-core/bridge-ffi/include/sdk-core-bridge.h +0 -25
  20. package/sdk-core/bridge-ffi/src/lib.rs +29 -108
  21. package/sdk-core/bridge-ffi/src/wrappers.rs +35 -25
  22. package/sdk-core/client/Cargo.toml +1 -1
  23. package/sdk-core/client/src/lib.rs +12 -20
  24. package/sdk-core/client/src/raw.rs +9 -8
  25. package/sdk-core/client/src/retry.rs +100 -23
  26. package/sdk-core/core/Cargo.toml +5 -5
  27. package/sdk-core/core/benches/workflow_replay.rs +13 -10
  28. package/sdk-core/core/src/abstractions.rs +22 -22
  29. package/sdk-core/core/src/core_tests/activity_tasks.rs +1 -1
  30. package/sdk-core/core/src/core_tests/local_activities.rs +228 -6
  31. package/sdk-core/core/src/core_tests/queries.rs +247 -89
  32. package/sdk-core/core/src/core_tests/workers.rs +2 -2
  33. package/sdk-core/core/src/core_tests/workflow_cancels.rs +1 -1
  34. package/sdk-core/core/src/core_tests/workflow_tasks.rs +46 -27
  35. package/sdk-core/core/src/lib.rs +139 -32
  36. package/sdk-core/core/src/replay/mod.rs +185 -41
  37. package/sdk-core/core/src/telemetry/log_export.rs +190 -0
  38. package/sdk-core/core/src/telemetry/metrics.rs +184 -139
  39. package/sdk-core/core/src/telemetry/mod.rs +296 -318
  40. package/sdk-core/core/src/telemetry/prometheus_server.rs +4 -3
  41. package/sdk-core/core/src/test_help/mod.rs +9 -7
  42. package/sdk-core/core/src/worker/activities/local_activities.rs +2 -1
  43. package/sdk-core/core/src/worker/activities.rs +40 -23
  44. package/sdk-core/core/src/worker/client/mocks.rs +1 -1
  45. package/sdk-core/core/src/worker/client.rs +30 -4
  46. package/sdk-core/core/src/worker/mod.rs +22 -18
  47. package/sdk-core/core/src/worker/workflow/driven_workflow.rs +10 -19
  48. package/sdk-core/core/src/worker/workflow/history_update.rs +99 -25
  49. package/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +1 -5
  50. package/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +1 -5
  51. package/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +1 -5
  52. package/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +1 -5
  53. package/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +1 -5
  54. package/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +2 -6
  55. package/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +1 -5
  56. package/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +18 -21
  57. package/sdk-core/core/src/worker/workflow/machines/mod.rs +12 -38
  58. package/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +178 -0
  59. package/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +1 -5
  60. package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +1 -5
  61. package/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +1 -5
  62. package/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +8 -2
  63. package/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +1 -5
  64. package/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +232 -216
  65. package/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +1 -6
  66. package/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +4 -4
  67. package/sdk-core/core/src/worker/workflow/managed_run.rs +13 -5
  68. package/sdk-core/core/src/worker/workflow/mod.rs +61 -9
  69. package/sdk-core/core/src/worker/workflow/wft_poller.rs +2 -2
  70. package/sdk-core/core/src/worker/workflow/workflow_stream.rs +56 -11
  71. package/sdk-core/core-api/Cargo.toml +4 -3
  72. package/sdk-core/core-api/src/lib.rs +1 -43
  73. package/sdk-core/core-api/src/telemetry.rs +147 -0
  74. package/sdk-core/core-api/src/worker.rs +13 -0
  75. package/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +1 -1
  76. package/sdk-core/histories/evict_while_la_running_no_interference-23_history.bin +0 -0
  77. package/sdk-core/histories/evict_while_la_running_no_interference-85_history.bin +0 -0
  78. package/sdk-core/protos/api_upstream/.github/CODEOWNERS +1 -1
  79. package/sdk-core/protos/api_upstream/buf.yaml +0 -3
  80. package/sdk-core/protos/api_upstream/temporal/api/batch/v1/message.proto +3 -7
  81. package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +8 -0
  82. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +1 -2
  83. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +2 -0
  84. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +3 -0
  85. package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +13 -0
  86. package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +19 -59
  87. package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +0 -19
  88. package/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +108 -29
  89. package/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +2 -2
  90. package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +1 -0
  91. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +47 -8
  92. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +15 -1
  93. package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +2 -0
  94. package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +8 -1
  95. package/sdk-core/sdk/src/interceptors.rs +36 -3
  96. package/sdk-core/sdk/src/lib.rs +7 -4
  97. package/sdk-core/sdk/src/workflow_context.rs +13 -2
  98. package/sdk-core/sdk-core-protos/src/history_builder.rs +47 -1
  99. package/sdk-core/sdk-core-protos/src/history_info.rs +22 -22
  100. package/sdk-core/sdk-core-protos/src/lib.rs +49 -27
  101. package/sdk-core/test-utils/Cargo.toml +1 -0
  102. package/sdk-core/test-utils/src/lib.rs +81 -29
  103. package/sdk-core/tests/integ_tests/metrics_tests.rs +37 -0
  104. package/sdk-core/tests/integ_tests/polling_tests.rs +0 -13
  105. package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +145 -4
  106. package/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +53 -0
  107. package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +106 -20
  108. package/sdk-core/tests/integ_tests/workflow_tests.rs +18 -8
  109. package/sdk-core/tests/main.rs +6 -4
  110. package/src/conversions.rs +52 -47
  111. package/src/errors.rs +28 -86
  112. package/src/helpers.rs +3 -4
  113. package/src/lib.rs +2 -2
  114. package/src/runtime.rs +132 -61
  115. package/src/testing.rs +7 -4
  116. package/src/worker.rs +67 -50
  117. package/ts/errors.ts +55 -0
  118. package/{index.d.ts → ts/index.ts} +121 -15
  119. package/sdk-core/core/src/log_export.rs +0 -62
  120. package/sdk-core/core/src/worker/workflow/machines/mutable_side_effect_state_machine.rs +0 -127
  121. package/sdk-core/core/src/worker/workflow/machines/side_effect_state_machine.rs +0 -71
  122. package/sdk-core/protos/api_upstream/temporal/api/cluster/v1/message.proto +0 -83
  123. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/cluster.proto +0 -40
@@ -357,6 +357,7 @@ impl ManagedRun {
357
357
  let query_responses = data.query_responses;
358
358
  let has_query_responses = !query_responses.is_empty();
359
359
  let is_query_playback = data.has_pending_query && !has_query_responses;
360
+ let mut force_new_wft = due_to_heartbeat_timeout;
360
361
 
361
362
  // We only actually want to send commands back to the server if there are no more
362
363
  // pending activations and we are caught up on replay. We don't want to complete a wft
@@ -366,19 +367,26 @@ impl ManagedRun {
366
367
  // either.
367
368
  let no_commands_and_evicting =
368
369
  outgoing_cmds.commands.is_empty() && data.activation_was_only_eviction;
370
+ let should_respond = !(self.wfm.machines.has_pending_jobs()
371
+ || outgoing_cmds.replaying
372
+ || is_query_playback
373
+ || no_commands_and_evicting);
374
+ // If there are pending LA resolutions, and we're responding to a query here,
375
+ // we want to make sure to force a new task, as otherwise once we tell lang about
376
+ // the LA resolution there wouldn't be any task to reply to with the result of iterating
377
+ // the workflow.
378
+ if has_query_responses && self.wfm.machines.has_pending_la_resolutions() {
379
+ force_new_wft = true;
380
+ }
369
381
  let to_be_sent = ServerCommandsWithWorkflowInfo {
370
382
  task_token: data.task_token,
371
383
  action: ActivationAction::WftComplete {
372
- force_new_wft: due_to_heartbeat_timeout,
384
+ force_new_wft,
373
385
  commands: outgoing_cmds.commands,
374
386
  query_responses,
375
387
  },
376
388
  };
377
389
 
378
- let should_respond = !(self.wfm.machines.has_pending_jobs()
379
- || outgoing_cmds.replaying
380
- || is_query_playback
381
- || no_commands_and_evicting);
382
390
  let outcome = if should_respond || has_query_responses {
383
391
  ActivationCompleteOutcome::ReportWFTSuccess(to_be_sent)
384
392
  } else {
@@ -24,7 +24,7 @@ use crate::{
24
24
  telemetry::VecDisplayer,
25
25
  worker::{
26
26
  activities::{ActivitiesFromWFTsHandle, PermittedTqResp},
27
- client::WorkerClient,
27
+ client::{WorkerClient, WorkflowTaskCompletion},
28
28
  workflow::{
29
29
  managed_run::{ManagedRun, WorkflowManager},
30
30
  wft_poller::validate_wft,
@@ -36,6 +36,7 @@ use crate::{
36
36
  };
37
37
  use futures::{stream::BoxStream, Stream, StreamExt};
38
38
  use std::{
39
+ collections::HashSet,
39
40
  fmt::{Debug, Display, Formatter},
40
41
  future::Future,
41
42
  ops::DerefMut,
@@ -43,12 +44,12 @@ use std::{
43
44
  sync::Arc,
44
45
  time::{Duration, Instant},
45
46
  };
46
- use temporal_client::WorkflowTaskCompletion;
47
47
  use temporal_sdk_core_api::errors::{CompleteWfError, PollWfError};
48
48
  use temporal_sdk_core_protos::{
49
49
  coresdk::{
50
50
  workflow_activation::{
51
- remove_from_cache::EvictionReason, QueryWorkflow, WorkflowActivation,
51
+ remove_from_cache::EvictionReason, workflow_activation_job, QueryWorkflow,
52
+ WorkflowActivation, WorkflowActivationJob,
52
53
  },
53
54
  workflow_commands::*,
54
55
  workflow_completion,
@@ -108,6 +109,7 @@ pub(super) struct WorkflowBasics {
108
109
  pub metrics: MetricsContext,
109
110
  pub namespace: String,
110
111
  pub task_queue: String,
112
+ pub ignore_evicts_on_shutdown: bool,
111
113
  }
112
114
 
113
115
  impl Workflows {
@@ -180,7 +182,7 @@ impl Workflows {
180
182
  }
181
183
  stream.next().await.unwrap_or(Err(PollWfError::ShutDown))?
182
184
  };
183
- Span::current().record("run_id", &r.run_id());
185
+ Span::current().record("run_id", r.run_id());
184
186
  match r {
185
187
  ActivationOrAuto::LangActivation(act) | ActivationOrAuto::ReadyForQueries(act) => {
186
188
  debug!(activation=%act, "Sending activation to lang");
@@ -548,6 +550,11 @@ struct ManagedRunHandle {
548
550
  run_actions_tx: UnboundedSender<RunAction>,
549
551
  /// Handle to the task where the actual machines live
550
552
  handle: JoinHandle<()>,
553
+
554
+ /// We track if we have recorded useful debugging values onto a certain span yet, to overcome
555
+ /// duplicating field values. Remove this once https://github.com/tokio-rs/tracing/issues/2334
556
+ /// is fixed.
557
+ recorded_span_ids: HashSet<tracing::Id>,
551
558
  metrics: MetricsContext,
552
559
  }
553
560
  impl ManagedRunHandle {
@@ -569,9 +576,10 @@ impl ManagedRunHandle {
569
576
  more_pending_work: false,
570
577
  trying_to_evict: None,
571
578
  last_action_acked: true,
579
+ run_actions_tx,
572
580
  handle,
581
+ recorded_span_ids: Default::default(),
573
582
  metrics,
574
- run_actions_tx,
575
583
  }
576
584
  }
577
585
 
@@ -772,6 +780,11 @@ impl OutstandingActivation {
772
780
  pub struct WorkflowTaskInfo {
773
781
  pub task_token: TaskToken,
774
782
  pub attempt: u32,
783
+ /// Exists to allow easy tagging of spans with workflow ids. Is duplicative of info inside the
784
+ /// run machines themselves, but that can't be accessed easily. Would be nice to somehow have a
785
+ /// shared repository, or refcounts, or whatever, for strings like these that get duped all
786
+ /// sorts of places.
787
+ pub wf_id: String,
775
788
  }
776
789
 
777
790
  #[derive(Debug)]
@@ -892,10 +905,12 @@ fn validate_completion(
892
905
  )
893
906
  {
894
907
  return Err(CompleteWfError::MalformedWorkflowCompletion {
895
- reason: "Workflow completion had a legacy query response along with other \
896
- commands. This is not allowed and constitutes an error in the \
897
- lang SDK"
898
- .to_owned(),
908
+ reason: format!(
909
+ "Workflow completion had a legacy query response along with other \
910
+ commands. This is not allowed and constitutes an error in the \
911
+ lang SDK. Commands: {:?}",
912
+ commands
913
+ ),
899
914
  run_id: completion.run_id,
900
915
  });
901
916
  }
@@ -993,6 +1008,14 @@ enum RunUpdateResponseKind {
993
1008
  Good(GoodRunUpdate),
994
1009
  Fail(FailRunUpdate),
995
1010
  }
1011
+ impl RunUpdateResponseKind {
1012
+ pub(crate) fn run_id(&self) -> &str {
1013
+ match self {
1014
+ RunUpdateResponseKind::Good(g) => &g.run_id,
1015
+ RunUpdateResponseKind::Fail(f) => &f.run_id,
1016
+ }
1017
+ }
1018
+ }
996
1019
 
997
1020
  #[derive(Debug)]
998
1021
  struct GoodRunUpdate {
@@ -1078,6 +1101,7 @@ pub enum WFCommand {
1078
1101
  SignalExternalWorkflow(SignalExternalWorkflowExecution),
1079
1102
  CancelSignalWorkflow(CancelSignalWorkflow),
1080
1103
  UpsertSearchAttributes(UpsertWorkflowSearchAttributes),
1104
+ ModifyWorkflowProperties(ModifyWorkflowProperties),
1081
1105
  }
1082
1106
 
1083
1107
  impl TryFrom<WorkflowCommand> for WFCommand {
@@ -1119,6 +1143,9 @@ impl TryFrom<WorkflowCommand> for WFCommand {
1119
1143
  workflow_command::Variant::UpsertWorkflowSearchAttributes(s) => {
1120
1144
  Ok(Self::UpsertSearchAttributes(s))
1121
1145
  }
1146
+ workflow_command::Variant::ModifyWorkflowProperties(s) => {
1147
+ Ok(Self::ModifyWorkflowProperties(s))
1148
+ }
1122
1149
  }
1123
1150
  }
1124
1151
  }
@@ -1146,3 +1173,28 @@ pub struct WorkflowStartedInfo {
1146
1173
 
1147
1174
  type LocalActivityRequestSink =
1148
1175
  Arc<dyn Fn(Vec<LocalActRequest>) -> Vec<LocalActivityResolution> + Send + Sync>;
1176
+
1177
+ /// Wraps outgoing activation job protos with some internal details core might care about
1178
+ #[derive(Debug, derive_more::Display)]
1179
+ #[display(fmt = "{}", variant)]
1180
+ struct OutgoingJob {
1181
+ variant: workflow_activation_job::Variant,
1182
+ /// Since LA resolutions are not distinguished from non-LA resolutions as far as lang is
1183
+ /// concerned, but core cares about that sometimes, attach that info here.
1184
+ is_la_resolution: bool,
1185
+ }
1186
+ impl<WA: Into<workflow_activation_job::Variant>> From<WA> for OutgoingJob {
1187
+ fn from(wa: WA) -> Self {
1188
+ Self {
1189
+ variant: wa.into(),
1190
+ is_la_resolution: false,
1191
+ }
1192
+ }
1193
+ }
1194
+ impl From<OutgoingJob> for WorkflowActivationJob {
1195
+ fn from(og: OutgoingJob) -> Self {
1196
+ Self {
1197
+ variant: Some(og.variant),
1198
+ }
1199
+ }
1200
+ }
@@ -69,7 +69,7 @@ mod tests {
69
69
  .times(1)
70
70
  .returning(|| Some(Ok(PollWorkflowTaskQueueResponse::default())));
71
71
  mock_poller.expect_poll().times(1).returning(|| None);
72
- let stream = new_wft_poller(Box::new(mock_poller), Default::default());
72
+ let stream = new_wft_poller(Box::new(mock_poller), MetricsContext::no_op());
73
73
  pin_mut!(stream);
74
74
  assert_matches!(stream.next().await, None);
75
75
  }
@@ -81,7 +81,7 @@ mod tests {
81
81
  .expect_poll()
82
82
  .times(1)
83
83
  .returning(|| Some(Err(tonic::Status::internal("ahhh"))));
84
- let stream = new_wft_poller(Box::new(mock_poller), Default::default());
84
+ let stream = new_wft_poller(Box::new(mock_poller), MetricsContext::no_op());
85
85
  pin_mut!(stream);
86
86
  assert_matches!(stream.next().await, Some(Err(_)));
87
87
  }
@@ -41,9 +41,27 @@ pub(crate) struct WFStream {
41
41
  /// Ensures we stay at or below this worker's maximum concurrent workflow task limit
42
42
  wft_semaphore: MeteredSemaphore,
43
43
  shutdown_token: CancellationToken,
44
+ ignore_evicts_on_shutdown: bool,
44
45
 
45
46
  metrics: MetricsContext,
46
47
  }
48
+ impl WFStream {
49
+ fn record_span_fields(&mut self, run_id: &str, span: &Span) {
50
+ if let Some(run_handle) = self.runs.get_mut(run_id) {
51
+ if let Some(spid) = span.id() {
52
+ if run_handle.recorded_span_ids.contains(&spid) {
53
+ return;
54
+ }
55
+ run_handle.recorded_span_ids.insert(spid);
56
+
57
+ if let Some(wid) = run_handle.wft.as_ref().map(|wft| &wft.info.wf_id) {
58
+ span.record("workflow_id", wid.as_str());
59
+ }
60
+ }
61
+ }
62
+ }
63
+ }
64
+
47
65
  /// All possible inputs to the [WFStream]
48
66
  #[derive(derive_more::From, Debug)]
49
67
  enum WFStreamInput {
@@ -81,6 +99,18 @@ pub(super) enum LocalInputs {
81
99
  RequestEviction(RequestEvictMsg),
82
100
  GetStateInfo(GetStateInfoMsg),
83
101
  }
102
+ impl LocalInputs {
103
+ fn run_id(&self) -> Option<&str> {
104
+ Some(match self {
105
+ LocalInputs::Completion(c) => c.completion.run_id(),
106
+ LocalInputs::LocalResolution(lr) => &lr.run_id,
107
+ LocalInputs::PostActivation(pa) => &pa.run_id,
108
+ LocalInputs::RunUpdateResponse(rur) => rur.run_id(),
109
+ LocalInputs::RequestEviction(re) => &re.run_id,
110
+ LocalInputs::GetStateInfo(_) => return None,
111
+ })
112
+ }
113
+ }
84
114
  #[derive(Debug, derive_more::From)]
85
115
  #[allow(clippy::large_enum_variant)] // PollerDead only ever gets used once, so not important.
86
116
  enum ExternalPollerInputs {
@@ -130,10 +160,9 @@ impl WFStream {
130
160
  MetricsContext::available_task_slots,
131
161
  );
132
162
  let wft_sem_clone = wft_semaphore.clone();
133
- let proceeder = move || {
134
- let wft_sem_clone = wft_sem_clone.clone();
135
- async move { wft_sem_clone.acquire_owned().await.unwrap() }
136
- };
163
+ let proceeder = stream::unfold(wft_sem_clone, |sem| async move {
164
+ Some((sem.acquire_owned().await.unwrap(), sem))
165
+ });
137
166
  let poller_wfts = stream_when_allowed(external_wfts, proceeder);
138
167
  let (run_update_tx, run_update_rx) = unbounded_channel();
139
168
  let local_rx = stream::select(
@@ -165,6 +194,7 @@ impl WFStream {
165
194
  client,
166
195
  wft_semaphore,
167
196
  shutdown_token: basics.shutdown_token,
197
+ ignore_evicts_on_shutdown: basics.ignore_evicts_on_shutdown,
168
198
  metrics: basics.metrics,
169
199
  };
170
200
  all_inputs
@@ -180,6 +210,9 @@ impl WFStream {
180
210
  }
181
211
  WFStreamInput::Local(local_input) => {
182
212
  let _span_g = local_input.span.enter();
213
+ if let Some(rid) = local_input.input.run_id() {
214
+ state.record_span_fields(rid, &local_input.span);
215
+ }
183
216
  match local_input.input {
184
217
  LocalInputs::RunUpdateResponse(resp) => {
185
218
  state.process_run_update_response(resp)
@@ -281,7 +314,7 @@ impl WFStream {
281
314
  // If there are in-poll queries, insert jobs for those queries into the
282
315
  // activation, but only if we hit the cache. If we didn't, those queries
283
316
  // will need to be dealt with once replay is over
284
- if !wft.pending_queries.is_empty() && wft.hit_cache {
317
+ if wft.hit_cache {
285
318
  put_queries_in_act(&mut activation, wft);
286
319
  }
287
320
  }
@@ -376,8 +409,9 @@ impl WFStream {
376
409
  }
377
410
  }
378
411
 
379
- #[instrument(level = "debug", skip(self, pwft),
380
- fields(run_id=%pwft.wft.workflow_execution.run_id))]
412
+ #[instrument(skip(self, pwft),
413
+ fields(run_id=%pwft.wft.workflow_execution.run_id,
414
+ workflow_id=%pwft.wft.workflow_execution.workflow_id))]
381
415
  fn instantiate_or_update(&mut self, pwft: PermittedWFT) {
382
416
  let (mut work, permit) = if let Some(w) = self.buffer_resp_if_outstanding_work(pwft) {
383
417
  (w.wft, w.permit)
@@ -407,6 +441,7 @@ impl WFStream {
407
441
  let wft_info = WorkflowTaskInfo {
408
442
  attempt: work.attempt,
409
443
  task_token: work.task_token,
444
+ wf_id: work.workflow_execution.workflow_id.clone(),
410
445
  };
411
446
  let poll_resp_is_incremental = work
412
447
  .history
@@ -476,8 +511,6 @@ impl WFStream {
476
511
  })
477
512
  }
478
513
 
479
- #[instrument(level = "debug", skip(self, complete),
480
- fields(run_id=%complete.completion.run_id()))]
481
514
  fn process_completion(&mut self, complete: WFActCompleteMsg) {
482
515
  match complete.completion {
483
516
  ValidatedCompletion::Success { run_id, commands } => {
@@ -691,7 +724,7 @@ impl WFStream {
691
724
  EvictionRequestResult::EvictionAlreadyRequested(attempts)
692
725
  }
693
726
  } else {
694
- warn!(run_id=%info.run_id, "Eviction requested for unknown run");
727
+ debug!(run_id=%info.run_id, "Eviction requested for unknown run");
695
728
  EvictionRequestResult::NotFound
696
729
  }
697
730
  }
@@ -875,7 +908,7 @@ impl WFStream {
875
908
  let all_runs_ready = self
876
909
  .runs
877
910
  .handles()
878
- .all(|r| !r.has_any_pending_work(true, false));
911
+ .all(|r| !r.has_any_pending_work(self.ignore_evicts_on_shutdown, false));
879
912
  if self.shutdown_token.is_cancelled() && all_runs_ready {
880
913
  info!("Workflow shutdown is done");
881
914
  true
@@ -931,6 +964,18 @@ impl WFStream {
931
964
 
932
965
  /// Drains pending queries from the workflow task and appends them to the activation's jobs
933
966
  fn put_queries_in_act(act: &mut WorkflowActivation, wft: &mut OutstandingTask) {
967
+ // Nothing to do if there are no pending queries
968
+ if wft.pending_queries.is_empty() {
969
+ return;
970
+ }
971
+
972
+ let has_legacy = wft.has_pending_legacy_query();
973
+ // Cannot dispatch legacy query if there are any other jobs - which can happen if, ex, a local
974
+ // activity resolves while we've gotten a legacy query after heartbeating.
975
+ if has_legacy && !act.jobs.is_empty() {
976
+ return;
977
+ }
978
+
934
979
  debug!(queries=?wft.pending_queries, "Dispatching queries");
935
980
  let query_jobs = wft
936
981
  .pending_queries
@@ -13,14 +13,15 @@ categories = ["development-tools"]
13
13
  # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
14
14
 
15
15
  [dependencies]
16
- anyhow = "1.0"
17
16
  async-trait = "0.1"
18
- derive_builder = "0.11"
19
- log = "0.4"
17
+ derive_builder = "0.12"
20
18
  opentelemetry = "0.18"
21
19
  prost-types = "0.11"
20
+ serde_json = "1.0"
22
21
  thiserror = "1.0"
23
22
  tonic = "0.8"
23
+ tracing-core = "0.1"
24
+ url = "2.3"
24
25
 
25
26
  [dependencies.temporal-sdk-core-protos]
26
27
  path = "../sdk-core-protos"
@@ -1,13 +1,11 @@
1
1
  pub mod errors;
2
+ pub mod telemetry;
2
3
  pub mod worker;
3
4
 
4
5
  use crate::{
5
6
  errors::{CompleteActivityError, CompleteWfError, PollActivityError, PollWfError},
6
7
  worker::WorkerConfig,
7
8
  };
8
- use log::Level;
9
- use opentelemetry::metrics::Meter;
10
- use std::time::{Duration, SystemTime, UNIX_EPOCH};
11
9
  use temporal_sdk_core_protos::coresdk::{
12
10
  activity_task::ActivityTask, workflow_activation::WorkflowActivation,
13
11
  workflow_completion::WorkflowActivationCompletion, ActivityHeartbeat, ActivityTaskCompletion,
@@ -109,43 +107,3 @@ pub trait Worker: Send + Sync {
109
107
  /// This should be called only after [Worker::shutdown] has resolved.
110
108
  async fn finalize_shutdown(self);
111
109
  }
112
-
113
- /// Should be backed by a process-wide singleton who is responsible for telemetry and logging
114
- /// management.
115
- pub trait CoreTelemetry {
116
- /// Core buffers logs that should be shuttled over to lang so that they may be rendered with
117
- /// the user's desired logging library. Use this function to grab the most recent buffered logs
118
- /// since the last time it was called. A fixed number of such logs are retained at maximum, with
119
- /// the oldest being dropped when full.
120
- ///
121
- /// Returns the list of logs from oldest to newest. Returns an empty vec if the feature is not
122
- /// configured.
123
- fn fetch_buffered_logs(&self) -> Vec<CoreLog>;
124
-
125
- /// If metrics gathering is enabled, returns the OTel meter for core telemetry, which can be
126
- /// used to create metrics instruments, or passed to things that create/record metrics (ex:
127
- /// clients).
128
- fn get_metric_meter(&self) -> Option<&Meter>;
129
- }
130
-
131
- /// A log line (which ultimately came from a tracing event) exported from Core->Lang
132
- #[derive(Debug)]
133
- pub struct CoreLog {
134
- /// Log message
135
- pub message: String,
136
- /// Time log was generated (not when it was exported to lang)
137
- pub timestamp: SystemTime,
138
- /// Message level
139
- pub level: Level,
140
- // KV pairs aren't meaningfully exposed yet to the log interface by tracing
141
- }
142
-
143
- impl CoreLog {
144
- /// Return timestamp as ms since epoch
145
- pub fn millis_since_epoch(&self) -> u128 {
146
- self.timestamp
147
- .duration_since(UNIX_EPOCH)
148
- .unwrap_or(Duration::ZERO)
149
- .as_millis()
150
- }
151
- }
@@ -0,0 +1,147 @@
1
+ use opentelemetry::metrics::Meter;
2
+ use std::{
3
+ collections::HashMap,
4
+ net::SocketAddr,
5
+ time::{Duration, SystemTime, UNIX_EPOCH},
6
+ };
7
+ use tracing_core::Level;
8
+ use url::Url;
9
+
10
+ /// Each core runtime instance has a telemetry subsystem associated with it, this trait defines the
11
+ /// operations that lang might want to perform on that telemetry after it's initialized.
12
+ pub trait CoreTelemetry {
13
+ /// Each worker buffers logs that should be shuttled over to lang so that they may be rendered
14
+ /// with the user's desired logging library. Use this function to grab the most recent buffered
15
+ /// logs since the last time it was called. A fixed number of such logs are retained at maximum,
16
+ /// with the oldest being dropped when full.
17
+ ///
18
+ /// Returns the list of logs from oldest to newest. Returns an empty vec if the feature is not
19
+ /// configured.
20
+ fn fetch_buffered_logs(&self) -> Vec<CoreLog>;
21
+
22
+ /// If metrics gathering is enabled, returns the OTel meter for core telemetry, which can be
23
+ /// used to create metrics instruments, or passed to things that create/record metrics (ex:
24
+ /// clients).
25
+ fn get_metric_meter(&self) -> Option<&Meter>;
26
+ }
27
+
28
+ /// Telemetry configuration options. Construct with [TelemetryOptionsBuilder]
29
+ #[derive(Debug, Clone, derive_builder::Builder)]
30
+ #[non_exhaustive]
31
+ pub struct TelemetryOptions {
32
+ /// Optional trace exporter - set as None to disable.
33
+ #[builder(setter(into, strip_option), default)]
34
+ pub tracing: Option<TraceExportConfig>,
35
+ /// Optional logger - set as None to disable.
36
+ #[builder(setter(into, strip_option), default)]
37
+ pub logging: Option<Logger>,
38
+ /// Optional metrics exporter - set as None to disable.
39
+ #[builder(setter(into, strip_option), default)]
40
+ pub metrics: Option<MetricsExporter>,
41
+
42
+ /// If set true, do not prefix metrics with `temporal_`. Will be removed eventually as
43
+ /// the prefix is consistent with other SDKs.
44
+ #[builder(default)]
45
+ pub no_temporal_prefix_for_metrics: bool,
46
+
47
+ /// Specifies the aggregation temporality for metric export. Defaults to cumulative.
48
+ #[builder(default = "MetricTemporality::Cumulative")]
49
+ pub metric_temporality: MetricTemporality,
50
+ }
51
+
52
+ /// Options for exporting to an OpenTelemetry Collector
53
+ #[derive(Debug, Clone)]
54
+ pub struct OtelCollectorOptions {
55
+ /// The url of the OTel collector to export telemetry and metrics to. Lang SDK should also
56
+ /// export to this same collector.
57
+ pub url: Url,
58
+ /// Optional set of HTTP headers to send to the Collector, e.g for authentication.
59
+ pub headers: HashMap<String, String>,
60
+ /// Optionally specify how frequently metrics should be exported. Defaults to 1 second.
61
+ pub metric_periodicity: Option<Duration>,
62
+ }
63
+
64
+ /// Configuration for the external export of traces
65
+ #[derive(Debug, Clone)]
66
+ pub struct TraceExportConfig {
67
+ /// An [EnvFilter](https://docs.rs/tracing-subscriber/latest/tracing_subscriber/struct.EnvFilter.html) filter string.
68
+ pub filter: String,
69
+ /// Where they should go
70
+ pub exporter: TraceExporter,
71
+ }
72
+
73
+ /// Control where traces are exported.
74
+ #[derive(Debug, Clone)]
75
+ pub enum TraceExporter {
76
+ /// Export traces to an OpenTelemetry Collector <https://opentelemetry.io/docs/collector/>.
77
+ Otel(OtelCollectorOptions),
78
+ }
79
+
80
+ /// Control where metrics are exported
81
+ #[derive(Debug, Clone)]
82
+ pub enum MetricsExporter {
83
+ /// Export metrics to an OpenTelemetry Collector <https://opentelemetry.io/docs/collector/>.
84
+ Otel(OtelCollectorOptions),
85
+ /// Expose metrics directly via an embedded http server bound to the provided address.
86
+ Prometheus(SocketAddr),
87
+ }
88
+
89
+ /// Control where logs go
90
+ #[derive(Debug, Clone)]
91
+ pub enum Logger {
92
+ /// Log directly to console.
93
+ Console {
94
+ /// An [EnvFilter](https://docs.rs/tracing-subscriber/latest/tracing_subscriber/struct.EnvFilter.html) filter string.
95
+ filter: String,
96
+ },
97
+ /// Forward logs to Lang - collectable with `fetch_global_buffered_logs`.
98
+ Forward {
99
+ /// An [EnvFilter](https://docs.rs/tracing-subscriber/latest/tracing_subscriber/struct.EnvFilter.html) filter string.
100
+ filter: String,
101
+ },
102
+ }
103
+
104
+ /// Types of aggregation temporality for metric export.
105
+ /// See: <https://github.com/open-telemetry/opentelemetry-specification/blob/ce50e4634efcba8da445cc23523243cb893905cb/specification/metrics/datamodel.md#temporality>
106
+ #[derive(Debug, Clone, Copy)]
107
+ pub enum MetricTemporality {
108
+ /// Successive data points repeat the starting timestamp
109
+ Cumulative,
110
+ /// Successive data points advance the starting timestamp
111
+ Delta,
112
+ }
113
+
114
+ impl Default for TelemetryOptions {
115
+ fn default() -> Self {
116
+ TelemetryOptionsBuilder::default().build().unwrap()
117
+ }
118
+ }
119
+
120
+ /// A log line (which ultimately came from a tracing event) exported from Core->Lang
121
+ #[derive(Debug)]
122
+ pub struct CoreLog {
123
+ /// The module within core this message originated from
124
+ pub target: String,
125
+ /// Log message
126
+ pub message: String,
127
+ /// Time log was generated (not when it was exported to lang)
128
+ pub timestamp: SystemTime,
129
+ /// Message level
130
+ pub level: Level,
131
+ /// Arbitrary k/v pairs (span k/vs are collapsed with event k/vs here). We could change this
132
+ /// to include them in `span_contexts` instead, but there's probably not much value for log
133
+ /// forwarding.
134
+ pub fields: HashMap<String, serde_json::Value>,
135
+ /// A list of the outermost to the innermost span names
136
+ pub span_contexts: Vec<String>,
137
+ }
138
+
139
+ impl CoreLog {
140
+ /// Return timestamp as ms since epoch
141
+ pub fn millis_since_epoch(&self) -> u128 {
142
+ self.timestamp
143
+ .duration_since(UNIX_EPOCH)
144
+ .unwrap_or(Duration::ZERO)
145
+ .as_millis()
146
+ }
147
+ }
@@ -95,6 +95,16 @@ pub struct WorkerConfig {
95
95
  /// todo: link to feature docs
96
96
  #[builder(default = "false")]
97
97
  pub use_worker_versioning: bool,
98
+
99
+ /// If set false (default), shutdown will not finish until all pending evictions have been
100
+ /// issued and replied to. If set true shutdown will be considered complete when the only
101
+ /// remaining work is pending evictions.
102
+ ///
103
+ /// This flag is useful during tests to avoid needing to deal with lots of uninteresting
104
+ /// evictions during shutdown. Alternatively, if a lang implementation finds it easy to clean
105
+ /// up during shutdown, setting this true saves some back-and-forth.
106
+ #[builder(default = "false")]
107
+ pub ignore_evicts_on_shutdown: bool,
98
108
  }
99
109
 
100
110
  impl WorkerConfig {
@@ -114,6 +124,9 @@ impl WorkerConfigBuilder {
114
124
  if self.max_concurrent_wft_polls == Some(0) {
115
125
  return Err("`max_concurrent_wft_polls` must be at least 1".to_owned());
116
126
  }
127
+ if self.max_concurrent_at_polls == Some(0) {
128
+ return Err("`max_concurrent_at_polls` must be at least 1".to_owned());
129
+ }
117
130
  if self.max_cached_workflows > Some(0)
118
131
  && self.max_outstanding_workflow_tasks > self.max_cached_workflows
119
132
  {
@@ -4,7 +4,7 @@ error[E0277]: the trait bound `One: From<Two>` is not satisfied
4
4
  11 | Two --(B)--> One;
5
5
  | ^^^ the trait `From<Two>` is not implemented for `One`
6
6
  |
7
- = note: required because of the requirements on the impl of `Into<One>` for `Two`
7
+ = note: required for `Two` to implement `Into<One>`
8
8
  note: required by a bound in `TransitionResult::<Sm, Ds>::from`
9
9
  --> $WORKSPACE/fsm/rustfsm_trait/src/lib.rs
10
10
  |
@@ -1,4 +1,4 @@
1
1
  # Syntax is here:
2
2
  # https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners#codeowners-syntax
3
3
 
4
- * @temporalio/server @temporalio/sdk
4
+ * @temporalio/server @temporalio/sdk
@@ -1,8 +1,5 @@
1
1
  version: v1
2
2
  breaking:
3
- ignore:
4
- - temporal/api/schedule/v1
5
- - temporal/api/update/v1
6
3
  use:
7
4
  - PACKAGE
8
5
  lint: