@temporalio/core-bridge 1.11.7 → 1.11.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191) hide show
  1. package/Cargo.lock +504 -341
  2. package/package.json +3 -3
  3. package/releases/aarch64-apple-darwin/index.node +0 -0
  4. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
  5. package/releases/x86_64-apple-darwin/index.node +0 -0
  6. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  7. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  8. package/sdk-core/.cargo/config.toml +5 -0
  9. package/sdk-core/.github/workflows/per-pr.yml +59 -5
  10. package/sdk-core/Cargo.toml +3 -2
  11. package/sdk-core/client/Cargo.toml +3 -3
  12. package/sdk-core/client/src/lib.rs +154 -161
  13. package/sdk-core/client/src/metrics.rs +15 -8
  14. package/sdk-core/client/src/proxy.rs +1 -1
  15. package/sdk-core/client/src/raw.rs +176 -33
  16. package/sdk-core/client/src/retry.rs +102 -465
  17. package/sdk-core/client/src/worker_registry/mod.rs +2 -2
  18. package/sdk-core/client/src/workflow_handle/mod.rs +19 -1
  19. package/sdk-core/core/Cargo.toml +12 -14
  20. package/sdk-core/core/benches/workflow_replay.rs +1 -1
  21. package/sdk-core/core/src/abstractions.rs +2 -2
  22. package/sdk-core/core/src/core_tests/activity_tasks.rs +99 -46
  23. package/sdk-core/core/src/core_tests/child_workflows.rs +68 -9
  24. package/sdk-core/core/src/core_tests/determinism.rs +2 -2
  25. package/sdk-core/core/src/core_tests/local_activities.rs +20 -33
  26. package/sdk-core/core/src/core_tests/mod.rs +7 -8
  27. package/sdk-core/core/src/core_tests/queries.rs +79 -79
  28. package/sdk-core/core/src/core_tests/replay_flag.rs +5 -5
  29. package/sdk-core/core/src/core_tests/updates.rs +6 -6
  30. package/sdk-core/core/src/core_tests/workers.rs +19 -22
  31. package/sdk-core/core/src/core_tests/workflow_cancels.rs +3 -3
  32. package/sdk-core/core/src/core_tests/workflow_tasks.rs +154 -106
  33. package/sdk-core/core/src/ephemeral_server/mod.rs +66 -10
  34. package/sdk-core/core/src/internal_flags.rs +103 -12
  35. package/sdk-core/core/src/lib.rs +21 -13
  36. package/sdk-core/core/src/pollers/mod.rs +200 -6
  37. package/sdk-core/core/src/pollers/poll_buffer.rs +32 -8
  38. package/sdk-core/core/src/protosext/mod.rs +7 -7
  39. package/sdk-core/core/src/protosext/protocol_messages.rs +2 -2
  40. package/sdk-core/core/src/replay/mod.rs +8 -9
  41. package/sdk-core/core/src/retry_logic.rs +8 -6
  42. package/sdk-core/core/src/telemetry/log_export.rs +4 -4
  43. package/sdk-core/core/src/telemetry/metrics.rs +111 -25
  44. package/sdk-core/core/src/telemetry/mod.rs +11 -4
  45. package/sdk-core/core/src/telemetry/otel.rs +108 -144
  46. package/sdk-core/core/src/telemetry/prometheus_server.rs +1 -4
  47. package/sdk-core/core/src/test_help/mod.rs +27 -21
  48. package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +7 -5
  49. package/sdk-core/core/src/worker/activities/local_activities.rs +9 -9
  50. package/sdk-core/core/src/worker/activities.rs +34 -46
  51. package/sdk-core/core/src/worker/client/mocks.rs +24 -2
  52. package/sdk-core/core/src/worker/client.rs +169 -33
  53. package/sdk-core/core/src/worker/mod.rs +132 -56
  54. package/sdk-core/core/src/worker/nexus.rs +410 -0
  55. package/sdk-core/core/src/worker/tuner/resource_based.rs +27 -5
  56. package/sdk-core/core/src/worker/tuner.rs +29 -2
  57. package/sdk-core/core/src/worker/workflow/driven_workflow.rs +8 -3
  58. package/sdk-core/core/src/worker/workflow/history_update.rs +5 -8
  59. package/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +83 -87
  60. package/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +38 -38
  61. package/sdk-core/core/src/worker/workflow/machines/cancel_nexus_op_state_machine.rs +117 -0
  62. package/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +8 -18
  63. package/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +114 -108
  64. package/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +16 -31
  65. package/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +7 -14
  66. package/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +8 -15
  67. package/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +34 -75
  68. package/sdk-core/core/src/worker/workflow/machines/mod.rs +26 -48
  69. package/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +10 -17
  70. package/sdk-core/core/src/worker/workflow/machines/nexus_operation_state_machine.rs +543 -0
  71. package/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +22 -31
  72. package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +53 -51
  73. package/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +40 -45
  74. package/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +2 -2
  75. package/sdk-core/core/src/worker/workflow/machines/update_state_machine.rs +8 -10
  76. package/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +24 -30
  77. package/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +182 -116
  78. package/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +4 -8
  79. package/sdk-core/core/src/worker/workflow/managed_run.rs +75 -45
  80. package/sdk-core/core/src/worker/workflow/mod.rs +104 -55
  81. package/sdk-core/core/src/worker/workflow/run_cache.rs +23 -4
  82. package/sdk-core/core/src/worker/workflow/wft_extraction.rs +4 -4
  83. package/sdk-core/core/src/worker/workflow/wft_poller.rs +3 -3
  84. package/sdk-core/core/src/worker/workflow/workflow_stream.rs +32 -13
  85. package/sdk-core/core-api/Cargo.toml +2 -3
  86. package/sdk-core/core-api/src/errors.rs +22 -20
  87. package/sdk-core/core-api/src/lib.rs +24 -5
  88. package/sdk-core/core-api/src/telemetry/metrics.rs +27 -1
  89. package/sdk-core/core-api/src/telemetry.rs +37 -3
  90. package/sdk-core/core-api/src/worker.rs +36 -3
  91. package/sdk-core/docker/docker-compose-ci.yaml +25 -0
  92. package/sdk-core/etc/otel-collector-ci.yaml +36 -0
  93. package/sdk-core/etc/otel-collector-config.yaml +3 -3
  94. package/sdk-core/etc/prometheus.yaml +1 -1
  95. package/sdk-core/fsm/Cargo.toml +1 -1
  96. package/sdk-core/fsm/rustfsm_procmacro/Cargo.toml +1 -1
  97. package/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +3 -4
  98. package/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +1 -1
  99. package/sdk-core/fsm/rustfsm_trait/Cargo.toml +1 -1
  100. package/sdk-core/sdk/Cargo.toml +1 -2
  101. package/sdk-core/sdk/src/activity_context.rs +1 -1
  102. package/sdk-core/sdk/src/interceptors.rs +1 -1
  103. package/sdk-core/sdk/src/lib.rs +126 -54
  104. package/sdk-core/sdk/src/workflow_context/options.rs +184 -74
  105. package/sdk-core/sdk/src/workflow_context.rs +193 -79
  106. package/sdk-core/sdk/src/workflow_future.rs +151 -131
  107. package/sdk-core/sdk-core-protos/Cargo.toml +3 -4
  108. package/sdk-core/sdk-core-protos/protos/api_cloud_upstream/VERSION +1 -1
  109. package/sdk-core/sdk-core-protos/protos/api_cloud_upstream/temporal/api/cloud/account/v1/message.proto +46 -0
  110. package/sdk-core/sdk-core-protos/protos/api_cloud_upstream/temporal/api/cloud/cloudservice/v1/request_response.proto +254 -5
  111. package/sdk-core/sdk-core-protos/protos/api_cloud_upstream/temporal/api/cloud/cloudservice/v1/service.proto +108 -2
  112. package/sdk-core/sdk-core-protos/protos/api_cloud_upstream/temporal/api/cloud/identity/v1/message.proto +94 -15
  113. package/sdk-core/sdk-core-protos/protos/api_cloud_upstream/temporal/api/cloud/namespace/v1/message.proto +102 -4
  114. package/sdk-core/sdk-core-protos/protos/api_cloud_upstream/temporal/api/cloud/nexus/v1/message.proto +84 -0
  115. package/sdk-core/sdk-core-protos/protos/api_cloud_upstream/temporal/api/cloud/operation/v1/message.proto +25 -10
  116. package/sdk-core/sdk-core-protos/protos/api_cloud_upstream/temporal/api/cloud/region/v1/message.proto +14 -1
  117. package/sdk-core/sdk-core-protos/protos/api_cloud_upstream/temporal/api/cloud/resource/v1/message.proto +25 -0
  118. package/sdk-core/sdk-core-protos/protos/api_cloud_upstream/temporal/api/cloud/sink/v1/message.proto +41 -0
  119. package/sdk-core/sdk-core-protos/protos/api_cloud_upstream/temporal/api/cloud/usage/v1/message.proto +59 -0
  120. package/sdk-core/sdk-core-protos/protos/api_upstream/.github/PULL_REQUEST_TEMPLATE.md +2 -0
  121. package/sdk-core/sdk-core-protos/protos/api_upstream/.github/workflows/create-release.yml +135 -0
  122. package/sdk-core/sdk-core-protos/protos/api_upstream/.github/workflows/push-to-buf.yml +20 -0
  123. package/sdk-core/sdk-core-protos/protos/api_upstream/.github/workflows/trigger-api-go-delete-release.yml +13 -0
  124. package/sdk-core/sdk-core-protos/protos/api_upstream/.github/workflows/trigger-api-go-publish-release.yml +13 -0
  125. package/sdk-core/sdk-core-protos/protos/api_upstream/.github/workflows/trigger-api-go-update.yml +13 -21
  126. package/sdk-core/sdk-core-protos/protos/api_upstream/Makefile +2 -2
  127. package/sdk-core/sdk-core-protos/protos/api_upstream/buf.yaml +1 -0
  128. package/sdk-core/sdk-core-protos/protos/api_upstream/openapi/openapiv2.json +3386 -1047
  129. package/sdk-core/sdk-core-protos/protos/api_upstream/openapi/openapiv3.yaml +3529 -1144
  130. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/batch/v1/message.proto +39 -1
  131. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/command/v1/message.proto +6 -0
  132. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/common/v1/message.proto +39 -1
  133. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/deployment/v1/message.proto +252 -0
  134. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/batch_operation.proto +1 -0
  135. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/common.proto +6 -0
  136. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/deployment.proto +96 -0
  137. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/event_type.proto +2 -0
  138. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +2 -0
  139. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/nexus.proto +42 -0
  140. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/reset.proto +2 -0
  141. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/workflow.proto +43 -2
  142. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/errordetails/v1/message.proto +13 -1
  143. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/failure/v1/message.proto +14 -0
  144. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/history/v1/message.proto +70 -12
  145. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/nexus/v1/message.proto +12 -0
  146. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/query/v1/message.proto +9 -2
  147. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +46 -2
  148. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/workflow/v1/message.proto +206 -0
  149. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +482 -97
  150. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +230 -43
  151. package/sdk-core/sdk-core-protos/protos/local/temporal/sdk/core/core_interface.proto +6 -0
  152. package/sdk-core/sdk-core-protos/protos/local/temporal/sdk/core/nexus/nexus.proto +71 -0
  153. package/sdk-core/sdk-core-protos/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +46 -2
  154. package/sdk-core/sdk-core-protos/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +55 -9
  155. package/sdk-core/sdk-core-protos/src/history_builder.rs +5 -5
  156. package/sdk-core/sdk-core-protos/src/history_info.rs +5 -6
  157. package/sdk-core/sdk-core-protos/src/lib.rs +414 -34
  158. package/sdk-core/sdk-core-protos/src/task_token.rs +1 -1
  159. package/sdk-core/test-utils/Cargo.toml +3 -11
  160. package/sdk-core/test-utils/src/canned_histories.rs +1 -1
  161. package/sdk-core/test-utils/src/lib.rs +159 -85
  162. package/sdk-core/tests/fuzzy_workflow.rs +3 -3
  163. package/sdk-core/tests/heavy_tests.rs +3 -3
  164. package/sdk-core/tests/integ_tests/client_tests.rs +171 -20
  165. package/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +45 -39
  166. package/sdk-core/tests/integ_tests/heartbeat_tests.rs +7 -6
  167. package/sdk-core/tests/integ_tests/metrics_tests.rs +492 -35
  168. package/sdk-core/tests/integ_tests/polling_tests.rs +7 -5
  169. package/sdk-core/tests/integ_tests/queries_tests.rs +14 -17
  170. package/sdk-core/tests/integ_tests/update_tests.rs +47 -44
  171. package/sdk-core/tests/integ_tests/visibility_tests.rs +4 -3
  172. package/sdk-core/tests/integ_tests/worker_tests.rs +5 -5
  173. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +15 -13
  174. package/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +28 -14
  175. package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +7 -1
  176. package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +57 -4
  177. package/sdk-core/tests/integ_tests/workflow_tests/eager.rs +1 -1
  178. package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +24 -18
  179. package/sdk-core/tests/integ_tests/workflow_tests/nexus.rs +506 -0
  180. package/sdk-core/tests/integ_tests/workflow_tests/patches.rs +1 -1
  181. package/sdk-core/tests/integ_tests/workflow_tests/priority.rs +104 -0
  182. package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +34 -31
  183. package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -1
  184. package/sdk-core/tests/integ_tests/workflow_tests/timers.rs +10 -7
  185. package/sdk-core/tests/integ_tests/workflow_tests.rs +152 -116
  186. package/sdk-core/tests/main.rs +36 -6
  187. package/sdk-core/tests/runner.rs +30 -9
  188. package/src/conversions/slot_supplier_bridge.rs +4 -0
  189. package/src/conversions.rs +1 -0
  190. package/src/worker.rs +5 -7
  191. package/sdk-core/core/src/worker/activities/activity_task_poller_stream.rs +0 -78
@@ -0,0 +1,410 @@
1
+ use crate::{
2
+ abstractions::UsedMeteredSemPermit,
3
+ pollers::{BoxedNexusPoller, NexusPollItem, new_nexus_task_poller},
4
+ telemetry::{
5
+ metrics,
6
+ metrics::{FailureReason, MetricsContext},
7
+ },
8
+ worker::client::WorkerClient,
9
+ };
10
+ use anyhow::anyhow;
11
+ use futures_util::{
12
+ Stream, StreamExt, stream,
13
+ stream::{BoxStream, PollNext},
14
+ };
15
+ use std::{
16
+ collections::HashMap,
17
+ sync::{
18
+ Arc,
19
+ atomic::{AtomicBool, Ordering},
20
+ },
21
+ time::{Duration, Instant, SystemTime},
22
+ };
23
+ use temporal_sdk_core_api::{
24
+ errors::{CompleteNexusError, PollError},
25
+ worker::NexusSlotKind,
26
+ };
27
+ use temporal_sdk_core_protos::{
28
+ TaskToken,
29
+ coresdk::{
30
+ NexusSlotInfo,
31
+ nexus::{
32
+ CancelNexusTask, NexusTask, NexusTaskCancelReason, nexus_task, nexus_task_completion,
33
+ },
34
+ },
35
+ temporal::api::nexus::v1::{request::Variant, response, start_operation_response},
36
+ };
37
+ use tokio::{
38
+ join,
39
+ sync::{Mutex, Notify, mpsc::UnboundedSender},
40
+ task::JoinHandle,
41
+ };
42
+ use tokio_stream::wrappers::UnboundedReceiverStream;
43
+ use tokio_util::sync::CancellationToken;
44
+
45
+ static REQUEST_TIMEOUT_HEADER: &str = "Request-Timeout";
46
+
47
+ /// Centralizes all state related to received nexus tasks
48
+ pub(super) struct NexusManager {
49
+ task_stream: Mutex<BoxStream<'static, Result<NexusTask, PollError>>>,
50
+ /// Token to notify when poll returned a shutdown error
51
+ poll_returned_shutdown_token: CancellationToken,
52
+ /// Outstanding nexus tasks that have been issued to lang but not yet completed
53
+ outstanding_task_map: OutstandingTaskMap,
54
+ /// Notified every time a task in the map is completed
55
+ task_completed_notify: Arc<Notify>,
56
+
57
+ ever_polled: AtomicBool,
58
+ metrics: MetricsContext,
59
+ }
60
+
61
+ impl NexusManager {
62
+ pub(super) fn new(
63
+ poller: BoxedNexusPoller,
64
+ metrics: MetricsContext,
65
+ graceful_shutdown: Option<Duration>,
66
+ shutdown_initiated_token: CancellationToken,
67
+ ) -> Self {
68
+ let source_stream =
69
+ new_nexus_task_poller(poller, metrics.clone(), shutdown_initiated_token);
70
+ let (cancels_tx, cancels_rx) = tokio::sync::mpsc::unbounded_channel();
71
+ let task_stream_input = stream::select_with_strategy(
72
+ UnboundedReceiverStream::new(cancels_rx).map(TaskStreamInput::from),
73
+ source_stream
74
+ .map(TaskStreamInput::from)
75
+ .chain(stream::once(async move { TaskStreamInput::SourceComplete })),
76
+ |_: &mut ()| PollNext::Left,
77
+ );
78
+ let task_completed_notify = Arc::new(Notify::new());
79
+ let task_stream = NexusTaskStream::new(
80
+ task_stream_input,
81
+ cancels_tx,
82
+ task_completed_notify.clone(),
83
+ graceful_shutdown,
84
+ metrics.clone(),
85
+ );
86
+ let outstanding_task_map = task_stream.outstanding_task_map.clone();
87
+ Self {
88
+ task_stream: Mutex::new(task_stream.into_stream().boxed()),
89
+ poll_returned_shutdown_token: CancellationToken::new(),
90
+ outstanding_task_map,
91
+ task_completed_notify,
92
+ ever_polled: AtomicBool::new(false),
93
+ metrics,
94
+ }
95
+ }
96
+
97
+ /// Block until then next nexus task is received from server
98
+ pub(super) async fn next_nexus_task(&self) -> Result<NexusTask, PollError> {
99
+ self.ever_polled.store(true, Ordering::Relaxed);
100
+ let mut sl = self.task_stream.lock().await;
101
+ let r = sl.next().await.unwrap_or_else(|| Err(PollError::ShutDown));
102
+ // This can't happen in the or_else closure because ShutDown is typically returned by the
103
+ // stream directly, before it terminates.
104
+ if let Err(PollError::ShutDown) = &r {
105
+ self.poll_returned_shutdown_token.cancel();
106
+ }
107
+ r
108
+ }
109
+
110
+ pub(super) async fn complete_task(
111
+ &self,
112
+ tt: TaskToken,
113
+ status: nexus_task_completion::Status,
114
+ client: &dyn WorkerClient,
115
+ ) -> Result<(), CompleteNexusError> {
116
+ let removed = self.outstanding_task_map.lock().remove(&tt);
117
+ if let Some(task_info) = removed {
118
+ self.metrics
119
+ .nexus_task_execution_latency(task_info.start_time.elapsed());
120
+ task_info.timeout_task.inspect(|jh| jh.abort());
121
+ let (did_send, maybe_net_err) = match status {
122
+ nexus_task_completion::Status::Completed(c) => {
123
+ // Server doesn't provide obvious errors for this validation, so it's done
124
+ // here to make life easier for lang implementors.
125
+ match &c.variant {
126
+ Some(response::Variant::StartOperation(so)) => {
127
+ if let Some(start_operation_response::Variant::OperationError(oe)) =
128
+ so.variant.as_ref()
129
+ {
130
+ self.metrics
131
+ .with_new_attrs([metrics::failure_reason(
132
+ FailureReason::NexusOperation(oe.operation_state.clone()),
133
+ )])
134
+ .nexus_task_execution_failed();
135
+ };
136
+ if task_info.request_kind != RequestKind::Start {
137
+ return Err(CompleteNexusError::MalformedNexusCompletion {
138
+ reason: "Nexus response was StartOperation but request was not"
139
+ .to_string(),
140
+ });
141
+ }
142
+ }
143
+ Some(response::Variant::CancelOperation(_)) => {
144
+ if task_info.request_kind != RequestKind::Cancel {
145
+ return Err(CompleteNexusError::MalformedNexusCompletion {
146
+ reason:
147
+ "Nexus response was CancelOperation but request was not"
148
+ .to_string(),
149
+ });
150
+ }
151
+ }
152
+ None => {
153
+ return Err(CompleteNexusError::MalformedNexusCompletion {
154
+ reason: "Nexus completion must contain a status variant "
155
+ .to_string(),
156
+ });
157
+ }
158
+ }
159
+ (true, client.complete_nexus_task(tt, c).await.err())
160
+ }
161
+ nexus_task_completion::Status::AckCancel(_) => {
162
+ self.metrics
163
+ .with_new_attrs([metrics::failure_reason(FailureReason::Timeout)])
164
+ .nexus_task_execution_failed();
165
+ (false, None)
166
+ }
167
+ nexus_task_completion::Status::Error(e) => {
168
+ self.metrics
169
+ .with_new_attrs([metrics::failure_reason(
170
+ FailureReason::NexusHandlerError(e.error_type.clone()),
171
+ )])
172
+ .nexus_task_execution_failed();
173
+ (true, client.fail_nexus_task(tt, e).await.err())
174
+ }
175
+ };
176
+
177
+ self.task_completed_notify.notify_waiters();
178
+
179
+ if let Some(e) = maybe_net_err {
180
+ if e.code() == tonic::Code::NotFound {
181
+ warn!(details=?e, "Nexus task not found on completion. This \
182
+ may happen if the operation has already been cancelled but completed anyway.");
183
+ } else {
184
+ warn!(error=?e, "Network error while completing Nexus task");
185
+ }
186
+ } else if did_send {
187
+ // Record e2e latency if we sent replied to server without an RPC error
188
+ if let Some(elapsed) = task_info.scheduled_time.and_then(|t| t.elapsed().ok()) {
189
+ self.metrics.nexus_task_e2e_latency(elapsed);
190
+ }
191
+ }
192
+ } else {
193
+ warn!(
194
+ "Attempted to complete nexus task {} but we were not tracking it",
195
+ &tt
196
+ );
197
+ }
198
+ Ok(())
199
+ }
200
+
201
+ pub(super) async fn shutdown(&self) {
202
+ if !self.ever_polled.load(Ordering::Relaxed) {
203
+ return;
204
+ }
205
+ self.poll_returned_shutdown_token.cancelled().await;
206
+ }
207
+ }
208
+
209
+ struct NexusTaskStream<S> {
210
+ source_stream: S,
211
+ outstanding_task_map: OutstandingTaskMap,
212
+ cancels_tx: UnboundedSender<CancelNexusTask>,
213
+ task_completed_notify: Arc<Notify>,
214
+ grace_period: Option<Duration>,
215
+ metrics: MetricsContext,
216
+ }
217
+
218
+ impl<S> NexusTaskStream<S>
219
+ where
220
+ S: Stream<Item = TaskStreamInput>,
221
+ {
222
+ fn new(
223
+ source: S,
224
+ cancels_tx: UnboundedSender<CancelNexusTask>,
225
+ task_completed_notify: Arc<Notify>,
226
+ grace_period: Option<Duration>,
227
+ metrics: MetricsContext,
228
+ ) -> Self {
229
+ Self {
230
+ source_stream: source,
231
+ outstanding_task_map: Arc::new(Default::default()),
232
+ cancels_tx,
233
+ task_completed_notify,
234
+ grace_period,
235
+ metrics,
236
+ }
237
+ }
238
+
239
+ fn into_stream(self) -> impl Stream<Item = Result<NexusTask, PollError>> {
240
+ let outstanding_task_clone = self.outstanding_task_map.clone();
241
+ let source_done = CancellationToken::new();
242
+ let source_done_clone = source_done.clone();
243
+ let cancels_tx_clone = self.cancels_tx.clone();
244
+ self.source_stream
245
+ .filter_map(move |t| {
246
+ let res = match t {
247
+ TaskStreamInput::Poll(Ok(t)) => {
248
+ if let Some(dur) = t.resp.sched_to_start() {
249
+ self.metrics.nexus_task_sched_to_start_latency(dur);
250
+ };
251
+
252
+ let tt = TaskToken(t.resp.task_token.clone());
253
+ let mut timeout_task = None;
254
+ if let Some(timeout_str) = t
255
+ .resp
256
+ .request
257
+ .as_ref()
258
+ .and_then(|r| r.header.get(REQUEST_TIMEOUT_HEADER))
259
+ {
260
+ if let Ok(timeout_dur) = parse_request_timeout(timeout_str) {
261
+ let tt_clone = tt.clone();
262
+ let cancels_tx = self.cancels_tx.clone();
263
+ timeout_task = Some(tokio::task::spawn(async move {
264
+ tokio::time::sleep(timeout_dur).await;
265
+ debug!(
266
+ task_token=%tt_clone,
267
+ "Timing out nexus task due to elapsed local timeout timer"
268
+ );
269
+ let _ = cancels_tx.send(CancelNexusTask {
270
+ task_token: tt_clone.0,
271
+ reason: NexusTaskCancelReason::TimedOut.into(),
272
+ });
273
+ }));
274
+ } else {
275
+ // This could auto-respond and fail the nexus task, but given that
276
+ // the server is going to try to parse this as well, and all we're
277
+ // doing with this parsing is notifying the handler of a local
278
+ // timeout, it seems reasonable to rely on server to handle this.
279
+ warn!(
280
+ "Failed to parse nexus timeout header value '{}'",
281
+ timeout_str
282
+ );
283
+ }
284
+ }
285
+
286
+ let (service, operation, request_kind) = t
287
+ .resp
288
+ .request
289
+ .as_ref()
290
+ .and_then(|r| r.variant.as_ref())
291
+ .map(|v| match v {
292
+ Variant::StartOperation(s) => (
293
+ s.service.to_owned(),
294
+ s.operation.to_owned(),
295
+ RequestKind::Start,
296
+ ),
297
+ Variant::CancelOperation(c) => (
298
+ c.service.to_owned(),
299
+ c.operation.to_owned(),
300
+ RequestKind::Cancel,
301
+ ),
302
+ })
303
+ .unwrap_or_default();
304
+ self.outstanding_task_map.lock().insert(
305
+ tt,
306
+ NexusInFlightTask {
307
+ request_kind,
308
+ timeout_task,
309
+ scheduled_time: t
310
+ .resp
311
+ .request
312
+ .as_ref()
313
+ .and_then(|r| r.scheduled_time)
314
+ .and_then(|t| t.try_into().ok()),
315
+ start_time: Instant::now(),
316
+ _permit: t.permit.into_used(NexusSlotInfo { service, operation }),
317
+ },
318
+ );
319
+ Some(Ok(NexusTask {
320
+ variant: Some(nexus_task::Variant::Task(t.resp)),
321
+ }))
322
+ }
323
+ TaskStreamInput::Cancel(c) => Some(Ok(NexusTask {
324
+ variant: Some(nexus_task::Variant::CancelTask(c)),
325
+ })),
326
+ TaskStreamInput::SourceComplete => {
327
+ source_done.cancel();
328
+ None
329
+ }
330
+ TaskStreamInput::Poll(Err(e)) => Some(Err(PollError::TonicError(e))),
331
+ };
332
+ async move { res }
333
+ })
334
+ .take_until(async move {
335
+ source_done_clone.cancelled().await;
336
+ let (grace_killer, stop_grace) = futures_util::future::abortable(async {
337
+ if let Some(gp) = self.grace_period {
338
+ tokio::time::sleep(gp).await;
339
+ for (tt, _) in outstanding_task_clone.lock().iter() {
340
+ let _ = cancels_tx_clone.send(CancelNexusTask {
341
+ task_token: tt.0.clone(),
342
+ reason: NexusTaskCancelReason::WorkerShutdown.into(),
343
+ });
344
+ }
345
+ }
346
+ });
347
+ join!(
348
+ async {
349
+ while !outstanding_task_clone.lock().is_empty() {
350
+ self.task_completed_notify.notified().await;
351
+ }
352
+ // If we were waiting for the grace period but everything already finished,
353
+ // we don't need to keep waiting.
354
+ stop_grace.abort();
355
+ },
356
+ grace_killer
357
+ )
358
+ })
359
+ .chain(stream::once(async move { Err(PollError::ShutDown) }))
360
+ }
361
+ }
362
+
363
/// Shared map from task token to the bookkeeping kept for each nexus task that has been issued
/// to lang but not yet completed. Uses a synchronous `parking_lot` mutex.
type OutstandingTaskMap = Arc<parking_lot::Mutex<HashMap<TaskToken, NexusInFlightTask>>>;
364
+
365
/// Bookkeeping for one nexus task that has been issued to lang and is awaiting completion.
struct NexusInFlightTask {
    /// Kind of request the task carried; a completion's response variant is checked against it
    request_kind: RequestKind,
    /// Handle of the local timeout timer, if the request carried a parseable timeout header.
    /// Aborted when the task is completed.
    timeout_task: Option<JoinHandle<()>>,
    /// Scheduled time taken from the request, when present and convertible. Used to record
    /// end-to-end latency.
    scheduled_time: Option<SystemTime>,
    /// When the task was recorded as outstanding. Used to record execution latency.
    start_time: Instant,
    /// Slot permit for this task. Never read; held only so it lives as long as this entry
    /// (presumably releasing the slot on drop — confirm in `abstractions`).
    _permit: UsedMeteredSemPermit<NexusSlotKind>,
}
372
+
373
/// Which kind of nexus request a task carried.
///
/// Defaults to `Start`, which is also what a task with no request variant is recorded as.
#[derive(Eq, PartialEq, Copy, Clone, Default)]
enum RequestKind {
    /// The request was a start-operation request
    #[default]
    Start,
    /// The request was a cancel-operation request
    Cancel,
}
379
+
380
/// Items fed into `NexusTaskStream`. The derived `From` impls are what the
/// `TaskStreamInput::from` mappings in `NexusManager::new` rely on.
#[derive(derive_more::From)]
enum TaskStreamInput {
    /// Result of polling server for a nexus task
    Poll(NexusPollItem),
    /// Locally generated request to cancel an outstanding task
    Cancel(CancelNexusTask),
    /// Marker appended after the poll source has ended
    SourceComplete,
}
386
+
387
+ fn parse_request_timeout(timeout: &str) -> Result<Duration, anyhow::Error> {
388
+ let timeout = timeout.trim();
389
+ let (value, unit) = timeout.split_at(
390
+ timeout
391
+ .find(|c: char| !c.is_ascii_digit() && c != '.')
392
+ .unwrap_or(timeout.len()),
393
+ );
394
+
395
+ match unit {
396
+ "m" => value
397
+ .parse::<f64>()
398
+ .map(|v| Duration::from_secs_f64(60.0 * v))
399
+ .map_err(Into::into),
400
+ "s" => value
401
+ .parse::<f64>()
402
+ .map(Duration::from_secs_f64)
403
+ .map_err(Into::into),
404
+ "ms" => value
405
+ .parse::<u64>()
406
+ .map(Duration::from_millis)
407
+ .map_err(Into::into),
408
+ _ => Err(anyhow!("Invalid timeout format")),
409
+ }
410
+ }
@@ -3,17 +3,17 @@ use parking_lot::Mutex;
3
3
  use std::{
4
4
  marker::PhantomData,
5
5
  sync::{
6
- atomic::{AtomicU64, AtomicUsize, Ordering},
7
6
  Arc, OnceLock,
7
+ atomic::{AtomicU64, AtomicUsize, Ordering},
8
8
  },
9
9
  time::{Duration, Instant},
10
10
  };
11
11
  use temporal_sdk_core_api::{
12
12
  telemetry::metrics::{CoreMeter, GaugeF64, MetricAttributes, TemporalMeter},
13
13
  worker::{
14
- ActivitySlotKind, LocalActivitySlotKind, SlotInfo, SlotInfoTrait, SlotKind, SlotKindType,
15
- SlotMarkUsedContext, SlotReleaseContext, SlotReservationContext, SlotSupplier,
16
- SlotSupplierPermit, WorkerTuner, WorkflowSlotKind,
14
+ ActivitySlotKind, LocalActivitySlotKind, NexusSlotKind, SlotInfo, SlotInfoTrait, SlotKind,
15
+ SlotKindType, SlotMarkUsedContext, SlotReleaseContext, SlotReservationContext,
16
+ SlotSupplier, SlotSupplierPermit, WorkerTuner, WorkflowSlotKind,
17
17
  },
18
18
  };
19
19
  use tokio::{sync::watch, task::JoinHandle};
@@ -30,6 +30,7 @@ pub struct ResourceBasedTuner<MI> {
30
30
  wf_opts: Option<ResourceSlotOptions>,
31
31
  act_opts: Option<ResourceSlotOptions>,
32
32
  la_opts: Option<ResourceSlotOptions>,
33
+ nexus_opts: Option<ResourceSlotOptions>,
33
34
  }
34
35
 
35
36
  impl ResourceBasedTuner<RealSysInfo> {
@@ -59,6 +60,7 @@ impl<MI> ResourceBasedTuner<MI> {
59
60
  wf_opts: None,
60
61
  act_opts: None,
61
62
  la_opts: None,
63
+ nexus_opts: None,
62
64
  }
63
65
  }
64
66
 
@@ -79,6 +81,12 @@ impl<MI> ResourceBasedTuner<MI> {
79
81
  self.la_opts = Some(opts);
80
82
  self
81
83
  }
84
+
85
+ /// Set nexus slot options
86
+ pub fn with_nexus_slots_options(&mut self, opts: ResourceSlotOptions) -> &mut Self {
87
+ self.nexus_opts = Some(opts);
88
+ self
89
+ }
82
90
  }
83
91
 
84
92
  const DEFAULT_WF_SLOT_OPTS: ResourceSlotOptions = ResourceSlotOptions {
@@ -91,6 +99,13 @@ const DEFAULT_ACT_SLOT_OPTS: ResourceSlotOptions = ResourceSlotOptions {
91
99
  max_slots: 10_000,
92
100
  ramp_throttle: Duration::from_millis(50),
93
101
  };
102
+ const DEFAULT_NEXUS_SLOT_OPTS: ResourceSlotOptions = ResourceSlotOptions {
103
+ min_slots: 1,
104
+ max_slots: 10_000,
105
+ // No ramp is chosen under the assumption that nexus tasks are unlikely to use many resources
106
+ // and would prefer lowest latency over protection against oversubscription.
107
+ ramp_throttle: Duration::from_millis(0),
108
+ };
94
109
 
95
110
  /// Options for a specific slot type
96
111
  #[derive(Debug, Clone, Copy, derive_more::Constructor)]
@@ -375,6 +390,13 @@ impl<MI: SystemResourceInfo + Sync + Send + 'static> WorkerTuner for ResourceBas
375
390
  self.slots.as_kind(o)
376
391
  }
377
392
 
393
+ fn nexus_task_slot_supplier(
394
+ &self,
395
+ ) -> Arc<dyn SlotSupplier<SlotKind = NexusSlotKind> + Send + Sync> {
396
+ let o = self.nexus_opts.unwrap_or(DEFAULT_NEXUS_SLOT_OPTS);
397
+ self.slots.as_kind(o)
398
+ }
399
+
378
400
  fn attach_metrics(&self, metrics: TemporalMeter) {
379
401
  self.slots.attach_metrics(metrics);
380
402
  }
@@ -524,8 +546,8 @@ mod tests {
524
546
  use super::*;
525
547
  use crate::{abstractions::MeteredPermitDealer, telemetry::metrics::MetricsContext};
526
548
  use std::sync::{
527
- atomic::{AtomicU64, Ordering},
528
549
  Arc,
550
+ atomic::{AtomicU64, Ordering},
529
551
  };
530
552
  use temporal_sdk_core_api::worker::WorkflowSlotKind;
531
553
 
@@ -11,8 +11,8 @@ use std::sync::{Arc, OnceLock};
11
11
  use temporal_sdk_core_api::{
12
12
  telemetry::metrics::TemporalMeter,
13
13
  worker::{
14
- ActivitySlotKind, LocalActivitySlotKind, SlotKind, SlotSupplier, WorkerConfig, WorkerTuner,
15
- WorkflowSlotKind,
14
+ ActivitySlotKind, LocalActivitySlotKind, NexusSlotKind, SlotKind, SlotSupplier,
15
+ WorkerConfig, WorkerTuner, WorkflowSlotKind,
16
16
  },
17
17
  };
18
18
 
@@ -21,6 +21,7 @@ pub struct TunerHolder {
21
21
  wft_supplier: Arc<dyn SlotSupplier<SlotKind = WorkflowSlotKind> + Send + Sync>,
22
22
  act_supplier: Arc<dyn SlotSupplier<SlotKind = ActivitySlotKind> + Send + Sync>,
23
23
  la_supplier: Arc<dyn SlotSupplier<SlotKind = LocalActivitySlotKind> + Send + Sync>,
24
+ nexus_supplier: Arc<dyn SlotSupplier<SlotKind = NexusSlotKind> + Send + Sync>,
24
25
  metrics: OnceLock<TemporalMeter>,
25
26
  }
26
27
 
@@ -39,6 +40,9 @@ pub struct TunerHolderOptions {
39
40
  /// Options for local activity slots
40
41
  #[builder(default, setter(strip_option))]
41
42
  pub local_activity_slot_options: Option<SlotSupplierOptions<LocalActivitySlotKind>>,
43
+ /// Options for nexus slots
44
+ #[builder(default, setter(strip_option))]
45
+ pub nexus_slot_options: Option<SlotSupplierOptions<NexusSlotKind>>,
42
46
  /// Options that will apply to all resource based slot suppliers. Must be set if any slot
43
47
  /// options are [SlotSupplierOptions::ResourceBased]
44
48
  #[builder(default, setter(strip_option))]
@@ -165,6 +169,7 @@ pub struct TunerBuilder {
165
169
  Option<Arc<dyn SlotSupplier<SlotKind = ActivitySlotKind> + Send + Sync>>,
166
170
  local_activity_slot_supplier:
167
171
  Option<Arc<dyn SlotSupplier<SlotKind = LocalActivitySlotKind> + Send + Sync>>,
172
+ nexus_slot_supplier: Option<Arc<dyn SlotSupplier<SlotKind = NexusSlotKind> + Send + Sync>>,
168
173
  }
169
174
 
170
175
  impl TunerBuilder {
@@ -179,6 +184,9 @@ impl TunerBuilder {
179
184
  if let Some(m) = cfg.max_outstanding_local_activities {
180
185
  builder.local_activity_slot_supplier(Arc::new(FixedSizeSlotSupplier::new(m)));
181
186
  }
187
+ if let Some(m) = cfg.max_outstanding_nexus_tasks {
188
+ builder.nexus_slot_supplier(Arc::new(FixedSizeSlotSupplier::new(m)));
189
+ }
182
190
  builder
183
191
  }
184
192
 
@@ -209,6 +217,15 @@ impl TunerBuilder {
209
217
  self
210
218
  }
211
219
 
220
+ /// Set a nexus slot supplier
221
+ pub fn nexus_slot_supplier(
222
+ &mut self,
223
+ supplier: Arc<dyn SlotSupplier<SlotKind = NexusSlotKind> + Send + Sync>,
224
+ ) -> &mut Self {
225
+ self.nexus_slot_supplier = Some(supplier);
226
+ self
227
+ }
228
+
212
229
  /// Build a [WorkerTuner] from the configured slot suppliers
213
230
  pub fn build(&mut self) -> TunerHolder {
214
231
  TunerHolder {
@@ -224,6 +241,10 @@ impl TunerBuilder {
224
241
  .local_activity_slot_supplier
225
242
  .clone()
226
243
  .unwrap_or_else(|| Arc::new(FixedSizeSlotSupplier::new(100))),
244
+ nexus_supplier: self
245
+ .nexus_slot_supplier
246
+ .clone()
247
+ .unwrap_or_else(|| Arc::new(FixedSizeSlotSupplier::new(100))),
227
248
  metrics: OnceLock::new(),
228
249
  }
229
250
  }
@@ -248,6 +269,12 @@ impl WorkerTuner for TunerHolder {
248
269
  self.la_supplier.clone()
249
270
  }
250
271
 
272
+ fn nexus_task_slot_supplier(
273
+ &self,
274
+ ) -> Arc<dyn SlotSupplier<SlotKind = NexusSlotKind> + Send + Sync> {
275
+ self.nexus_supplier.clone()
276
+ }
277
+
251
278
  fn attach_metrics(&self, m: TemporalMeter) {
252
279
  let _ = self.metrics.set(m);
253
280
  }
@@ -1,6 +1,6 @@
1
1
  use crate::{
2
2
  telemetry::VecDisplayer,
3
- worker::workflow::{OutgoingJob, WFCommand, WorkflowStartedInfo},
3
+ worker::workflow::{OutgoingJob, WFCommand, WFCommandVariant, WorkflowStartedInfo},
4
4
  };
5
5
  use prost_types::Timestamp;
6
6
  use std::{
@@ -8,7 +8,7 @@ use std::{
8
8
  sync::mpsc::{self, Receiver, Sender},
9
9
  };
10
10
  use temporal_sdk_core_protos::{
11
- coresdk::workflow_activation::{start_workflow_from_attribs, WorkflowActivationJob},
11
+ coresdk::workflow_activation::{WorkflowActivationJob, start_workflow_from_attribs},
12
12
  temporal::api::{common::v1::Payload, history::v1::WorkflowExecutionStartedEventAttributes},
13
13
  utilities::TryIntoOrNone,
14
14
  };
@@ -86,7 +86,12 @@ impl DrivenWorkflow {
86
86
  /// from a buffer that the language side sinks into when it calls [crate::Core::complete_task]
87
87
  pub(super) fn fetch_workflow_iteration_output(&mut self) -> Vec<WFCommand> {
88
88
  let in_cmds = self.incoming_commands.try_recv();
89
- let in_cmds = in_cmds.unwrap_or_else(|_| vec![WFCommand::NoCommandsFromLang]);
89
+ let in_cmds = in_cmds.unwrap_or_else(|_| {
90
+ vec![WFCommand {
91
+ variant: WFCommandVariant::NoCommandsFromLang,
92
+ metadata: None,
93
+ }]
94
+ });
90
95
  debug!(in_cmds = %in_cmds.display(), "wf bridge iteration fetch");
91
96
  in_cmds
92
97
  }
@@ -5,7 +5,7 @@ use crate::{
5
5
  workflow::{CacheMissFetchReq, PermittedWFT, PreparedWFT},
6
6
  },
7
7
  };
8
- use futures_util::{future::BoxFuture, FutureExt, Stream, TryFutureExt};
8
+ use futures_util::{FutureExt, Stream, TryFutureExt, future::BoxFuture};
9
9
  use itertools::Itertools;
10
10
  use std::{
11
11
  collections::VecDeque,
@@ -20,8 +20,7 @@ use std::{
20
20
  use temporal_sdk_core_protos::temporal::api::{
21
21
  enums::v1::EventType,
22
22
  history::v1::{
23
- history_event, history_event::Attributes, History, HistoryEvent,
24
- WorkflowTaskCompletedEventAttributes,
23
+ History, HistoryEvent, WorkflowTaskCompletedEventAttributes, history_event::Attributes,
25
24
  },
26
25
  };
27
26
  use tracing::Instrument;
@@ -651,9 +650,7 @@ impl HistoryUpdate {
651
650
  .iter()
652
651
  .skip_while(|e| e.event_id < from_id)
653
652
  .find_map(|e| match &e.attributes {
654
- Some(history_event::Attributes::WorkflowTaskCompletedEventAttributes(ref a)) => {
655
- Some(a)
656
- }
653
+ Some(Attributes::WorkflowTaskCompletedEventAttributes(a)) => Some(a),
657
654
  _ => None,
658
655
  })
659
656
  }
@@ -794,7 +791,7 @@ mod tests {
794
791
  use super::*;
795
792
  use crate::{
796
793
  replay::{HistoryInfo, TestHistoryBuilder},
797
- test_help::{canned_histories, hist_to_poll_resp, mock_sdk_cfg, MockPollCfg, ResponseType},
794
+ test_help::{MockPollCfg, ResponseType, canned_histories, hist_to_poll_resp, mock_sdk_cfg},
798
795
  worker::client::mocks::mock_workflow_client,
799
796
  };
800
797
  use futures_util::{StreamExt, TryStreamExt};
@@ -802,11 +799,11 @@ mod tests {
802
799
  use temporal_client::WorkflowOptions;
803
800
  use temporal_sdk::WfContext;
804
801
  use temporal_sdk_core_protos::{
802
+ DEFAULT_WORKFLOW_TYPE,
805
803
  temporal::api::{
806
804
  common::v1::WorkflowExecution, enums::v1::WorkflowTaskFailedCause,
807
805
  workflowservice::v1::GetWorkflowExecutionHistoryResponse,
808
806
  },
809
- DEFAULT_WORKFLOW_TYPE,
810
807
  };
811
808
 
812
809
  impl From<HistoryInfo> for HistoryUpdate {