temporalio 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. checksums.yaml +4 -4
  2. data/README.md +25 -23
  3. data/bridge/Cargo.lock +168 -59
  4. data/bridge/Cargo.toml +4 -2
  5. data/bridge/sdk-core/README.md +19 -6
  6. data/bridge/sdk-core/client/src/lib.rs +215 -39
  7. data/bridge/sdk-core/client/src/metrics.rs +17 -8
  8. data/bridge/sdk-core/client/src/raw.rs +4 -4
  9. data/bridge/sdk-core/client/src/retry.rs +32 -20
  10. data/bridge/sdk-core/core/Cargo.toml +22 -9
  11. data/bridge/sdk-core/core/src/abstractions.rs +203 -14
  12. data/bridge/sdk-core/core/src/core_tests/activity_tasks.rs +76 -41
  13. data/bridge/sdk-core/core/src/core_tests/determinism.rs +165 -2
  14. data/bridge/sdk-core/core/src/core_tests/local_activities.rs +204 -83
  15. data/bridge/sdk-core/core/src/core_tests/queries.rs +3 -4
  16. data/bridge/sdk-core/core/src/core_tests/workers.rs +1 -3
  17. data/bridge/sdk-core/core/src/core_tests/workflow_tasks.rs +397 -54
  18. data/bridge/sdk-core/core/src/ephemeral_server/mod.rs +106 -12
  19. data/bridge/sdk-core/core/src/internal_flags.rs +136 -0
  20. data/bridge/sdk-core/core/src/lib.rs +16 -9
  21. data/bridge/sdk-core/core/src/telemetry/log_export.rs +1 -1
  22. data/bridge/sdk-core/core/src/telemetry/metrics.rs +69 -35
  23. data/bridge/sdk-core/core/src/telemetry/mod.rs +29 -13
  24. data/bridge/sdk-core/core/src/telemetry/prometheus_server.rs +17 -12
  25. data/bridge/sdk-core/core/src/test_help/mod.rs +62 -12
  26. data/bridge/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +112 -156
  27. data/bridge/sdk-core/core/src/worker/activities/activity_task_poller_stream.rs +89 -0
  28. data/bridge/sdk-core/core/src/worker/activities/local_activities.rs +352 -122
  29. data/bridge/sdk-core/core/src/worker/activities.rs +233 -157
  30. data/bridge/sdk-core/core/src/worker/client/mocks.rs +22 -2
  31. data/bridge/sdk-core/core/src/worker/client.rs +18 -2
  32. data/bridge/sdk-core/core/src/worker/mod.rs +165 -58
  33. data/bridge/sdk-core/core/src/worker/workflow/bridge.rs +1 -3
  34. data/bridge/sdk-core/core/src/worker/workflow/driven_workflow.rs +3 -5
  35. data/bridge/sdk-core/core/src/worker/workflow/history_update.rs +856 -277
  36. data/bridge/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +100 -43
  37. data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +7 -7
  38. data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +5 -4
  39. data/bridge/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +87 -27
  40. data/bridge/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +5 -4
  41. data/bridge/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +5 -4
  42. data/bridge/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +5 -4
  43. data/bridge/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +137 -62
  44. data/bridge/sdk-core/core/src/worker/workflow/machines/mod.rs +25 -17
  45. data/bridge/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +7 -6
  46. data/bridge/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +103 -152
  47. data/bridge/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +7 -7
  48. data/bridge/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +9 -9
  49. data/bridge/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +2 -2
  50. data/bridge/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +14 -7
  51. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines/local_acts.rs +5 -16
  52. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +201 -121
  53. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +11 -14
  54. data/bridge/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +30 -15
  55. data/bridge/sdk-core/core/src/worker/workflow/managed_run.rs +1026 -376
  56. data/bridge/sdk-core/core/src/worker/workflow/mod.rs +460 -384
  57. data/bridge/sdk-core/core/src/worker/workflow/run_cache.rs +40 -57
  58. data/bridge/sdk-core/core/src/worker/workflow/wft_extraction.rs +125 -0
  59. data/bridge/sdk-core/core/src/worker/workflow/wft_poller.rs +1 -4
  60. data/bridge/sdk-core/core/src/worker/workflow/workflow_stream/saved_wf_inputs.rs +117 -0
  61. data/bridge/sdk-core/core/src/worker/workflow/workflow_stream/tonic_status_serde.rs +24 -0
  62. data/bridge/sdk-core/core/src/worker/workflow/workflow_stream.rs +448 -718
  63. data/bridge/sdk-core/core-api/Cargo.toml +2 -1
  64. data/bridge/sdk-core/core-api/src/errors.rs +1 -34
  65. data/bridge/sdk-core/core-api/src/lib.rs +6 -2
  66. data/bridge/sdk-core/core-api/src/telemetry.rs +0 -6
  67. data/bridge/sdk-core/core-api/src/worker.rs +14 -1
  68. data/bridge/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +18 -15
  69. data/bridge/sdk-core/fsm/rustfsm_trait/src/lib.rs +8 -3
  70. data/bridge/sdk-core/histories/evict_while_la_running_no_interference-16_history.bin +0 -0
  71. data/bridge/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +5 -17
  72. data/bridge/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +11 -0
  73. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +1 -6
  74. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +6 -6
  75. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +5 -0
  76. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +22 -6
  77. data/bridge/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +48 -19
  78. data/bridge/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +2 -0
  79. data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +3 -0
  80. data/bridge/sdk-core/protos/api_upstream/temporal/api/{enums/v1/interaction_type.proto → protocol/v1/message.proto} +29 -11
  81. data/bridge/sdk-core/protos/api_upstream/temporal/api/sdk/v1/task_complete_metadata.proto +63 -0
  82. data/bridge/sdk-core/protos/api_upstream/temporal/api/update/v1/message.proto +111 -0
  83. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +59 -28
  84. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +2 -2
  85. data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +1 -0
  86. data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +1 -0
  87. data/bridge/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +1 -0
  88. data/bridge/sdk-core/protos/local/temporal/sdk/core/common/common.proto +1 -0
  89. data/bridge/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +1 -0
  90. data/bridge/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +1 -0
  91. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +7 -0
  92. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +1 -0
  93. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +6 -0
  94. data/bridge/sdk-core/sdk/Cargo.toml +3 -2
  95. data/bridge/sdk-core/sdk/src/lib.rs +87 -20
  96. data/bridge/sdk-core/sdk/src/workflow_future.rs +9 -8
  97. data/bridge/sdk-core/sdk-core-protos/Cargo.toml +5 -2
  98. data/bridge/sdk-core/sdk-core-protos/build.rs +36 -1
  99. data/bridge/sdk-core/sdk-core-protos/src/history_builder.rs +100 -87
  100. data/bridge/sdk-core/sdk-core-protos/src/history_info.rs +5 -1
  101. data/bridge/sdk-core/sdk-core-protos/src/lib.rs +175 -57
  102. data/bridge/sdk-core/sdk-core-protos/src/task_token.rs +12 -2
  103. data/bridge/sdk-core/test-utils/Cargo.toml +3 -1
  104. data/bridge/sdk-core/test-utils/src/canned_histories.rs +106 -296
  105. data/bridge/sdk-core/test-utils/src/histfetch.rs +1 -1
  106. data/bridge/sdk-core/test-utils/src/lib.rs +82 -23
  107. data/bridge/sdk-core/test-utils/src/wf_input_saver.rs +50 -0
  108. data/bridge/sdk-core/test-utils/src/workflows.rs +29 -0
  109. data/bridge/sdk-core/tests/fuzzy_workflow.rs +130 -0
  110. data/bridge/sdk-core/tests/{load_tests.rs → heavy_tests.rs} +125 -51
  111. data/bridge/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +25 -3
  112. data/bridge/sdk-core/tests/integ_tests/heartbeat_tests.rs +5 -3
  113. data/bridge/sdk-core/tests/integ_tests/metrics_tests.rs +218 -16
  114. data/bridge/sdk-core/tests/integ_tests/polling_tests.rs +4 -47
  115. data/bridge/sdk-core/tests/integ_tests/queries_tests.rs +5 -128
  116. data/bridge/sdk-core/tests/integ_tests/visibility_tests.rs +83 -25
  117. data/bridge/sdk-core/tests/integ_tests/workflow_tests/activities.rs +93 -69
  118. data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +1 -0
  119. data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +6 -13
  120. data/bridge/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +1 -0
  121. data/bridge/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +6 -2
  122. data/bridge/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +3 -10
  123. data/bridge/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +72 -191
  124. data/bridge/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +1 -0
  125. data/bridge/sdk-core/tests/integ_tests/workflow_tests/patches.rs +7 -28
  126. data/bridge/sdk-core/tests/integ_tests/workflow_tests/replay.rs +12 -7
  127. data/bridge/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -0
  128. data/bridge/sdk-core/tests/integ_tests/workflow_tests/signals.rs +18 -14
  129. data/bridge/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +6 -20
  130. data/bridge/sdk-core/tests/integ_tests/workflow_tests/timers.rs +10 -21
  131. data/bridge/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +6 -4
  132. data/bridge/sdk-core/tests/integ_tests/workflow_tests.rs +10 -11
  133. data/bridge/sdk-core/tests/main.rs +3 -13
  134. data/bridge/sdk-core/tests/runner.rs +75 -36
  135. data/bridge/sdk-core/tests/wf_input_replay.rs +32 -0
  136. data/bridge/src/connection.rs +41 -25
  137. data/bridge/src/lib.rs +269 -14
  138. data/bridge/src/runtime.rs +1 -1
  139. data/bridge/src/test_server.rs +153 -0
  140. data/bridge/src/worker.rs +89 -16
  141. data/lib/gen/temporal/api/command/v1/message_pb.rb +4 -18
  142. data/lib/gen/temporal/api/common/v1/message_pb.rb +4 -0
  143. data/lib/gen/temporal/api/enums/v1/command_type_pb.rb +1 -3
  144. data/lib/gen/temporal/api/enums/v1/event_type_pb.rb +3 -3
  145. data/lib/gen/temporal/api/enums/v1/failed_cause_pb.rb +2 -0
  146. data/lib/gen/temporal/api/enums/v1/update_pb.rb +6 -4
  147. data/lib/gen/temporal/api/history/v1/message_pb.rb +27 -19
  148. data/lib/gen/temporal/api/namespace/v1/message_pb.rb +1 -0
  149. data/lib/gen/temporal/api/operatorservice/v1/request_response_pb.rb +3 -0
  150. data/lib/gen/temporal/api/protocol/v1/message_pb.rb +30 -0
  151. data/lib/gen/temporal/api/sdk/v1/task_complete_metadata_pb.rb +23 -0
  152. data/lib/gen/temporal/api/testservice/v1/request_response_pb.rb +49 -0
  153. data/lib/gen/temporal/api/testservice/v1/service_pb.rb +21 -0
  154. data/lib/gen/temporal/api/update/v1/message_pb.rb +72 -0
  155. data/lib/gen/temporal/api/workflowservice/v1/request_response_pb.rb +26 -16
  156. data/lib/gen/temporal/sdk/core/activity_result/activity_result_pb.rb +13 -9
  157. data/lib/gen/temporal/sdk/core/activity_task/activity_task_pb.rb +10 -6
  158. data/lib/gen/temporal/sdk/core/child_workflow/child_workflow_pb.rb +13 -9
  159. data/lib/gen/temporal/sdk/core/common/common_pb.rb +7 -3
  160. data/lib/gen/temporal/sdk/core/core_interface_pb.rb +9 -3
  161. data/lib/gen/temporal/sdk/core/external_data/external_data_pb.rb +7 -3
  162. data/lib/gen/temporal/sdk/core/workflow_activation/workflow_activation_pb.rb +27 -21
  163. data/lib/gen/temporal/sdk/core/workflow_commands/workflow_commands_pb.rb +28 -24
  164. data/lib/gen/temporal/sdk/core/workflow_completion/workflow_completion_pb.rb +12 -5
  165. data/lib/temporalio/activity/context.rb +13 -8
  166. data/lib/temporalio/activity/info.rb +1 -1
  167. data/lib/temporalio/bridge/connect_options.rb +15 -0
  168. data/lib/temporalio/bridge/retry_config.rb +24 -0
  169. data/lib/temporalio/bridge/tls_options.rb +19 -0
  170. data/lib/temporalio/client/implementation.rb +8 -8
  171. data/lib/temporalio/connection/retry_config.rb +44 -0
  172. data/lib/temporalio/connection/service.rb +20 -0
  173. data/lib/temporalio/connection/test_service.rb +92 -0
  174. data/lib/temporalio/connection/tls_options.rb +51 -0
  175. data/lib/temporalio/connection/workflow_service.rb +731 -0
  176. data/lib/temporalio/connection.rb +55 -720
  177. data/lib/temporalio/interceptor/activity_inbound.rb +22 -0
  178. data/lib/temporalio/interceptor/activity_outbound.rb +24 -0
  179. data/lib/temporalio/interceptor/chain.rb +5 -5
  180. data/lib/temporalio/interceptor/client.rb +8 -4
  181. data/lib/temporalio/interceptor.rb +22 -0
  182. data/lib/temporalio/retry_policy.rb +13 -3
  183. data/lib/temporalio/testing/time_skipping_handle.rb +32 -0
  184. data/lib/temporalio/testing/time_skipping_interceptor.rb +23 -0
  185. data/lib/temporalio/testing/workflow_environment.rb +112 -0
  186. data/lib/temporalio/testing.rb +175 -0
  187. data/lib/temporalio/version.rb +1 -1
  188. data/lib/temporalio/worker/activity_runner.rb +26 -4
  189. data/lib/temporalio/worker/activity_worker.rb +44 -18
  190. data/lib/temporalio/worker/sync_worker.rb +47 -11
  191. data/lib/temporalio/worker.rb +27 -21
  192. data/lib/temporalio/workflow/async.rb +46 -0
  193. data/lib/temporalio/workflow/future.rb +138 -0
  194. data/lib/temporalio/workflow/info.rb +76 -0
  195. data/temporalio.gemspec +4 -3
  196. metadata +67 -17
  197. data/bridge/sdk-core/Cargo.lock +0 -2606
  198. data/bridge/sdk-core/protos/api_upstream/temporal/api/interaction/v1/message.proto +0 -87
  199. data/lib/bridge.so +0 -0
  200. data/lib/gen/temporal/api/enums/v1/interaction_type_pb.rb +0 -25
  201. data/lib/gen/temporal/api/interaction/v1/message_pb.rb +0 -49
  202. data/lib/gen/temporal/sdk/core/bridge/bridge_pb.rb +0 -222
@@ -1,4 +1,5 @@
1
1
  mod activity_heartbeat_manager;
2
+ mod activity_task_poller_stream;
2
3
  mod local_activities;
3
4
 
4
5
  pub(crate) use local_activities::{
@@ -7,11 +8,14 @@ pub(crate) use local_activities::{
7
8
  LocalInFlightActInfo, NewLocalAct,
8
9
  };
9
10
 
10
- use crate::telemetry::metrics::eager;
11
+ use crate::abstractions::{ClosableMeteredSemaphore, TrackedOwnedMeteredSemPermit};
12
+ use crate::worker::activities::activity_task_poller_stream::new_activity_task_poller;
11
13
  use crate::{
12
- abstractions::{MeteredSemaphore, OwnedMeteredSemPermit},
14
+ abstractions::{MeteredSemaphore, OwnedMeteredSemPermit, UsedMeteredSemPermit},
13
15
  pollers::BoxedActPoller,
14
- telemetry::metrics::{activity_type, activity_worker_type, workflow_type, MetricsContext},
16
+ telemetry::metrics::{
17
+ activity_type, activity_worker_type, eager, workflow_type, MetricsContext,
18
+ },
15
19
  worker::{
16
20
  activities::activity_heartbeat_manager::ActivityHeartbeatError, client::WorkerClient,
17
21
  },
@@ -19,14 +23,11 @@ use crate::{
19
23
  };
20
24
  use activity_heartbeat_manager::ActivityHeartbeatManager;
21
25
  use dashmap::DashMap;
22
- use governor::{
23
- clock::DefaultClock,
24
- middleware::NoOpMiddleware,
25
- state::{InMemoryState, NotKeyed},
26
- Quota, RateLimiter,
27
- };
26
+ use futures::{stream, stream::BoxStream, stream::PollNext, Stream, StreamExt};
27
+ use governor::{Quota, RateLimiter};
28
28
  use std::{
29
29
  convert::TryInto,
30
+ future,
30
31
  sync::Arc,
31
32
  time::{Duration, Instant},
32
33
  };
@@ -41,7 +42,10 @@ use temporal_sdk_core_protos::{
41
42
  workflowservice::v1::PollActivityTaskQueueResponse,
42
43
  },
43
44
  };
44
- use tokio::sync::Notify;
45
+ use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
46
+ use tokio::sync::{Mutex, Notify};
47
+ use tokio_stream::wrappers::UnboundedReceiverStream;
48
+ use tokio_util::sync::CancellationToken;
45
49
  use tracing::Span;
46
50
 
47
51
  #[derive(Debug, derive_more::Constructor)]
@@ -74,10 +78,10 @@ struct RemoteInFlightActInfo {
74
78
  /// discard the reply.
75
79
  pub known_not_found: bool,
76
80
  /// The permit from the max concurrent semaphore
77
- _permit: OwnedMeteredSemPermit,
81
+ _permit: UsedMeteredSemPermit,
78
82
  }
79
83
  impl RemoteInFlightActInfo {
80
- fn new(poll_resp: &PollActivityTaskQueueResponse, permit: OwnedMeteredSemPermit) -> Self {
84
+ fn new(poll_resp: &PollActivityTaskQueueResponse, permit: UsedMeteredSemPermit) -> Self {
81
85
  let wec = poll_resp.workflow_execution.clone().unwrap_or_default();
82
86
  Self {
83
87
  base: InFlightActInfo {
@@ -95,43 +99,38 @@ impl RemoteInFlightActInfo {
95
99
  }
96
100
  }
97
101
 
98
- struct NonPollActBuffer {
99
- tx: async_channel::Sender<PermittedTqResp>,
100
- rx: async_channel::Receiver<PermittedTqResp>,
101
- }
102
- impl NonPollActBuffer {
103
- pub fn new() -> Self {
104
- let (tx, rx) = async_channel::unbounded();
105
- Self { tx, rx }
106
- }
107
-
108
- pub async fn next(&self) -> PermittedTqResp {
109
- self.rx.recv().await.expect("Send half cannot be dropped")
110
- }
111
- }
112
-
113
102
  pub(crate) struct WorkerActivityTasks {
103
+ /// Token used to signal the server task poller that shutdown is beginning
104
+ poller_shutdown_token: CancellationToken,
114
105
  /// Centralizes management of heartbeat issuing / throttling
115
106
  heartbeat_manager: ActivityHeartbeatManager,
107
+ /// Combined stream for any ActivityTask producing source (polls, eager activities, cancellations)
108
+ activity_task_stream: Mutex<BoxStream<'static, Result<ActivityTask, PollActivityError>>>,
116
109
  /// Activities that have been issued to lang but not yet completed
117
- outstanding_activity_tasks: DashMap<TaskToken, RemoteInFlightActInfo>,
118
- /// Buffers activity task polling in the event we need to return a cancellation while a poll is
119
- /// ongoing.
120
- poller: BoxedActPoller,
121
- /// Holds activity tasks we have received by non-polling means. EX: In direct response to
122
- /// workflow task completion.
123
- non_poll_tasks: NonPollActBuffer,
124
- /// Ensures we stay at or below this worker's maximum concurrent activity limit
125
- activities_semaphore: Arc<MeteredSemaphore>,
126
- /// Enables per-worker rate-limiting of activity tasks
127
- ratelimiter: Option<RateLimiter<NotKeyed, InMemoryState, DefaultClock, NoOpMiddleware>>,
128
- /// Wakes every time an activity is removed from the outstanding map
129
- complete_notify: Notify,
110
+ outstanding_activity_tasks: Arc<DashMap<TaskToken, RemoteInFlightActInfo>>,
111
+ /// Ensures we don't exceed this worker's maximum concurrent activity limit for activities.
112
+ /// This semaphore is used to limit eager activities but shares the same underlying [MeteredSemaphore] that is used
113
+ /// to limit the concurrency for non-eager activities.
114
+ eager_activities_semaphore: Arc<ClosableMeteredSemaphore>,
115
+ /// Holds activity tasks we have received in direct response to workflow task completion (a.k.a eager activities).
116
+ /// Tasks received in this stream hold a "tracked" permit that is issued by the `eager_activities_semaphore`.
117
+ eager_activities_tx: UnboundedSender<TrackedPermittedTqResp>,
130
118
 
131
119
  metrics: MetricsContext,
132
120
 
133
121
  max_heartbeat_throttle_interval: Duration,
134
122
  default_heartbeat_throttle_interval: Duration,
123
+
124
+ /// Wakes every time an activity is removed from the outstanding map
125
+ complete_notify: Arc<Notify>,
126
+ /// Token to notify when poll returned a shutdown error
127
+ poll_returned_shutdown_token: CancellationToken,
128
+ }
129
+
130
+ #[derive(derive_more::From)]
131
+ enum ActivityTaskSource {
132
+ PendingCancel(PendingActivityCancel),
133
+ PendingStart(Result<(PermittedTqResp, bool), PollActivityError>),
135
134
  }
136
135
 
137
136
  impl WorkerActivityTasks {
@@ -144,90 +143,186 @@ impl WorkerActivityTasks {
144
143
  max_heartbeat_throttle_interval: Duration,
145
144
  default_heartbeat_throttle_interval: Duration,
146
145
  ) -> Self {
147
- Self {
148
- heartbeat_manager: ActivityHeartbeatManager::new(client),
149
- outstanding_activity_tasks: Default::default(),
146
+ let semaphore = Arc::new(MeteredSemaphore::new(
147
+ max_activity_tasks,
148
+ metrics.with_new_attrs([activity_worker_type()]),
149
+ MetricsContext::available_task_slots,
150
+ ));
151
+ let poller_shutdown_token = CancellationToken::new();
152
+ let rate_limiter = max_worker_act_per_sec.and_then(|ps| {
153
+ Quota::with_period(Duration::from_secs_f64(ps.recip())).map(RateLimiter::direct)
154
+ });
155
+ let outstanding_activity_tasks = Arc::new(DashMap::new());
156
+ let server_poller_stream = new_activity_task_poller(
150
157
  poller,
151
- non_poll_tasks: NonPollActBuffer::new(),
152
- activities_semaphore: Arc::new(MeteredSemaphore::new(
153
- max_activity_tasks,
154
- metrics.with_new_attrs([activity_worker_type()]),
155
- MetricsContext::available_task_slots,
156
- )),
157
- ratelimiter: max_worker_act_per_sec.and_then(|ps| {
158
- Quota::with_period(Duration::from_secs_f64(ps.recip())).map(RateLimiter::direct)
159
- }),
160
- complete_notify: Notify::new(),
158
+ semaphore.clone(),
159
+ rate_limiter,
160
+ metrics.clone(),
161
+ poller_shutdown_token.clone(),
162
+ );
163
+ let (eager_activities_tx, eager_activities_rx) = unbounded_channel();
164
+ let eager_activities_semaphore = ClosableMeteredSemaphore::new_arc(semaphore);
165
+
166
+ let start_tasks_stream_complete = CancellationToken::new();
167
+ let starts_stream = Self::merge_start_task_sources(
168
+ eager_activities_rx,
169
+ server_poller_stream,
170
+ eager_activities_semaphore.clone(),
171
+ start_tasks_stream_complete.clone(),
172
+ );
173
+ let (heartbeat_manager, cancels_rx) = ActivityHeartbeatManager::new(client);
174
+ let complete_notify = Arc::new(Notify::new());
175
+ let source_stream = stream::select_with_strategy(
176
+ UnboundedReceiverStream::new(cancels_rx).map(ActivityTaskSource::from),
177
+ starts_stream.map(ActivityTaskSource::from),
178
+ |_: &mut ()| PollNext::Left,
179
+ );
180
+ // Create a task stream composed of (in poll preference order):
181
+ // cancels_stream ------------------------------+--- activity_task_stream
182
+ // eager_activities_rx ---+--- starts_stream ---|
183
+ // server_poll_stream ---|
184
+ let activity_task_stream = Self::merge_source_streams(
185
+ source_stream,
186
+ outstanding_activity_tasks.clone(),
187
+ start_tasks_stream_complete,
188
+ complete_notify.clone(),
189
+ metrics.clone(),
190
+ );
191
+
192
+ Self {
193
+ poller_shutdown_token,
194
+ eager_activities_tx,
195
+ heartbeat_manager,
196
+ activity_task_stream: Mutex::new(activity_task_stream.boxed()),
197
+ outstanding_activity_tasks,
198
+ eager_activities_semaphore,
199
+ complete_notify,
161
200
  metrics,
162
201
  max_heartbeat_throttle_interval,
163
202
  default_heartbeat_throttle_interval,
203
+ poll_returned_shutdown_token: CancellationToken::new(),
164
204
  }
165
205
  }
166
206
 
167
- pub(crate) fn notify_shutdown(&self) {
168
- self.poller.notify_shutdown();
207
+ /// Merges the server poll and eager [ActivityTask] sources
208
+ fn merge_start_task_sources(
209
+ non_poll_tasks_rx: UnboundedReceiver<TrackedPermittedTqResp>,
210
+ poller_stream: impl Stream<Item = Result<PermittedTqResp, tonic::Status>>,
211
+ eager_activities_semaphore: Arc<ClosableMeteredSemaphore>,
212
+ on_complete_token: CancellationToken,
213
+ ) -> impl Stream<Item = Result<(PermittedTqResp, bool), PollActivityError>> {
214
+ let non_poll_stream = stream::unfold(
215
+ (non_poll_tasks_rx, eager_activities_semaphore),
216
+ |(mut non_poll_tasks_rx, eager_activities_semaphore)| async move {
217
+ loop {
218
+ tokio::select! {
219
+ biased;
220
+
221
+ task_opt = non_poll_tasks_rx.recv() => {
222
+ // Add is_eager true and wrap in Result
223
+ return task_opt.map(|task| (Ok((PermittedTqResp{ permit: task.permit.into(), resp: task.resp }, true)), (non_poll_tasks_rx, eager_activities_semaphore)));
224
+ }
225
+ _ = eager_activities_semaphore.close_complete() => {
226
+ // Once shutting down, we stop accepting eager activities
227
+ non_poll_tasks_rx.close();
228
+ continue;
229
+ }
230
+ }
231
+ }
232
+ },
233
+ );
234
+ // Add is_eager false
235
+ let poller_stream = poller_stream.map(|res| res.map(|task| (task, false)));
236
+
237
+ // Prefer eager activities over polling the server
238
+ stream::select_with_strategy(non_poll_stream, poller_stream, |_: &mut ()| PollNext::Left)
239
+ .map(|res| res.map_err(|err| err.into()))
240
+ // This map, chain, filter_map sequence is here to cancel the token when this stream ends.
241
+ .map(Some)
242
+ .chain(futures::stream::once(async move {
243
+ on_complete_token.cancel();
244
+ None
245
+ }))
246
+ .filter_map(future::ready)
169
247
  }
170
248
 
171
- /// Wait for all outstanding activity tasks to finish
172
- pub(crate) async fn wait_all_finished(&self) {
173
- while !self.outstanding_activity_tasks.is_empty() {
174
- self.complete_notify.notified().await
175
- }
249
+ /// Builds an [ActivityTask] stream for cancellation tasks from cancels delivered from heartbeats
250
+ fn merge_source_streams(
251
+ source_stream: impl Stream<Item = ActivityTaskSource>,
252
+ outstanding_tasks: Arc<DashMap<TaskToken, RemoteInFlightActInfo>>,
253
+ start_tasks_stream_complete: CancellationToken,
254
+ complete_notify: Arc<Notify>,
255
+ metrics: MetricsContext,
256
+ ) -> impl Stream<Item = Result<ActivityTask, PollActivityError>> {
257
+ let outstanding_tasks_clone = outstanding_tasks.clone();
258
+ source_stream.filter_map(move |source| {
259
+ let outstanding_tasks = outstanding_tasks.clone();
260
+ let metrics = metrics.clone();
261
+ async move {
262
+ match source {
263
+ ActivityTaskSource::PendingCancel(next_pc) => {
264
+ // It's possible that activity has been completed and we no longer have an
265
+ // outstanding activity task. This is fine because it means that we no
266
+ // longer need to cancel this activity, so we'll just ignore such orphaned
267
+ // cancellations.
268
+ if let Some(mut details) = outstanding_tasks.get_mut(&next_pc.task_token) {
269
+ if details.issued_cancel_to_lang {
270
+ // Don't double-issue cancellations
271
+ return None
272
+ }
273
+
274
+ details.issued_cancel_to_lang = true;
275
+ if next_pc.reason == ActivityCancelReason::NotFound {
276
+ details.known_not_found = true;
277
+ }
278
+ Some(Ok(ActivityTask::cancel_from_ids(next_pc.task_token.0, next_pc.reason)))
279
+ } else {
280
+ debug!(task_token = ?next_pc.task_token, "Unknown activity task when issuing cancel");
281
+ // If we can't find the activity here, it's already been completed,
282
+ // in which case issuing a cancel again is pointless.
283
+ None
284
+ }
285
+ },
286
+ ActivityTaskSource::PendingStart(res) => {
287
+ Some(res.map(|(task, is_eager)| {
288
+ Self::about_to_issue_task(outstanding_tasks, task, is_eager, metrics)
289
+ }))
290
+ }
291
+ }
292
+ }
293
+ }).take_until(async move {
294
+ start_tasks_stream_complete.cancelled().await;
295
+ while !outstanding_tasks_clone.is_empty() {
296
+ complete_notify.notified().await
297
+ }
298
+ })
176
299
  }
177
300
 
178
- pub(crate) async fn shutdown(self) {
179
- self.poller.shutdown_box().await;
180
- self.heartbeat_manager.shutdown().await;
301
+ pub(crate) fn notify_shutdown(&self) {
302
+ self.poller_shutdown_token.cancel();
303
+ self.eager_activities_semaphore.close();
181
304
  }
182
305
 
183
- /// Wait until not at the outstanding activity limit, and then poll for an activity task.
184
- ///
185
- /// Returns `Ok(None)` if no activity is ready and the overall polling loop should be retried.
186
- pub(crate) async fn poll(&self) -> Result<Option<ActivityTask>, PollActivityError> {
187
- let poll_with_semaphore = async {
188
- // Acquire and subsequently forget a permit for an outstanding activity. When they are
189
- // completed, we must add a new permit to the semaphore, since holding the permit the
190
- // entire time lang does work would be a challenge.
191
- let perm = self
192
- .activities_semaphore
193
- .acquire_owned()
194
- .await
195
- .expect("outstanding activity semaphore not closed");
196
- if let Some(ref rl) = self.ratelimiter {
197
- rl.until_ready().await;
198
- }
199
- (self.poller.poll().await, perm)
200
- };
306
+ async fn shutdown_complete(&self) {
307
+ self.poll_returned_shutdown_token.cancelled().await;
308
+ self.heartbeat_manager.shutdown().await;
309
+ }
201
310
 
202
- tokio::select! {
203
- biased;
311
+ pub(crate) async fn shutdown(&self) {
312
+ self.notify_shutdown();
313
+ self.shutdown_complete().await;
314
+ }
204
315
 
205
- cancel_task = self.next_pending_cancel_task() => {
206
- cancel_task
207
- }
208
- task = self.non_poll_tasks.next() => {
209
- Ok(Some(self.about_to_issue_task(task, true)))
210
- }
211
- (work, permit) = poll_with_semaphore => {
212
- match work {
213
- Some(Ok(work)) => {
214
- if work == PollActivityTaskQueueResponse::default() {
215
- // Timeout
216
- self.metrics.act_poll_timeout();
217
- return Ok(None)
218
- }
219
- let work = self.about_to_issue_task(PermittedTqResp {
220
- resp: work, permit
221
- }, false);
222
- Ok(Some(work))
223
- }
224
- None => {
225
- Err(PollActivityError::ShutDown)
226
- }
227
- Some(Err(e)) => Err(e.into())
228
- }
229
- }
230
- }
316
+ /// Exclusive poll for activity tasks
317
+ ///
318
+ /// Polls the various task sources (server polls, eager activities, cancellations) while respecting the provided rate limits and allowed concurrency.
319
+ /// Returns Err(PollActivityError::ShutDown) after shutdown is completed and all tasks sources are depleted.
320
+ pub(crate) async fn poll(&self) -> Result<ActivityTask, PollActivityError> {
321
+ let mut poller_stream = self.activity_task_stream.lock().await;
322
+ poller_stream.next().await.unwrap_or_else(|| {
323
+ self.poll_returned_shutdown_token.cancel();
324
+ Err(PollActivityError::ShutDown)
325
+ })
231
326
  }
232
327
 
233
328
  pub(crate) async fn complete(
@@ -337,48 +432,21 @@ impl WorkerActivityTasks {
337
432
  /// Returns a handle that the workflows management side can use to interact with this manager
338
433
  pub(crate) fn get_handle_for_workflows(&self) -> ActivitiesFromWFTsHandle {
339
434
  ActivitiesFromWFTsHandle {
340
- sem: self.activities_semaphore.clone(),
341
- tx: self.non_poll_tasks.tx.clone(),
435
+ sem: self.eager_activities_semaphore.clone(),
436
+ tx: self.eager_activities_tx.clone(),
342
437
  }
343
438
  }
344
439
 
345
- async fn next_pending_cancel_task(&self) -> Result<Option<ActivityTask>, PollActivityError> {
346
- let next_pc = self.heartbeat_manager.next_pending_cancel().await;
347
- // Issue cancellations for anything we noticed was cancelled during heartbeating
348
- if let Some(PendingActivityCancel { task_token, reason }) = next_pc {
349
- // It's possible that activity has been completed and we no longer have an
350
- // outstanding activity task. This is fine because it means that we no
351
- // longer need to cancel this activity, so we'll just ignore such orphaned
352
- // cancellations.
353
- if let Some(mut details) = self.outstanding_activity_tasks.get_mut(&task_token) {
354
- if details.issued_cancel_to_lang {
355
- // Don't double-issue cancellations
356
- return Ok(None);
357
- }
358
-
359
- details.issued_cancel_to_lang = true;
360
- if reason == ActivityCancelReason::NotFound {
361
- details.known_not_found = true;
362
- }
363
- Ok(Some(ActivityTask::cancel_from_ids(task_token.0, reason)))
364
- } else {
365
- debug!(task_token = ?task_token, "Unknown activity task when issuing cancel");
366
- // If we can't find the activity here, it's already been completed,
367
- // in which case issuing a cancel again is pointless.
368
- Ok(None)
369
- }
370
- } else {
371
- // The only situation where the next cancel would return none is if the manager
372
- // was dropped, which can only happen on shutdown.
373
- Err(PollActivityError::ShutDown)
374
- }
375
- }
376
-
377
- /// Called when there is a new act task about to be bubbled up out of the manager
378
- fn about_to_issue_task(&self, task: PermittedTqResp, is_eager: bool) -> ActivityTask {
440
+ /// Called when there is a new [ActivityTask] about to be bubbled up out of the poller
441
+ fn about_to_issue_task(
442
+ outstanding_tasks: Arc<DashMap<TaskToken, RemoteInFlightActInfo>>,
443
+ task: PermittedTqResp,
444
+ is_eager: bool,
445
+ metrics: MetricsContext,
446
+ ) -> ActivityTask {
379
447
  if let Some(ref act_type) = task.resp.activity_type {
380
448
  if let Some(ref wf_type) = task.resp.workflow_type {
381
- self.metrics
449
+ metrics
382
450
  .with_new_attrs([
383
451
  activity_type(act_type.name.clone()),
384
452
  workflow_type(wf_type.name.clone()),
@@ -391,12 +459,12 @@ impl WorkerActivityTasks {
391
459
  // activity_type and workflow_type, we won't bother.
392
460
 
393
461
  if let Some(dur) = task.resp.sched_to_start() {
394
- self.metrics.act_sched_to_start_latency(dur);
462
+ metrics.act_sched_to_start_latency(dur);
395
463
  };
396
464
 
397
- self.outstanding_activity_tasks.insert(
465
+ outstanding_tasks.insert(
398
466
  task.resp.task_token.clone().into(),
399
- RemoteInFlightActInfo::new(&task.resp, task.permit),
467
+ RemoteInFlightActInfo::new(&task.resp, task.permit.into_used()),
400
468
  );
401
469
 
402
470
  ActivityTask::start_from_poll_resp(task.resp)
@@ -404,40 +472,48 @@ impl WorkerActivityTasks {
404
472
 
405
473
  #[cfg(test)]
406
474
  pub(crate) fn remaining_activity_capacity(&self) -> usize {
407
- self.activities_semaphore.available_permits()
475
+ self.eager_activities_semaphore.available_permits()
408
476
  }
409
477
  }
410
478
 
411
479
  /// Provides facilities for the workflow side of things to interact with the activity manager.
412
480
  /// Allows for the handling of activities returned by WFT completions.
413
481
  pub(crate) struct ActivitiesFromWFTsHandle {
414
- sem: Arc<MeteredSemaphore>,
415
- tx: async_channel::Sender<PermittedTqResp>,
482
+ sem: Arc<ClosableMeteredSemaphore>,
483
+ tx: UnboundedSender<TrackedPermittedTqResp>,
416
484
  }
417
485
 
418
486
  impl ActivitiesFromWFTsHandle {
419
487
  /// Returns a handle that can be used to reserve an activity slot. EX: When requesting eager
420
488
  /// dispatch of an activity to this worker upon workflow task completion
421
- pub(crate) fn reserve_slot(&self) -> Option<OwnedMeteredSemPermit> {
489
+ pub(crate) fn reserve_slot(&self) -> Option<TrackedOwnedMeteredSemPermit> {
490
+ // TODO: check if rate limit is not exceeded and count this reservation towards the rate limit
422
491
  self.sem.try_acquire_owned().ok()
423
492
  }
424
493
 
425
494
  /// Queue new activity tasks for dispatch received from non-polling sources (ex: eager returns
426
495
  /// from WFT completion)
427
- pub(crate) fn add_tasks(&self, tasks: impl IntoIterator<Item = PermittedTqResp>) {
496
+ pub(crate) fn add_tasks(&self, tasks: impl IntoIterator<Item = TrackedPermittedTqResp>) {
428
497
  for t in tasks.into_iter() {
429
498
  // Technically we should be reporting `activity_task_received` here, but for simplicity
430
499
  // and time insensitivity, that metric is tracked in `about_to_issue_task`.
431
- self.tx.try_send(t).expect("Receive half cannot be dropped");
500
+ self.tx.send(t).expect("Receive half cannot be dropped");
432
501
  }
433
502
  }
434
503
  }
435
504
 
505
+ #[derive(Debug)]
436
506
  pub(crate) struct PermittedTqResp {
437
507
  pub permit: OwnedMeteredSemPermit,
438
508
  pub resp: PollActivityTaskQueueResponse,
439
509
  }
440
510
 
511
+ #[derive(Debug)]
512
+ pub(crate) struct TrackedPermittedTqResp {
513
+ pub permit: TrackedOwnedMeteredSemPermit,
514
+ pub resp: PollActivityTaskQueueResponse,
515
+ }
516
+
441
517
  #[cfg(test)]
442
518
  mod tests {
443
519
  use super::*;
@@ -471,8 +547,8 @@ mod tests {
471
547
  Duration::from_secs(1),
472
548
  );
473
549
  let start = Instant::now();
474
- atm.poll().await.unwrap().unwrap();
475
- atm.poll().await.unwrap().unwrap();
550
+ atm.poll().await.unwrap();
551
+ atm.poll().await.unwrap();
476
552
  // At least half a second will have elapsed since we only allow 2 tasks per second.
477
553
  // With no ratelimit, even on a slow CI server with lots of load, this would typically take
478
554
  // low single digit ms or less.
@@ -1,15 +1,33 @@
1
1
  use super::*;
2
2
  use futures::Future;
3
3
 
4
+ pub(crate) static DEFAULT_TEST_CAPABILITIES: &Capabilities = &Capabilities {
5
+ signal_and_query_header: true,
6
+ internal_error_differentiation: true,
7
+ activity_failure_include_heartbeat: true,
8
+ supports_schedules: true,
9
+ encoded_failure_attributes: true,
10
+ build_id_based_versioning: true,
11
+ upsert_memo: true,
12
+ eager_workflow_start: true,
13
+ sdk_metadata: true,
14
+ };
15
+
4
16
  #[cfg(test)]
5
17
  /// Create a mock client primed with basic necessary expectations
6
18
  pub(crate) fn mock_workflow_client() -> MockWorkerClient {
7
- MockWorkerClient::new()
19
+ let mut r = MockWorkerClient::new();
20
+ r.expect_capabilities()
21
+ .returning(|| Some(DEFAULT_TEST_CAPABILITIES));
22
+ r
8
23
  }
9
24
 
10
25
  /// Create a mock manual client primed with basic necessary expectations
11
26
  pub(crate) fn mock_manual_workflow_client() -> MockManualWorkerClient {
12
- MockManualWorkerClient::new()
27
+ let mut r = MockManualWorkerClient::new();
28
+ r.expect_capabilities()
29
+ .returning(|| Some(DEFAULT_TEST_CAPABILITIES));
30
+ r
13
31
  }
14
32
 
15
33
  // Need a version of the mock that can return futures so we can return potentially pending
@@ -83,5 +101,7 @@ mockall::mock! {
83
101
  query_result: QueryResult,
84
102
  ) -> impl Future<Output = Result<RespondQueryTaskCompletedResponse>> + Send + 'b
85
103
  where 'a: 'b, Self: 'b;
104
+
105
+ fn capabilities(&self) -> Option<&'static get_system_info_response::Capabilities>;
86
106
  }
87
107
  }
@@ -7,12 +7,13 @@ use temporal_sdk_core_protos::{
7
7
  coresdk::workflow_commands::QueryResult,
8
8
  temporal::api::{
9
9
  command::v1::Command,
10
- common::v1::{Payloads, WorkflowExecution},
10
+ common::v1::{MeteringMetadata, Payloads, WorkflowExecution},
11
11
  enums::v1::{TaskQueueKind, WorkflowTaskFailedCause},
12
12
  failure::v1::Failure,
13
13
  query::v1::WorkflowQueryResult,
14
+ sdk::v1::WorkflowTaskCompletedMetadata,
14
15
  taskqueue::v1::{StickyExecutionAttributes, TaskQueue, TaskQueueMetadata, VersionId},
15
- workflowservice::v1::*,
16
+ workflowservice::v1::{get_system_info_response::Capabilities, *},
16
17
  },
17
18
  TaskToken,
18
19
  };
@@ -109,6 +110,9 @@ pub(crate) trait WorkerClient: Sync + Send {
109
110
  task_token: TaskToken,
110
111
  query_result: QueryResult,
111
112
  ) -> Result<RespondQueryTaskCompletedResponse>;
113
+
114
+ #[allow(clippy::needless_lifetimes)] // Clippy is wrong here
115
+ fn capabilities<'a>(&'a self) -> Option<&'a get_system_info_response::Capabilities>;
112
116
  }
113
117
 
114
118
  #[async_trait::async_trait]
@@ -189,6 +193,7 @@ impl WorkerClient for WorkerClientBag {
189
193
  worker_versioning_id: Some(VersionId {
190
194
  worker_build_id: self.versioning_build_id(),
191
195
  }),
196
+ messages: vec![],
192
197
  binary_checksum: self.worker_build_id.clone(),
193
198
  query_results: request
194
199
  .query_responses
@@ -206,6 +211,8 @@ impl WorkerClient for WorkerClientBag {
206
211
  })
207
212
  .collect(),
208
213
  namespace: self.namespace.clone(),
214
+ sdk_metadata: Some(request.sdk_metadata),
215
+ metering_metadata: Some(request.metering_metadata),
209
216
  };
210
217
  Ok(self
211
218
  .client
@@ -302,6 +309,7 @@ impl WorkerClient for WorkerClientBag {
302
309
  identity: self.identity.clone(),
303
310
  binary_checksum: self.worker_build_id.clone(),
304
311
  namespace: self.namespace.clone(),
312
+ messages: vec![],
305
313
  };
306
314
  Ok(self
307
315
  .client
@@ -352,6 +360,10 @@ impl WorkerClient for WorkerClientBag {
352
360
  .await?
353
361
  .into_inner())
354
362
  }
363
+
364
+ fn capabilities(&self) -> Option<&Capabilities> {
365
+ self.client.get_client().inner().capabilities()
366
+ }
355
367
  }
356
368
 
357
369
  /// A version of [RespondWorkflowTaskCompletedRequest] that will finish being filled out by the
@@ -370,4 +382,8 @@ pub(crate) struct WorkflowTaskCompletion {
370
382
  pub return_new_workflow_task: bool,
371
383
  /// Force a new WFT to be created after this completion
372
384
  pub force_create_new_workflow_task: bool,
385
+ /// SDK-specific metadata to send
386
+ pub sdk_metadata: WorkflowTaskCompletedMetadata,
387
+ /// Metering info
388
+ pub metering_metadata: MeteringMetadata,
373
389
  }