@temporalio/core-bridge 1.9.2 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/Cargo.lock +754 -473
  2. package/Cargo.toml +3 -3
  3. package/lib/index.d.ts +33 -2
  4. package/lib/index.js.map +1 -1
  5. package/package.json +4 -4
  6. package/releases/aarch64-apple-darwin/index.node +0 -0
  7. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
  8. package/releases/x86_64-apple-darwin/index.node +0 -0
  9. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  10. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  11. package/scripts/build.js +4 -3
  12. package/sdk-core/.cargo/config.toml +2 -4
  13. package/sdk-core/.github/workflows/heavy.yml +1 -1
  14. package/sdk-core/.github/workflows/per-pr.yml +6 -4
  15. package/sdk-core/Cargo.toml +10 -3
  16. package/sdk-core/README.md +4 -6
  17. package/sdk-core/client/Cargo.toml +13 -5
  18. package/sdk-core/client/src/lib.rs +123 -34
  19. package/sdk-core/client/src/metrics.rs +70 -18
  20. package/sdk-core/client/src/proxy.rs +85 -0
  21. package/sdk-core/client/src/raw.rs +67 -5
  22. package/sdk-core/client/src/worker_registry/mod.rs +5 -3
  23. package/sdk-core/client/src/workflow_handle/mod.rs +3 -1
  24. package/sdk-core/core/Cargo.toml +31 -37
  25. package/sdk-core/core/src/abstractions/take_cell.rs +3 -3
  26. package/sdk-core/core/src/abstractions.rs +176 -108
  27. package/sdk-core/core/src/core_tests/activity_tasks.rs +4 -13
  28. package/sdk-core/core/src/core_tests/determinism.rs +2 -1
  29. package/sdk-core/core/src/core_tests/local_activities.rs +3 -3
  30. package/sdk-core/core/src/core_tests/mod.rs +3 -3
  31. package/sdk-core/core/src/core_tests/queries.rs +42 -5
  32. package/sdk-core/core/src/core_tests/workers.rs +2 -3
  33. package/sdk-core/core/src/core_tests/workflow_tasks.rs +115 -15
  34. package/sdk-core/core/src/ephemeral_server/mod.rs +109 -136
  35. package/sdk-core/core/src/internal_flags.rs +8 -8
  36. package/sdk-core/core/src/lib.rs +16 -11
  37. package/sdk-core/core/src/pollers/mod.rs +11 -5
  38. package/sdk-core/core/src/pollers/poll_buffer.rs +48 -29
  39. package/sdk-core/core/src/protosext/mod.rs +32 -32
  40. package/sdk-core/core/src/protosext/protocol_messages.rs +14 -24
  41. package/sdk-core/core/src/retry_logic.rs +2 -2
  42. package/sdk-core/core/src/telemetry/log_export.rs +10 -9
  43. package/sdk-core/core/src/telemetry/metrics.rs +233 -330
  44. package/sdk-core/core/src/telemetry/mod.rs +11 -38
  45. package/sdk-core/core/src/telemetry/otel.rs +355 -0
  46. package/sdk-core/core/src/telemetry/prometheus_server.rs +36 -23
  47. package/sdk-core/core/src/test_help/mod.rs +80 -59
  48. package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +6 -6
  49. package/sdk-core/core/src/worker/activities/local_activities.rs +46 -43
  50. package/sdk-core/core/src/worker/activities.rs +45 -46
  51. package/sdk-core/core/src/worker/client/mocks.rs +8 -7
  52. package/sdk-core/core/src/worker/client.rs +40 -39
  53. package/sdk-core/core/src/worker/mod.rs +72 -42
  54. package/sdk-core/core/src/worker/slot_provider.rs +28 -28
  55. package/sdk-core/core/src/worker/slot_supplier.rs +1 -0
  56. package/sdk-core/core/src/worker/tuner/fixed_size.rs +52 -0
  57. package/sdk-core/core/src/worker/tuner/resource_based.rs +561 -0
  58. package/sdk-core/core/src/worker/tuner.rs +122 -0
  59. package/sdk-core/core/src/worker/workflow/driven_workflow.rs +6 -6
  60. package/sdk-core/core/src/worker/workflow/history_update.rs +27 -53
  61. package/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +4 -17
  62. package/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +1 -10
  63. package/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +4 -11
  64. package/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +17 -35
  65. package/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +0 -8
  66. package/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +1 -5
  67. package/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +0 -5
  68. package/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +0 -5
  69. package/sdk-core/core/src/worker/workflow/machines/mod.rs +0 -14
  70. package/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +0 -5
  71. package/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +0 -5
  72. package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +1 -10
  73. package/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +3 -10
  74. package/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +12 -8
  75. package/sdk-core/core/src/worker/workflow/machines/update_state_machine.rs +0 -10
  76. package/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +6 -13
  77. package/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +27 -37
  78. package/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +3 -14
  79. package/sdk-core/core/src/worker/workflow/managed_run.rs +84 -54
  80. package/sdk-core/core/src/worker/workflow/mod.rs +63 -160
  81. package/sdk-core/core/src/worker/workflow/run_cache.rs +22 -13
  82. package/sdk-core/core/src/worker/workflow/wft_extraction.rs +16 -3
  83. package/sdk-core/core/src/worker/workflow/wft_poller.rs +15 -12
  84. package/sdk-core/core/src/worker/workflow/workflow_stream.rs +39 -78
  85. package/sdk-core/core-api/Cargo.toml +6 -5
  86. package/sdk-core/core-api/src/errors.rs +8 -0
  87. package/sdk-core/core-api/src/telemetry/metrics.rs +75 -4
  88. package/sdk-core/core-api/src/telemetry.rs +7 -1
  89. package/sdk-core/core-api/src/worker.rs +212 -56
  90. package/sdk-core/fsm/Cargo.toml +3 -0
  91. package/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +1 -1
  92. package/sdk-core/sdk/Cargo.toml +5 -7
  93. package/sdk-core/sdk/src/app_data.rs +3 -3
  94. package/sdk-core/sdk/src/lib.rs +5 -3
  95. package/sdk-core/sdk/src/workflow_context/options.rs +1 -1
  96. package/sdk-core/sdk/src/workflow_context.rs +10 -9
  97. package/sdk-core/sdk/src/workflow_future.rs +1 -1
  98. package/sdk-core/sdk-core-protos/Cargo.toml +8 -6
  99. package/sdk-core/sdk-core-protos/build.rs +1 -10
  100. package/sdk-core/sdk-core-protos/protos/api_upstream/.github/PULL_REQUEST_TEMPLATE.md +3 -0
  101. package/sdk-core/sdk-core-protos/protos/api_upstream/.github/workflows/ci.yml +26 -0
  102. package/sdk-core/sdk-core-protos/protos/api_upstream/Makefile +42 -20
  103. package/sdk-core/sdk-core-protos/protos/api_upstream/README.md +2 -0
  104. package/sdk-core/sdk-core-protos/protos/api_upstream/api-linter.yaml +36 -26
  105. package/sdk-core/sdk-core-protos/protos/api_upstream/buf.lock +2 -0
  106. package/sdk-core/sdk-core-protos/protos/api_upstream/google/protobuf/struct.proto +95 -0
  107. package/sdk-core/sdk-core-protos/protos/api_upstream/openapi/openapiv2.json +9632 -0
  108. package/sdk-core/sdk-core-protos/protos/api_upstream/openapi/openapiv3.yaml +7337 -0
  109. package/sdk-core/sdk-core-protos/protos/api_upstream/openapi/payload_description.txt +2 -0
  110. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/command/v1/message.proto +45 -11
  111. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/common/v1/message.proto +22 -4
  112. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/command_type.proto +2 -0
  113. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/common.proto +44 -0
  114. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/event_type.proto +18 -3
  115. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +20 -0
  116. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +30 -0
  117. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/update.proto +7 -8
  118. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/workflow.proto +23 -5
  119. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/errordetails/v1/message.proto +20 -0
  120. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/failure/v1/message.proto +25 -0
  121. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/history/v1/message.proto +141 -15
  122. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/namespace/v1/message.proto +12 -0
  123. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/nexus/v1/message.proto +193 -0
  124. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +73 -6
  125. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +46 -4
  126. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/schedule/v1/message.proto +4 -0
  127. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/sdk/v1/workflow_metadata.proto +2 -2
  128. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +116 -0
  129. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/workflow/v1/message.proto +134 -0
  130. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +274 -29
  131. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +57 -1
  132. package/sdk-core/sdk-core-protos/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +10 -12
  133. package/sdk-core/sdk-core-protos/src/history_builder.rs +1 -1
  134. package/sdk-core/sdk-core-protos/src/lib.rs +54 -51
  135. package/sdk-core/sdk-core-protos/src/task_token.rs +11 -2
  136. package/sdk-core/test-utils/Cargo.toml +7 -4
  137. package/sdk-core/test-utils/src/histfetch.rs +1 -1
  138. package/sdk-core/test-utils/src/lib.rs +44 -62
  139. package/sdk-core/tests/fuzzy_workflow.rs +5 -2
  140. package/sdk-core/tests/heavy_tests.rs +114 -17
  141. package/sdk-core/tests/integ_tests/activity_functions.rs +1 -1
  142. package/sdk-core/tests/integ_tests/client_tests.rs +2 -2
  143. package/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +38 -26
  144. package/sdk-core/tests/integ_tests/metrics_tests.rs +126 -17
  145. package/sdk-core/tests/integ_tests/polling_tests.rs +118 -2
  146. package/sdk-core/tests/integ_tests/update_tests.rs +3 -5
  147. package/sdk-core/tests/integ_tests/visibility_tests.rs +3 -3
  148. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +1 -1
  149. package/sdk-core/tests/integ_tests/workflow_tests/appdata_propagation.rs +1 -1
  150. package/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +1 -1
  151. package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +1 -1
  152. package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +3 -3
  153. package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +5 -4
  154. package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +3 -2
  155. package/sdk-core/tests/integ_tests/workflow_tests/eager.rs +6 -10
  156. package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +9 -7
  157. package/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +1 -1
  158. package/sdk-core/tests/integ_tests/workflow_tests/patches.rs +14 -9
  159. package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -1
  160. package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +6 -13
  161. package/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +9 -6
  162. package/sdk-core/tests/integ_tests/workflow_tests/timers.rs +5 -5
  163. package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +1 -1
  164. package/sdk-core/tests/integ_tests/workflow_tests.rs +115 -11
  165. package/sdk-core/tests/main.rs +2 -2
  166. package/src/conversions.rs +57 -0
  167. package/src/lib.rs +1 -0
  168. package/src/runtime.rs +51 -35
  169. package/ts/index.ts +67 -3
  170. package/sdk-core/core/src/worker/workflow/workflow_stream/saved_wf_inputs.rs +0 -117
  171. package/sdk-core/core/src/worker/workflow/workflow_stream/tonic_status_serde.rs +0 -24
  172. package/sdk-core/sdk/src/payload_converter.rs +0 -11
  173. package/sdk-core/sdk-core-protos/protos/api_upstream/.buildkite/Dockerfile +0 -2
  174. package/sdk-core/sdk-core-protos/protos/api_upstream/.buildkite/docker-compose.yml +0 -15
  175. package/sdk-core/sdk-core-protos/protos/api_upstream/.buildkite/pipeline.yml +0 -10
  176. package/sdk-core/test-utils/src/wf_input_saver.rs +0 -50
  177. package/sdk-core/tests/wf_input_replay.rs +0 -32
@@ -0,0 +1,561 @@
1
+ use crossbeam_utils::atomic::AtomicCell;
2
+ use parking_lot::Mutex;
3
+ use std::{
4
+ marker::PhantomData,
5
+ sync::{
6
+ atomic::{AtomicU64, Ordering},
7
+ Arc, OnceLock,
8
+ },
9
+ time::{Duration, Instant},
10
+ };
11
+ use temporal_sdk_core_api::{
12
+ telemetry::metrics::{CoreMeter, GaugeF64, MetricAttributes, TemporalMeter},
13
+ worker::{
14
+ ActivitySlotKind, LocalActivitySlotKind, SlotKind, SlotReservationContext, SlotSupplier,
15
+ SlotSupplierPermit, WorkerTuner, WorkflowSlotKind,
16
+ },
17
+ };
18
+ use tokio::{sync::watch, task::JoinHandle};
19
+
20
+ /// Implements [WorkerTuner] and attempts to maintain certain levels of resource usage when
21
+ /// under load.
22
+ ///
23
+ /// It does so by using two PID controllers, one for memory and one for CPU, which are fed the
24
+ /// current usage levels of their respective resource as measurements. The user specifies a target
25
+ /// threshold for each, and slots are handed out if the output of both PID controllers is above some
26
+ /// defined threshold. See [ResourceBasedSlotsOptions] for the default PID controller settings.
27
+ pub struct ResourceBasedTuner<MI> {
28
+ slots: Arc<ResourceBasedSlots<MI>>,
29
+ wf_opts: Option<ResourceSlotOptions>,
30
+ act_opts: Option<ResourceSlotOptions>,
31
+ la_opts: Option<ResourceSlotOptions>,
32
+ }
33
+
34
+ impl<MI> ResourceBasedTuner<MI> {
35
+ /// Build a new tuner from a [ResourceBasedSlots] instance
36
+ pub fn new(resourcer: ResourceBasedSlots<MI>) -> Self {
37
+ Self {
38
+ slots: Arc::new(resourcer),
39
+ wf_opts: None,
40
+ act_opts: None,
41
+ la_opts: None,
42
+ }
43
+ }
44
+
45
+ /// Set workflow slot options
46
+ pub fn with_workflow_slots_options(&mut self, opts: ResourceSlotOptions) -> &mut Self {
47
+ self.wf_opts = Some(opts);
48
+ self
49
+ }
50
+
51
+ /// Set activity slot options
52
+ pub fn with_activity_slots_options(&mut self, opts: ResourceSlotOptions) -> &mut Self {
53
+ self.act_opts = Some(opts);
54
+ self
55
+ }
56
+
57
+ /// Set local activity slot options
58
+ pub fn with_local_activity_slots_options(&mut self, opts: ResourceSlotOptions) -> &mut Self {
59
+ self.la_opts = Some(opts);
60
+ self
61
+ }
62
+ }
63
+
64
+ const DEFAULT_WF_SLOT_OPTS: ResourceSlotOptions = ResourceSlotOptions {
65
+ min_slots: 2,
66
+ max_slots: 10_000,
67
+ ramp_throttle: Duration::from_millis(0),
68
+ };
69
+ const DEFAULT_ACT_SLOT_OPTS: ResourceSlotOptions = ResourceSlotOptions {
70
+ min_slots: 1,
71
+ max_slots: 10_000,
72
+ ramp_throttle: Duration::from_millis(50),
73
+ };
74
+
75
+ /// Options for a specific slot type
76
+ #[derive(Debug, Clone, Copy, derive_more::Constructor)]
77
+ pub struct ResourceSlotOptions {
78
+ /// Amount of slots of this type that will be issued regardless of any other checks
79
+ min_slots: usize,
80
+ /// Maximum amount of slots of this type permitted
81
+ max_slots: usize,
82
+ /// Minimum time we will wait (after passing the minimum slots number) between handing out new
83
+ /// slots
84
+ ramp_throttle: Duration,
85
+ }
86
+
87
+ /// Implements [SlotSupplier] and attempts to maintain certain levels of resource usage when
88
+ /// under load.
89
+ ///
90
+ /// It does so by using two PID controllers, one for memory and one for CPU, which are fed the
91
+ /// current usage levels of their respective resource as measurements. The user specifies a target
92
+ /// threshold for each, and slots are handed out if the output of both PID controllers is above some
93
+ /// defined threshold. See [ResourceBasedSlotsOptions] for the default PID controller settings.
94
+ pub struct ResourceBasedSlots<MI> {
95
+ options: ResourceBasedSlotsOptions,
96
+ sys_info_supplier: MI,
97
+ metrics: OnceLock<JoinHandle<()>>,
98
+ pids: Mutex<PidControllers>,
99
+ last_metric_vals: Arc<AtomicCell<LastMetricVals>>,
100
+ }
101
+ /// Wraps [ResourceBasedSlots] for a specific slot type
102
+ pub struct ResourceBasedSlotsForType<MI, SK> {
103
+ inner: Arc<ResourceBasedSlots<MI>>,
104
+
105
+ opts: ResourceSlotOptions,
106
+
107
+ last_slot_issued_tx: watch::Sender<Instant>,
108
+ last_slot_issued_rx: watch::Receiver<Instant>,
109
+ _slot_kind: PhantomData<SK>,
110
+ }
111
+ #[derive(Clone, Debug, derive_builder::Builder)]
112
+ #[non_exhaustive]
113
+ pub struct ResourceBasedSlotsOptions {
114
+ /// A value in the range [0.0, 1.0] representing the target memory usage.
115
+ target_mem_usage: f64,
116
+ /// A value in the range [0.0, 1.0] representing the target CPU usage.
117
+ target_cpu_usage: f64,
118
+
119
+ #[builder(default = "5.0")]
120
+ pub mem_p_gain: f64,
121
+ #[builder(default = "0.0")]
122
+ pub mem_i_gain: f64,
123
+ #[builder(default = "1.0")]
124
+ pub mem_d_gain: f64,
125
+ /// If the mem PID controller outputs a value higher than this, we say the mem half of things
126
+ /// will allow a slot
127
+ #[builder(default = "0.25")]
128
+ pub mem_output_threshold: f64,
129
+ #[builder(default = "5.0")]
130
+ pub cpu_p_gain: f64,
131
+ #[builder(default = "0.0")]
132
+ pub cpu_i_gain: f64,
133
+ #[builder(default = "1.0")]
134
+ pub cpu_d_gain: f64,
135
+ /// If the CPU PID controller outputs a value higher than this, we say the CPU half of things
136
+ /// will allow a slot
137
+ #[builder(default = "0.05")]
138
+ pub cpu_output_threshold: f64,
139
+ }
140
+ struct PidControllers {
141
+ mem: pid::Pid<f64>,
142
+ cpu: pid::Pid<f64>,
143
+ }
144
+ struct MetricInstruments {
145
+ attribs: MetricAttributes,
146
+ mem_usage: Arc<dyn GaugeF64>,
147
+ cpu_usage: Arc<dyn GaugeF64>,
148
+ mem_pid_output: Arc<dyn GaugeF64>,
149
+ cpu_pid_output: Arc<dyn GaugeF64>,
150
+ }
151
/// Snapshot of the values produced by the most recent PID evaluation.
#[derive(Clone, Copy, Default)]
struct LastMetricVals {
    /// Output of the memory PID controller
    mem_output: f64,
    /// Output of the CPU PID controller
    cpu_output: f64,
    /// Memory usage measurement that was fed to the memory controller
    mem_used_percent: f64,
    /// CPU usage measurement that was fed to the CPU controller
    cpu_used_percent: f64,
}
158
+
159
+ impl ResourceBasedSlots<RealSysInfo> {
160
+ /// Create an instance attempting to target the provided memory and cpu thresholds as values
161
+ /// between 0 and 1.
162
+ pub fn new(target_mem_usage: f64, target_cpu_usage: f64) -> Self {
163
+ let opts = ResourceBasedSlotsOptionsBuilder::default()
164
+ .target_mem_usage(target_mem_usage)
165
+ .target_cpu_usage(target_cpu_usage)
166
+ .build()
167
+ .expect("default resource based slot options can't fail to build");
168
+ Self::new_with_sysinfo(opts, RealSysInfo::new())
169
+ }
170
+
171
+ /// Create an instance using the fully configurable set of PID controller options
172
+ pub fn new_from_options(options: ResourceBasedSlotsOptions) -> Self {
173
+ Self::new_with_sysinfo(options, RealSysInfo::new())
174
+ }
175
+ }
176
+
177
+ impl PidControllers {
178
+ fn new(options: &ResourceBasedSlotsOptions) -> Self {
179
+ let mut mem = pid::Pid::new(options.target_mem_usage, 100.0);
180
+ mem.p(options.mem_p_gain, 100)
181
+ .i(options.mem_i_gain, 100)
182
+ .d(options.mem_d_gain, 100);
183
+ let mut cpu = pid::Pid::new(options.target_cpu_usage, 100.0);
184
+ cpu.p(options.cpu_p_gain, 100)
185
+ .i(options.cpu_i_gain, 100)
186
+ .d(options.cpu_d_gain, 100);
187
+ Self { mem, cpu }
188
+ }
189
+ }
190
+
191
+ impl MetricInstruments {
192
+ fn new(meter: TemporalMeter) -> Self {
193
+ let mem_usage = meter.inner.gauge_f64("resource_slots_mem_usage".into());
194
+ let cpu_usage = meter.inner.gauge_f64("resource_slots_cpu_usage".into());
195
+ let mem_pid_output = meter
196
+ .inner
197
+ .gauge_f64("resource_slots_mem_pid_output".into());
198
+ let cpu_pid_output = meter
199
+ .inner
200
+ .gauge_f64("resource_slots_cpu_pid_output".into());
201
+ let attribs = meter.inner.new_attributes(meter.default_attribs);
202
+ Self {
203
+ attribs,
204
+ mem_usage,
205
+ cpu_usage,
206
+ mem_pid_output,
207
+ cpu_pid_output,
208
+ }
209
+ }
210
+ }
211
+
212
/// Implementors provide information about system resource usage
pub trait SystemResourceInfo {
    /// Return total available system memory in bytes
    fn total_mem(&self) -> u64;
    /// Return memory used by the system in bytes
    fn used_mem(&self) -> u64;
    /// Return system used CPU as a float in the range [0.0, 1.0] where 1.0 is defined as all
    /// cores pegged
    fn used_cpu_percent(&self) -> f64;
    /// Return system used memory as a float in the range [0.0, 1.0]
    ///
    /// When [`total_mem`](Self::total_mem) reports zero the ratio is undefined; `0.0` is returned
    /// in that case rather than NaN or infinity, since a NaN measurement would make every
    /// threshold comparison downstream evaluate to false.
    fn used_mem_percent(&self) -> f64 {
        // Read `used_mem` first, matching the evaluation order callers have always observed
        let used = self.used_mem();
        let total = self.total_mem();
        if total == 0 {
            return 0.0;
        }
        used as f64 / total as f64
    }
}
226
+
227
+ #[async_trait::async_trait]
228
+ impl<MI, SK> SlotSupplier for ResourceBasedSlotsForType<MI, SK>
229
+ where
230
+ MI: SystemResourceInfo + Send + Sync + 'static,
231
+ SK: SlotKind + Send + Sync,
232
+ {
233
+ type SlotKind = SK;
234
+
235
+ async fn reserve_slot(&self, ctx: &dyn SlotReservationContext) -> SlotSupplierPermit {
236
+ loop {
237
+ if ctx.num_issued_slots() < self.opts.min_slots {
238
+ return self.issue_slot();
239
+ } else {
240
+ let must_wait_for = self
241
+ .opts
242
+ .ramp_throttle
243
+ .saturating_sub(self.time_since_last_issued());
244
+ if must_wait_for > Duration::from_millis(0) {
245
+ tokio::time::sleep(must_wait_for).await;
246
+ }
247
+ if let Some(p) = self.try_reserve_slot(ctx) {
248
+ return p;
249
+ } else {
250
+ tokio::time::sleep(Duration::from_millis(10)).await;
251
+ }
252
+ }
253
+ }
254
+ }
255
+
256
+ fn try_reserve_slot(&self, ctx: &dyn SlotReservationContext) -> Option<SlotSupplierPermit> {
257
+ let num_issued = ctx.num_issued_slots();
258
+ if num_issued < self.opts.min_slots
259
+ || (self.time_since_last_issued() > self.opts.ramp_throttle
260
+ && num_issued < self.opts.max_slots
261
+ && self.inner.pid_decision()
262
+ && self.inner.can_reserve())
263
+ {
264
+ Some(self.issue_slot())
265
+ } else {
266
+ None
267
+ }
268
+ }
269
+
270
+ fn mark_slot_used(&self, _info: SK::Info<'_>) {}
271
+
272
+ fn release_slot(&self) {}
273
+ }
274
+
275
+ impl<MI, SK> ResourceBasedSlotsForType<MI, SK>
276
+ where
277
+ MI: Send + Sync + SystemResourceInfo,
278
+ SK: Send + SlotKind + Sync,
279
+ {
280
+ }
281
+
282
+ impl<MI, SK> ResourceBasedSlotsForType<MI, SK>
283
+ where
284
+ MI: SystemResourceInfo + Send + Sync,
285
+ SK: SlotKind + Send + Sync,
286
+ {
287
+ fn new(inner: Arc<ResourceBasedSlots<MI>>, opts: ResourceSlotOptions) -> Self {
288
+ let (tx, rx) = watch::channel(Instant::now());
289
+ Self {
290
+ opts,
291
+ last_slot_issued_tx: tx,
292
+ last_slot_issued_rx: rx,
293
+ inner,
294
+ _slot_kind: PhantomData,
295
+ }
296
+ }
297
+
298
+ fn issue_slot(&self) -> SlotSupplierPermit {
299
+ let _ = self.last_slot_issued_tx.send(Instant::now());
300
+ SlotSupplierPermit::default()
301
+ }
302
+
303
+ fn time_since_last_issued(&self) -> Duration {
304
+ Instant::now()
305
+ .checked_duration_since(*self.last_slot_issued_rx.borrow())
306
+ .unwrap_or_default()
307
+ }
308
+ }
309
+
310
+ impl<MI: SystemResourceInfo + Sync + Send + 'static> WorkerTuner for ResourceBasedTuner<MI> {
311
+ fn workflow_task_slot_supplier(
312
+ &self,
313
+ ) -> Arc<dyn SlotSupplier<SlotKind = WorkflowSlotKind> + Send + Sync> {
314
+ let o = self.wf_opts.unwrap_or(DEFAULT_WF_SLOT_OPTS);
315
+ self.slots.as_kind(o)
316
+ }
317
+
318
+ fn activity_task_slot_supplier(
319
+ &self,
320
+ ) -> Arc<dyn SlotSupplier<SlotKind = ActivitySlotKind> + Send + Sync> {
321
+ let o = self.act_opts.unwrap_or(DEFAULT_ACT_SLOT_OPTS);
322
+ self.slots.as_kind(o)
323
+ }
324
+
325
+ fn local_activity_slot_supplier(
326
+ &self,
327
+ ) -> Arc<dyn SlotSupplier<SlotKind = LocalActivitySlotKind> + Send + Sync> {
328
+ let o = self.la_opts.unwrap_or(DEFAULT_ACT_SLOT_OPTS);
329
+ self.slots.as_kind(o)
330
+ }
331
+
332
+ fn attach_metrics(&self, metrics: TemporalMeter) {
333
+ self.slots.attach_metrics(metrics);
334
+ }
335
+ }
336
+
337
+ impl<MI: SystemResourceInfo + Sync + Send> ResourceBasedSlots<MI> {
338
+ /// Create a [ResourceBasedSlotsForType] for this instance which is willing to hand out
339
+ /// `minimum` slots with no checks at all and `max` slots ever. Otherwise the underlying
340
+ /// mem/cpu targets will attempt to be matched while under load.
341
+ ///
342
+ /// `ramp_throttle` determines how long this will pause for between making determinations about
343
+ /// whether it is OK to hand out new slot(s). This is important to set to nonzero in situations
344
+ /// where activities might use a lot of resources, because otherwise the implementation may
345
+ /// hand out many slots quickly before resource usage has a chance to be reflected, possibly
346
+ /// resulting in OOM (for example).
347
+ pub fn as_kind<SK: SlotKind + Send + Sync>(
348
+ self: &Arc<Self>,
349
+ opts: ResourceSlotOptions,
350
+ ) -> Arc<ResourceBasedSlotsForType<MI, SK>> {
351
+ Arc::new(ResourceBasedSlotsForType::new(self.clone(), opts))
352
+ }
353
+
354
+ fn new_with_sysinfo(options: ResourceBasedSlotsOptions, sys_info: MI) -> Self {
355
+ Self {
356
+ pids: Mutex::new(PidControllers::new(&options)),
357
+ options,
358
+ metrics: OnceLock::new(),
359
+ sys_info_supplier: sys_info,
360
+ last_metric_vals: Arc::new(AtomicCell::new(Default::default())),
361
+ }
362
+ }
363
+
364
+ fn can_reserve(&self) -> bool {
365
+ self.sys_info_supplier.used_mem_percent() <= self.options.target_mem_usage
366
+ }
367
+
368
+ /// Returns true if the pid controllers think a new slot should be given out
369
+ fn pid_decision(&self) -> bool {
370
+ let mut pids = self.pids.lock();
371
+ let mem_used_percent = self.sys_info_supplier.used_mem_percent();
372
+ let cpu_used_percent = self.sys_info_supplier.used_cpu_percent();
373
+ let mem_output = pids.mem.next_control_output(mem_used_percent).output;
374
+ let cpu_output = pids.cpu.next_control_output(cpu_used_percent).output;
375
+ self.last_metric_vals.store(LastMetricVals {
376
+ mem_output,
377
+ cpu_output,
378
+ mem_used_percent,
379
+ cpu_used_percent,
380
+ });
381
+ mem_output > self.options.mem_output_threshold
382
+ && cpu_output > self.options.cpu_output_threshold
383
+ }
384
+
385
+ fn attach_metrics(&self, metrics: TemporalMeter) {
386
+ // Launch a task to periodically emit metrics
387
+ self.metrics.get_or_init(move || {
388
+ let m = MetricInstruments::new(metrics);
389
+ let last_vals = self.last_metric_vals.clone();
390
+ tokio::task::spawn(async move {
391
+ let mut interval = tokio::time::interval(Duration::from_secs(1));
392
+ loop {
393
+ let lv = last_vals.load();
394
+ m.mem_pid_output.record(lv.mem_output, &m.attribs);
395
+ m.cpu_pid_output.record(lv.cpu_output, &m.attribs);
396
+ m.mem_usage.record(lv.mem_used_percent * 100., &m.attribs);
397
+ m.cpu_usage.record(lv.cpu_used_percent * 100., &m.attribs);
398
+ interval.tick().await;
399
+ }
400
+ })
401
+ });
402
+ }
403
+ }
404
+
405
+ /// Implements [SystemResourceInfo] using the [sysinfo] crate
406
+ #[derive(Debug)]
407
+ pub struct RealSysInfo {
408
+ sys: Mutex<sysinfo::System>,
409
+ total_mem: u64,
410
+ cur_mem_usage: AtomicU64,
411
+ cur_cpu_usage: AtomicU64,
412
+ last_refresh: AtomicCell<Instant>,
413
+ }
414
+ impl RealSysInfo {
415
+ fn new() -> Self {
416
+ let mut sys = sysinfo::System::new();
417
+ sys.refresh_memory();
418
+ let total_mem = sys.total_memory();
419
+ let s = Self {
420
+ sys: Mutex::new(sys),
421
+ last_refresh: AtomicCell::new(Instant::now()),
422
+ cur_mem_usage: AtomicU64::new(0),
423
+ cur_cpu_usage: AtomicU64::new(0),
424
+ total_mem,
425
+ };
426
+ s.refresh();
427
+ s
428
+ }
429
+
430
+ fn refresh_if_needed(&self) {
431
+ // This is all quite expensive and meaningfully slows everything down if it's allowed to
432
+ // happen more often. A better approach than a lock would be needed to go faster.
433
+ if (Instant::now() - self.last_refresh.load()) > Duration::from_millis(100) {
434
+ self.refresh();
435
+ }
436
+ }
437
+
438
+ fn refresh(&self) {
439
+ let mut lock = self.sys.lock();
440
+ lock.refresh_memory();
441
+ lock.refresh_cpu_usage();
442
+ let mem = lock.used_memory();
443
+ let cpu = lock.global_cpu_info().cpu_usage() as f64 / 100.;
444
+ self.cur_mem_usage.store(mem, Ordering::Release);
445
+ self.cur_cpu_usage.store(cpu.to_bits(), Ordering::Release);
446
+ self.last_refresh.store(Instant::now());
447
+ }
448
+ }
449
+ impl SystemResourceInfo for RealSysInfo {
450
+ fn total_mem(&self) -> u64 {
451
+ self.total_mem
452
+ }
453
+
454
+ fn used_mem(&self) -> u64 {
455
+ self.refresh_if_needed();
456
+ self.cur_mem_usage.load(Ordering::Acquire)
457
+ }
458
+
459
+ fn used_cpu_percent(&self) -> f64 {
460
+ self.refresh_if_needed();
461
+ f64::from_bits(self.cur_cpu_usage.load(Ordering::Acquire))
462
+ }
463
+ }
464
+
465
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{abstractions::MeteredPermitDealer, telemetry::metrics::MetricsContext};
    use std::sync::{
        atomic::{AtomicU64, Ordering},
        Arc,
    };
    use temporal_sdk_core_api::worker::WorkflowSlotKind;

    /// Fake system info whose used memory is controlled by the test through a shared atomic
    struct FakeMIS {
        used: Arc<AtomicU64>,
    }
    impl FakeMIS {
        /// Returns the fake along with the handle used to change its reported used memory
        fn new() -> (Self, Arc<AtomicU64>) {
            let used = Arc::new(AtomicU64::new(0));
            let handle = Arc::clone(&used);
            (Self { used }, handle)
        }
    }
    impl SystemResourceInfo for FakeMIS {
        fn total_mem(&self) -> u64 {
            100_000
        }

        fn used_mem(&self) -> u64 {
            self.used.load(Ordering::Acquire)
        }

        fn used_cpu_percent(&self) -> f64 {
            0.0
        }
    }

    /// Default PID settings with an 80% memory target and a 100% CPU target
    fn test_options() -> ResourceBasedSlotsOptions {
        ResourceBasedSlotsOptionsBuilder::default()
            .target_mem_usage(0.8)
            .target_cpu_usage(1.0)
            .build()
            .expect("default resource based slot options can't fail to build")
    }

    /// Workflow slot supplier over `sys_info` with the given minimum, a maximum of 100 slots and
    /// no ramp throttle
    fn workflow_slots(
        sys_info: FakeMIS,
        min_slots: usize,
    ) -> Arc<ResourceBasedSlotsForType<FakeMIS, WorkflowSlotKind>> {
        let shared = Arc::new(ResourceBasedSlots::new_with_sysinfo(
            test_options(),
            sys_info,
        ));
        shared.as_kind::<WorkflowSlotKind>(ResourceSlotOptions {
            min_slots,
            max_slots: 100,
            ramp_throttle: Duration::from_millis(0),
        })
    }

    /// A slot is granted while memory is free and refused once usage exceeds the target
    #[test]
    fn mem_workflow_sync() {
        let (fmis, used) = FakeMIS::new();
        let rbs = workflow_slots(fmis, 0);
        let pd = MeteredPermitDealer::new(rbs.clone(), MetricsContext::no_op(), None);
        assert!(rbs.try_reserve_slot(&pd).is_some());
        used.store(90_000, Ordering::Release);
        assert!(rbs.try_reserve_slot(&pd).is_none());
    }

    /// `reserve_slot` does not complete until memory usage has dropped
    #[tokio::test]
    async fn mem_workflow_async() {
        let (fmis, used) = FakeMIS::new();
        used.store(90_000, Ordering::Release);
        let rbs = workflow_slots(fmis, 0);
        let pd = MeteredPermitDealer::new(rbs.clone(), MetricsContext::no_op(), None);
        let order = crossbeam_queue::ArrayQueue::new(2);
        let waits_free = async {
            rbs.reserve_slot(&pd).await;
            order.push(2).unwrap();
        };
        let frees = async {
            used.store(70_000, Ordering::Release);
            order.push(1).unwrap();
        };
        tokio::join!(waits_free, frees);
        assert_eq!(order.pop(), Some(1));
        assert_eq!(order.pop(), Some(2));
    }

    /// The minimum number of slots is granted even when memory usage exceeds the target
    #[test]
    fn minimum_respected() {
        let (fmis, used) = FakeMIS::new();
        let rbs = workflow_slots(fmis, 2);
        let pd = MeteredPermitDealer::new(rbs.clone(), MetricsContext::no_op(), None);
        used.store(90_000, Ordering::Release);
        let _p1 = pd.try_acquire_owned().unwrap();
        let _p2 = pd.try_acquire_owned().unwrap();
        assert!(pd.try_acquire_owned().is_err());
    }
}
@@ -0,0 +1,122 @@
1
+ mod fixed_size;
2
+ mod resource_based;
3
+
4
+ pub use fixed_size::FixedSizeSlotSupplier;
5
+ pub use resource_based::{
6
+ RealSysInfo, ResourceBasedSlots, ResourceBasedTuner, ResourceSlotOptions,
7
+ };
8
+
9
+ use std::sync::{Arc, OnceLock};
10
+ use temporal_sdk_core_api::{
11
+ telemetry::metrics::TemporalMeter,
12
+ worker::{
13
+ ActivitySlotKind, LocalActivitySlotKind, SlotSupplier, WorkerConfig, WorkerTuner,
14
+ WorkflowSlotKind,
15
+ },
16
+ };
17
+
18
+ /// Allows for the composition of different slot suppliers into a [WorkerTuner]
19
+ pub struct TunerHolder {
20
+ wft_supplier: Arc<dyn SlotSupplier<SlotKind = WorkflowSlotKind> + Send + Sync>,
21
+ act_supplier: Arc<dyn SlotSupplier<SlotKind = ActivitySlotKind> + Send + Sync>,
22
+ la_supplier: Arc<dyn SlotSupplier<SlotKind = LocalActivitySlotKind> + Send + Sync>,
23
+ metrics: OnceLock<TemporalMeter>,
24
+ }
25
+
26
+ /// Can be used to construct a `TunerHolder` from individual slot suppliers. Any supplier which is
27
+ /// not provided will default to a [FixedSizeSlotSupplier] with a capacity of 100.
28
+ #[derive(Default, Clone)]
29
+ pub struct TunerBuilder {
30
+ workflow_slot_supplier:
31
+ Option<Arc<dyn SlotSupplier<SlotKind = WorkflowSlotKind> + Send + Sync>>,
32
+ activity_slot_supplier:
33
+ Option<Arc<dyn SlotSupplier<SlotKind = ActivitySlotKind> + Send + Sync>>,
34
+ local_activity_slot_supplier:
35
+ Option<Arc<dyn SlotSupplier<SlotKind = LocalActivitySlotKind> + Send + Sync>>,
36
+ }
37
+
38
+ impl TunerBuilder {
39
+ pub(crate) fn from_config(cfg: &WorkerConfig) -> Self {
40
+ let mut builder = Self::default();
41
+ if let Some(m) = cfg.max_outstanding_workflow_tasks {
42
+ builder.workflow_slot_supplier(Arc::new(FixedSizeSlotSupplier::new(m)));
43
+ }
44
+ if let Some(m) = cfg.max_outstanding_activities {
45
+ builder.activity_slot_supplier(Arc::new(FixedSizeSlotSupplier::new(m)));
46
+ }
47
+ if let Some(m) = cfg.max_outstanding_local_activities {
48
+ builder.local_activity_slot_supplier(Arc::new(FixedSizeSlotSupplier::new(m)));
49
+ }
50
+ builder
51
+ }
52
+
53
+ /// Set a workflow slot supplier
54
+ pub fn workflow_slot_supplier(
55
+ &mut self,
56
+ supplier: Arc<dyn SlotSupplier<SlotKind = WorkflowSlotKind> + Send + Sync>,
57
+ ) -> &mut Self {
58
+ self.workflow_slot_supplier = Some(supplier);
59
+ self
60
+ }
61
+
62
+ /// Set an activity slot supplier
63
+ pub fn activity_slot_supplier(
64
+ &mut self,
65
+ supplier: Arc<dyn SlotSupplier<SlotKind = ActivitySlotKind> + Send + Sync>,
66
+ ) -> &mut Self {
67
+ self.activity_slot_supplier = Some(supplier);
68
+ self
69
+ }
70
+
71
+ /// Set a local activity slot supplier
72
+ pub fn local_activity_slot_supplier(
73
+ &mut self,
74
+ supplier: Arc<dyn SlotSupplier<SlotKind = LocalActivitySlotKind> + Send + Sync>,
75
+ ) -> &mut Self {
76
+ self.local_activity_slot_supplier = Some(supplier);
77
+ self
78
+ }
79
+
80
+ /// Build a [WorkerTuner] from the configured slot suppliers
81
+ pub fn build(&mut self) -> Arc<dyn WorkerTuner + Send + Sync> {
82
+ Arc::new(TunerHolder {
83
+ wft_supplier: self
84
+ .workflow_slot_supplier
85
+ .clone()
86
+ .unwrap_or_else(|| Arc::new(FixedSizeSlotSupplier::new(100))),
87
+ act_supplier: self
88
+ .activity_slot_supplier
89
+ .clone()
90
+ .unwrap_or_else(|| Arc::new(FixedSizeSlotSupplier::new(100))),
91
+ la_supplier: self
92
+ .local_activity_slot_supplier
93
+ .clone()
94
+ .unwrap_or_else(|| Arc::new(FixedSizeSlotSupplier::new(100))),
95
+ metrics: OnceLock::new(),
96
+ })
97
+ }
98
+ }
99
+
100
+ impl WorkerTuner for TunerHolder {
101
+ fn workflow_task_slot_supplier(
102
+ &self,
103
+ ) -> Arc<dyn SlotSupplier<SlotKind = WorkflowSlotKind> + Send + Sync> {
104
+ self.wft_supplier.clone()
105
+ }
106
+
107
+ fn activity_task_slot_supplier(
108
+ &self,
109
+ ) -> Arc<dyn SlotSupplier<SlotKind = ActivitySlotKind> + Send + Sync> {
110
+ self.act_supplier.clone()
111
+ }
112
+
113
+ fn local_activity_slot_supplier(
114
+ &self,
115
+ ) -> Arc<dyn SlotSupplier<SlotKind = LocalActivitySlotKind> + Send + Sync> {
116
+ self.la_supplier.clone()
117
+ }
118
+
119
+ fn attach_metrics(&self, m: TemporalMeter) {
120
+ let _ = self.metrics.set(m);
121
+ }
122
+ }