@temporalio/core-bridge 1.4.4 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +327 -419
- package/Cargo.toml +1 -1
- package/index.js +25 -2
- package/lib/errors.d.ts +22 -0
- package/lib/errors.js +65 -0
- package/lib/errors.js.map +1 -0
- package/lib/index.d.ts +440 -0
- package/lib/index.js +8 -0
- package/lib/index.js.map +1 -0
- package/package.json +11 -5
- package/releases/aarch64-apple-darwin/index.node +0 -0
- package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
- package/releases/x86_64-apple-darwin/index.node +0 -0
- package/releases/x86_64-pc-windows-msvc/index.node +0 -0
- package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
- package/sdk-core/.buildkite/docker/Dockerfile +1 -1
- package/sdk-core/.buildkite/docker/docker-compose.yaml +2 -2
- package/sdk-core/bridge-ffi/Cargo.toml +1 -1
- package/sdk-core/bridge-ffi/include/sdk-core-bridge.h +0 -25
- package/sdk-core/bridge-ffi/src/lib.rs +29 -108
- package/sdk-core/bridge-ffi/src/wrappers.rs +35 -25
- package/sdk-core/client/Cargo.toml +1 -1
- package/sdk-core/client/src/lib.rs +12 -20
- package/sdk-core/client/src/raw.rs +9 -8
- package/sdk-core/client/src/retry.rs +100 -23
- package/sdk-core/core/Cargo.toml +5 -5
- package/sdk-core/core/benches/workflow_replay.rs +13 -10
- package/sdk-core/core/src/abstractions.rs +22 -22
- package/sdk-core/core/src/core_tests/activity_tasks.rs +1 -1
- package/sdk-core/core/src/core_tests/local_activities.rs +228 -6
- package/sdk-core/core/src/core_tests/queries.rs +247 -89
- package/sdk-core/core/src/core_tests/workers.rs +2 -2
- package/sdk-core/core/src/core_tests/workflow_cancels.rs +1 -1
- package/sdk-core/core/src/core_tests/workflow_tasks.rs +46 -27
- package/sdk-core/core/src/lib.rs +139 -32
- package/sdk-core/core/src/replay/mod.rs +185 -41
- package/sdk-core/core/src/telemetry/log_export.rs +190 -0
- package/sdk-core/core/src/telemetry/metrics.rs +184 -139
- package/sdk-core/core/src/telemetry/mod.rs +296 -318
- package/sdk-core/core/src/telemetry/prometheus_server.rs +4 -3
- package/sdk-core/core/src/test_help/mod.rs +9 -7
- package/sdk-core/core/src/worker/activities/local_activities.rs +2 -1
- package/sdk-core/core/src/worker/activities.rs +40 -23
- package/sdk-core/core/src/worker/client/mocks.rs +1 -1
- package/sdk-core/core/src/worker/client.rs +30 -4
- package/sdk-core/core/src/worker/mod.rs +22 -18
- package/sdk-core/core/src/worker/workflow/driven_workflow.rs +10 -19
- package/sdk-core/core/src/worker/workflow/history_update.rs +99 -25
- package/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +1 -5
- package/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +1 -5
- package/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +1 -5
- package/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +1 -5
- package/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +1 -5
- package/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +2 -6
- package/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +1 -5
- package/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +18 -21
- package/sdk-core/core/src/worker/workflow/machines/mod.rs +12 -38
- package/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +178 -0
- package/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +1 -5
- package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +1 -5
- package/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +1 -5
- package/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +8 -2
- package/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +1 -5
- package/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +232 -216
- package/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +1 -6
- package/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +4 -4
- package/sdk-core/core/src/worker/workflow/managed_run.rs +13 -5
- package/sdk-core/core/src/worker/workflow/mod.rs +61 -9
- package/sdk-core/core/src/worker/workflow/wft_poller.rs +2 -2
- package/sdk-core/core/src/worker/workflow/workflow_stream.rs +56 -11
- package/sdk-core/core-api/Cargo.toml +4 -3
- package/sdk-core/core-api/src/lib.rs +1 -43
- package/sdk-core/core-api/src/telemetry.rs +147 -0
- package/sdk-core/core-api/src/worker.rs +13 -0
- package/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +1 -1
- package/sdk-core/histories/evict_while_la_running_no_interference-23_history.bin +0 -0
- package/sdk-core/histories/evict_while_la_running_no_interference-85_history.bin +0 -0
- package/sdk-core/protos/api_upstream/.github/CODEOWNERS +1 -1
- package/sdk-core/protos/api_upstream/buf.yaml +0 -3
- package/sdk-core/protos/api_upstream/temporal/api/batch/v1/message.proto +3 -7
- package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +8 -0
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +1 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +2 -0
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +3 -0
- package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +13 -0
- package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +19 -59
- package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +0 -19
- package/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +108 -29
- package/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +1 -0
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +47 -8
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +15 -1
- package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +2 -0
- package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +8 -1
- package/sdk-core/sdk/src/interceptors.rs +36 -3
- package/sdk-core/sdk/src/lib.rs +7 -4
- package/sdk-core/sdk/src/workflow_context.rs +13 -2
- package/sdk-core/sdk-core-protos/src/history_builder.rs +47 -1
- package/sdk-core/sdk-core-protos/src/history_info.rs +22 -22
- package/sdk-core/sdk-core-protos/src/lib.rs +49 -27
- package/sdk-core/test-utils/Cargo.toml +1 -0
- package/sdk-core/test-utils/src/lib.rs +81 -29
- package/sdk-core/tests/integ_tests/metrics_tests.rs +37 -0
- package/sdk-core/tests/integ_tests/polling_tests.rs +0 -13
- package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +145 -4
- package/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +53 -0
- package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +106 -20
- package/sdk-core/tests/integ_tests/workflow_tests.rs +18 -8
- package/sdk-core/tests/main.rs +6 -4
- package/src/conversions.rs +52 -47
- package/src/errors.rs +28 -86
- package/src/helpers.rs +3 -4
- package/src/lib.rs +2 -2
- package/src/runtime.rs +132 -61
- package/src/testing.rs +7 -4
- package/src/worker.rs +67 -50
- package/ts/errors.ts +55 -0
- package/{index.d.ts → ts/index.ts} +121 -15
- package/sdk-core/core/src/log_export.rs +0 -62
- package/sdk-core/core/src/worker/workflow/machines/mutable_side_effect_state_machine.rs +0 -127
- package/sdk-core/core/src/worker/workflow/machines/side_effect_state_machine.rs +0 -71
- package/sdk-core/protos/api_upstream/temporal/api/cluster/v1/message.proto +0 -83
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/cluster.proto +0 -40
|
@@ -1,38 +1,73 @@
|
|
|
1
|
-
use
|
|
2
|
-
use crate::telemetry::GLOBAL_TELEM_DAT;
|
|
3
|
-
use opentelemetry::sdk::metrics::aggregators::Aggregator;
|
|
4
|
-
use opentelemetry::sdk::metrics::sdk_api::{Descriptor, InstrumentKind};
|
|
1
|
+
use crate::telemetry::TelemetryInstance;
|
|
5
2
|
use opentelemetry::{
|
|
6
|
-
|
|
7
|
-
metrics::{Counter, Histogram, Meter},
|
|
3
|
+
metrics::{noop::NoopMeterProvider, Counter, Histogram, Meter, MeterProvider},
|
|
8
4
|
sdk::{
|
|
9
5
|
export::metrics::AggregatorSelector,
|
|
10
|
-
metrics::
|
|
6
|
+
metrics::{
|
|
7
|
+
aggregators::{histogram, last_value, sum, Aggregator},
|
|
8
|
+
sdk_api::{Descriptor, InstrumentKind},
|
|
9
|
+
},
|
|
11
10
|
},
|
|
12
11
|
Context, KeyValue,
|
|
13
12
|
};
|
|
14
13
|
use std::{sync::Arc, time::Duration};
|
|
14
|
+
use temporal_sdk_core_api::telemetry::CoreTelemetry;
|
|
15
15
|
|
|
16
16
|
/// Used to track context associated with metrics, and record/update them
|
|
17
17
|
///
|
|
18
18
|
/// Possible improvement: make generic over some type tag so that methods are only exposed if the
|
|
19
19
|
/// appropriate k/vs have already been set.
|
|
20
|
-
#[derive(
|
|
20
|
+
#[derive(Clone)]
|
|
21
21
|
pub(crate) struct MetricsContext {
|
|
22
22
|
ctx: Context,
|
|
23
23
|
kvs: Arc<Vec<KeyValue>>,
|
|
24
|
+
instruments: Arc<Instruments>,
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
struct Instruments {
|
|
28
|
+
wf_completed_counter: Counter<u64>,
|
|
29
|
+
wf_canceled_counter: Counter<u64>,
|
|
30
|
+
wf_failed_counter: Counter<u64>,
|
|
31
|
+
wf_cont_counter: Counter<u64>,
|
|
32
|
+
wf_e2e_latency: Histogram<u64>,
|
|
33
|
+
wf_task_queue_poll_empty_counter: Counter<u64>,
|
|
34
|
+
wf_task_queue_poll_succeed_counter: Counter<u64>,
|
|
35
|
+
wf_task_execution_failure_counter: Counter<u64>,
|
|
36
|
+
wf_task_sched_to_start_latency: Histogram<u64>,
|
|
37
|
+
wf_task_replay_latency: Histogram<u64>,
|
|
38
|
+
wf_task_execution_latency: Histogram<u64>,
|
|
39
|
+
act_poll_no_task: Counter<u64>,
|
|
40
|
+
act_task_received_counter: Counter<u64>,
|
|
41
|
+
act_execution_failed: Counter<u64>,
|
|
42
|
+
act_sched_to_start_latency: Histogram<u64>,
|
|
43
|
+
act_exec_latency: Histogram<u64>,
|
|
44
|
+
worker_registered: Counter<u64>,
|
|
45
|
+
num_pollers: Histogram<u64>,
|
|
46
|
+
task_slots_available: Histogram<u64>,
|
|
47
|
+
sticky_cache_hit: Counter<u64>,
|
|
48
|
+
sticky_cache_miss: Counter<u64>,
|
|
49
|
+
sticky_cache_size: Histogram<u64>,
|
|
24
50
|
}
|
|
25
51
|
|
|
26
52
|
impl MetricsContext {
|
|
27
|
-
fn
|
|
53
|
+
pub(crate) fn no_op() -> Self {
|
|
28
54
|
Self {
|
|
29
|
-
ctx:
|
|
30
|
-
kvs:
|
|
55
|
+
ctx: Default::default(),
|
|
56
|
+
kvs: Default::default(),
|
|
57
|
+
instruments: Arc::new(Instruments::new_explicit(
|
|
58
|
+
&NoopMeterProvider::new().meter("fakemeter"),
|
|
59
|
+
"fakemetrics",
|
|
60
|
+
)),
|
|
31
61
|
}
|
|
32
62
|
}
|
|
33
63
|
|
|
34
|
-
pub(crate) fn top_level(namespace: String) -> Self {
|
|
35
|
-
|
|
64
|
+
pub(crate) fn top_level(namespace: String, telemetry: &TelemetryInstance) -> Self {
|
|
65
|
+
let kvs = vec![KeyValue::new(KEY_NAMESPACE, namespace)];
|
|
66
|
+
Self {
|
|
67
|
+
ctx: Context::current(),
|
|
68
|
+
kvs: Arc::new(kvs),
|
|
69
|
+
instruments: Arc::new(Instruments::new(telemetry)),
|
|
70
|
+
}
|
|
36
71
|
}
|
|
37
72
|
|
|
38
73
|
pub(crate) fn with_task_q(mut self, tq: String) -> Self {
|
|
@@ -47,155 +82,219 @@ impl MetricsContext {
|
|
|
47
82
|
Self {
|
|
48
83
|
ctx: Context::current(),
|
|
49
84
|
kvs,
|
|
85
|
+
instruments: self.instruments.clone(),
|
|
50
86
|
}
|
|
51
87
|
}
|
|
52
88
|
|
|
53
89
|
/// A workflow task queue poll succeeded
|
|
54
90
|
pub(crate) fn wf_tq_poll_ok(&self) {
|
|
55
|
-
|
|
91
|
+
self.instruments
|
|
92
|
+
.wf_task_queue_poll_succeed_counter
|
|
93
|
+
.add(&self.ctx, 1, &self.kvs);
|
|
56
94
|
}
|
|
57
95
|
|
|
58
96
|
/// A workflow task queue poll timed out / had empty response
|
|
59
97
|
pub(crate) fn wf_tq_poll_empty(&self) {
|
|
60
|
-
|
|
98
|
+
self.instruments
|
|
99
|
+
.wf_task_queue_poll_empty_counter
|
|
100
|
+
.add(&self.ctx, 1, &self.kvs);
|
|
61
101
|
}
|
|
62
102
|
|
|
63
103
|
/// A workflow task execution failed
|
|
64
104
|
pub(crate) fn wf_task_failed(&self) {
|
|
65
|
-
|
|
105
|
+
self.instruments
|
|
106
|
+
.wf_task_execution_failure_counter
|
|
107
|
+
.add(&self.ctx, 1, &self.kvs);
|
|
66
108
|
}
|
|
67
109
|
|
|
68
110
|
/// A workflow completed successfully
|
|
69
111
|
pub(crate) fn wf_completed(&self) {
|
|
70
|
-
|
|
112
|
+
self.instruments
|
|
113
|
+
.wf_completed_counter
|
|
114
|
+
.add(&self.ctx, 1, &self.kvs);
|
|
71
115
|
}
|
|
72
116
|
|
|
73
117
|
/// A workflow ended cancelled
|
|
74
118
|
pub(crate) fn wf_canceled(&self) {
|
|
75
|
-
|
|
119
|
+
self.instruments
|
|
120
|
+
.wf_canceled_counter
|
|
121
|
+
.add(&self.ctx, 1, &self.kvs);
|
|
76
122
|
}
|
|
77
123
|
|
|
78
124
|
/// A workflow ended failed
|
|
79
125
|
pub(crate) fn wf_failed(&self) {
|
|
80
|
-
|
|
126
|
+
self.instruments
|
|
127
|
+
.wf_failed_counter
|
|
128
|
+
.add(&self.ctx, 1, &self.kvs);
|
|
81
129
|
}
|
|
82
130
|
|
|
83
131
|
/// A workflow continued as new
|
|
84
132
|
pub(crate) fn wf_continued_as_new(&self) {
|
|
85
|
-
|
|
133
|
+
self.instruments
|
|
134
|
+
.wf_cont_counter
|
|
135
|
+
.add(&self.ctx, 1, &self.kvs);
|
|
86
136
|
}
|
|
87
137
|
|
|
88
138
|
/// Record workflow total execution time in milliseconds
|
|
89
139
|
pub(crate) fn wf_e2e_latency(&self, dur: Duration) {
|
|
90
|
-
|
|
140
|
+
self.instruments
|
|
141
|
+
.wf_e2e_latency
|
|
142
|
+
.record(&self.ctx, dur.as_millis() as u64, &self.kvs);
|
|
91
143
|
}
|
|
92
144
|
|
|
93
145
|
/// Record workflow task schedule to start time in millis
|
|
94
146
|
pub(crate) fn wf_task_sched_to_start_latency(&self, dur: Duration) {
|
|
95
|
-
|
|
147
|
+
self.instruments.wf_task_sched_to_start_latency.record(
|
|
148
|
+
&self.ctx,
|
|
149
|
+
dur.as_millis() as u64,
|
|
150
|
+
&self.kvs,
|
|
151
|
+
);
|
|
96
152
|
}
|
|
97
153
|
|
|
98
154
|
/// Record workflow task execution time in milliseconds
|
|
99
155
|
pub(crate) fn wf_task_latency(&self, dur: Duration) {
|
|
100
|
-
|
|
156
|
+
self.instruments.wf_task_execution_latency.record(
|
|
157
|
+
&self.ctx,
|
|
158
|
+
dur.as_millis() as u64,
|
|
159
|
+
&self.kvs,
|
|
160
|
+
);
|
|
101
161
|
}
|
|
102
162
|
|
|
103
163
|
/// Record time it takes to catch up on replaying a WFT
|
|
104
164
|
pub(crate) fn wf_task_replay_latency(&self, dur: Duration) {
|
|
105
|
-
|
|
165
|
+
self.instruments.wf_task_replay_latency.record(
|
|
166
|
+
&self.ctx,
|
|
167
|
+
dur.as_millis() as u64,
|
|
168
|
+
&self.kvs,
|
|
169
|
+
);
|
|
106
170
|
}
|
|
107
171
|
|
|
108
172
|
/// An activity long poll timed out
|
|
109
173
|
pub(crate) fn act_poll_timeout(&self) {
|
|
110
|
-
|
|
174
|
+
self.instruments
|
|
175
|
+
.act_poll_no_task
|
|
176
|
+
.add(&self.ctx, 1, &self.kvs);
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
/// A count of activity tasks received
|
|
180
|
+
pub(crate) fn act_task_received(&self) {
|
|
181
|
+
self.instruments
|
|
182
|
+
.act_task_received_counter
|
|
183
|
+
.add(&self.ctx, 1, &self.kvs);
|
|
111
184
|
}
|
|
112
185
|
|
|
113
186
|
/// An activity execution failed
|
|
114
187
|
pub(crate) fn act_execution_failed(&self) {
|
|
115
|
-
|
|
188
|
+
self.instruments
|
|
189
|
+
.act_execution_failed
|
|
190
|
+
.add(&self.ctx, 1, &self.kvs);
|
|
116
191
|
}
|
|
117
192
|
|
|
118
193
|
/// Record activity task schedule to start time in millis
|
|
119
194
|
pub(crate) fn act_sched_to_start_latency(&self, dur: Duration) {
|
|
120
|
-
|
|
195
|
+
self.instruments.act_sched_to_start_latency.record(
|
|
196
|
+
&self.ctx,
|
|
197
|
+
dur.as_millis() as u64,
|
|
198
|
+
&self.kvs,
|
|
199
|
+
);
|
|
121
200
|
}
|
|
122
201
|
|
|
123
202
|
/// Record time it took to complete activity execution, from the time core generated the
|
|
124
203
|
/// activity task, to the time lang responded with a completion (failure or success).
|
|
125
204
|
pub(crate) fn act_execution_latency(&self, dur: Duration) {
|
|
126
|
-
|
|
205
|
+
self.instruments
|
|
206
|
+
.act_exec_latency
|
|
207
|
+
.record(&self.ctx, dur.as_millis() as u64, &self.kvs);
|
|
127
208
|
}
|
|
128
209
|
|
|
129
210
|
/// A worker was registered
|
|
130
211
|
pub(crate) fn worker_registered(&self) {
|
|
131
|
-
|
|
212
|
+
self.instruments
|
|
213
|
+
.worker_registered
|
|
214
|
+
.add(&self.ctx, 1, &self.kvs);
|
|
132
215
|
}
|
|
133
216
|
|
|
134
217
|
/// Record current number of available task slots. Context should have worker type set.
|
|
135
218
|
pub(crate) fn available_task_slots(&self, num: usize) {
|
|
136
|
-
|
|
219
|
+
self.instruments
|
|
220
|
+
.task_slots_available
|
|
221
|
+
.record(&self.ctx, num as u64, &self.kvs)
|
|
137
222
|
}
|
|
138
223
|
|
|
139
224
|
/// Record current number of pollers. Context should include poller type / task queue tag.
|
|
140
225
|
pub(crate) fn record_num_pollers(&self, num: usize) {
|
|
141
|
-
|
|
226
|
+
self.instruments
|
|
227
|
+
.num_pollers
|
|
228
|
+
.record(&self.ctx, num as u64, &self.kvs);
|
|
142
229
|
}
|
|
143
230
|
|
|
144
231
|
/// A workflow task found a cached workflow to run against
|
|
145
232
|
pub(crate) fn sticky_cache_hit(&self) {
|
|
146
|
-
|
|
233
|
+
self.instruments
|
|
234
|
+
.sticky_cache_hit
|
|
235
|
+
.add(&self.ctx, 1, &self.kvs);
|
|
147
236
|
}
|
|
148
237
|
|
|
149
238
|
/// A workflow task did not find a cached workflow
|
|
150
239
|
pub(crate) fn sticky_cache_miss(&self) {
|
|
151
|
-
|
|
240
|
+
self.instruments
|
|
241
|
+
.sticky_cache_miss
|
|
242
|
+
.add(&self.ctx, 1, &self.kvs);
|
|
152
243
|
}
|
|
153
244
|
|
|
154
245
|
/// Record current cache size (in number of wfs, not bytes)
|
|
155
246
|
pub(crate) fn cache_size(&self, size: u64) {
|
|
156
|
-
|
|
247
|
+
self.instruments
|
|
248
|
+
.sticky_cache_size
|
|
249
|
+
.record(&self.ctx, size, &self.kvs);
|
|
157
250
|
}
|
|
158
251
|
}
|
|
159
252
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
if
|
|
164
|
-
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
.get()
|
|
172
|
-
.map(|gtd| {
|
|
173
|
-
if gtd.no_temporal_prefix_for_metrics {
|
|
174
|
-
""
|
|
175
|
-
} else {
|
|
176
|
-
"temporal_"
|
|
177
|
-
}
|
|
178
|
-
})
|
|
179
|
-
.unwrap_or("")
|
|
180
|
-
}
|
|
253
|
+
impl Instruments {
|
|
254
|
+
fn new(telem: &TelemetryInstance) -> Self {
|
|
255
|
+
let no_op_meter: Meter;
|
|
256
|
+
let meter = if let Some(meter) = telem.get_metric_meter() {
|
|
257
|
+
meter
|
|
258
|
+
} else {
|
|
259
|
+
no_op_meter = NoopMeterProvider::default().meter("no_op");
|
|
260
|
+
&no_op_meter
|
|
261
|
+
};
|
|
262
|
+
Self::new_explicit(meter, telem.metric_prefix)
|
|
263
|
+
}
|
|
181
264
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
265
|
+
fn new_explicit(meter: &Meter, metric_prefix: &'static str) -> Self {
|
|
266
|
+
let ctr = |name: &'static str| -> Counter<u64> {
|
|
267
|
+
meter.u64_counter(metric_prefix.to_string() + name).init()
|
|
268
|
+
};
|
|
269
|
+
let hst = |name: &'static str| -> Histogram<u64> {
|
|
270
|
+
meter.u64_histogram(metric_prefix.to_string() + name).init()
|
|
271
|
+
};
|
|
272
|
+
Self {
|
|
273
|
+
wf_completed_counter: ctr("workflow_completed"),
|
|
274
|
+
wf_canceled_counter: ctr("workflow_canceled"),
|
|
275
|
+
wf_failed_counter: ctr("workflow_failed"),
|
|
276
|
+
wf_cont_counter: ctr("workflow_continue_as_new"),
|
|
277
|
+
wf_e2e_latency: hst(WF_E2E_LATENCY_NAME),
|
|
278
|
+
wf_task_queue_poll_empty_counter: ctr("workflow_task_queue_poll_empty"),
|
|
279
|
+
wf_task_queue_poll_succeed_counter: ctr("workflow_task_queue_poll_succeed"),
|
|
280
|
+
wf_task_execution_failure_counter: ctr("workflow_task_queue_poll_failed"),
|
|
281
|
+
wf_task_sched_to_start_latency: hst(WF_TASK_SCHED_TO_START_LATENCY_NAME),
|
|
282
|
+
wf_task_replay_latency: hst(WF_TASK_REPLAY_LATENCY_NAME),
|
|
283
|
+
wf_task_execution_latency: hst(WF_TASK_EXECUTION_LATENCY_NAME),
|
|
284
|
+
act_poll_no_task: ctr("activity_poll_no_task"),
|
|
285
|
+
act_task_received_counter: ctr("activity_task_received"),
|
|
286
|
+
act_execution_failed: ctr("activity_execution_failed"),
|
|
287
|
+
act_sched_to_start_latency: hst(ACT_SCHED_TO_START_LATENCY_NAME),
|
|
288
|
+
act_exec_latency: hst(ACT_EXEC_LATENCY_NAME),
|
|
289
|
+
// name kept as worker start for compat with old sdk / what users expect
|
|
290
|
+
worker_registered: ctr("worker_start"),
|
|
291
|
+
num_pollers: hst(NUM_POLLERS_NAME),
|
|
292
|
+
task_slots_available: hst(TASK_SLOTS_AVAILABLE_NAME),
|
|
293
|
+
sticky_cache_hit: ctr("sticky_cache_hit"),
|
|
294
|
+
sticky_cache_miss: ctr("sticky_cache_miss"),
|
|
295
|
+
sticky_cache_size: hst(STICKY_CACHE_SIZE_NAME),
|
|
197
296
|
}
|
|
198
|
-
}
|
|
297
|
+
}
|
|
199
298
|
}
|
|
200
299
|
|
|
201
300
|
const KEY_NAMESPACE: &str = "namespace";
|
|
@@ -204,6 +303,7 @@ const KEY_TASK_QUEUE: &str = "task_queue";
|
|
|
204
303
|
const KEY_ACT_TYPE: &str = "activity_type";
|
|
205
304
|
const KEY_POLLER_TYPE: &str = "poller_type";
|
|
206
305
|
const KEY_WORKER_TYPE: &str = "worker_type";
|
|
306
|
+
const KEY_EAGER: &str = "eager";
|
|
207
307
|
|
|
208
308
|
pub(crate) fn workflow_poller() -> KeyValue {
|
|
209
309
|
KeyValue::new(KEY_POLLER_TYPE, "workflow_task")
|
|
@@ -224,85 +324,27 @@ pub(crate) fn workflow_type(ty: String) -> KeyValue {
|
|
|
224
324
|
KeyValue::new(KEY_WF_TYPE, ty)
|
|
225
325
|
}
|
|
226
326
|
pub(crate) fn workflow_worker_type() -> KeyValue {
|
|
227
|
-
KeyValue
|
|
228
|
-
key: opentelemetry::Key::from_static_str(KEY_WORKER_TYPE),
|
|
229
|
-
value: opentelemetry::Value::String("WorkflowWorker".into()),
|
|
230
|
-
}
|
|
327
|
+
KeyValue::new(KEY_WORKER_TYPE, "WorkflowWorker")
|
|
231
328
|
}
|
|
232
329
|
pub(crate) fn activity_worker_type() -> KeyValue {
|
|
233
|
-
KeyValue
|
|
234
|
-
key: opentelemetry::Key::from_static_str(KEY_WORKER_TYPE),
|
|
235
|
-
value: opentelemetry::Value::String("ActivityWorker".into()),
|
|
236
|
-
}
|
|
330
|
+
KeyValue::new(KEY_WORKER_TYPE, "ActivityWorker")
|
|
237
331
|
}
|
|
238
332
|
pub(crate) fn local_activity_worker_type() -> KeyValue {
|
|
239
|
-
KeyValue
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
333
|
+
KeyValue::new(KEY_WORKER_TYPE, "LocalActivityWorker")
|
|
334
|
+
}
|
|
335
|
+
pub(crate) fn eager(is_eager: bool) -> KeyValue {
|
|
336
|
+
KeyValue::new(KEY_EAGER, is_eager)
|
|
243
337
|
}
|
|
244
338
|
|
|
245
|
-
tm!(ctr, WF_COMPLETED_COUNTER, "workflow_completed");
|
|
246
|
-
tm!(ctr, WF_CANCELED_COUNTER, "workflow_canceled");
|
|
247
|
-
tm!(ctr, WF_FAILED_COUNTER, "workflow_failed");
|
|
248
|
-
tm!(ctr, WF_CONT_COUNTER, "workflow_continue_as_new");
|
|
249
339
|
const WF_E2E_LATENCY_NAME: &str = "workflow_endtoend_latency";
|
|
250
|
-
tm!(vr_u64, WF_E2E_LATENCY, WF_E2E_LATENCY_NAME);
|
|
251
|
-
|
|
252
|
-
tm!(
|
|
253
|
-
ctr,
|
|
254
|
-
WF_TASK_QUEUE_POLL_EMPTY_COUNTER,
|
|
255
|
-
"workflow_task_queue_poll_empty"
|
|
256
|
-
);
|
|
257
|
-
tm!(
|
|
258
|
-
ctr,
|
|
259
|
-
WF_TASK_QUEUE_POLL_SUCCEED_COUNTER,
|
|
260
|
-
"workflow_task_queue_poll_succeed"
|
|
261
|
-
);
|
|
262
|
-
tm!(
|
|
263
|
-
ctr,
|
|
264
|
-
WF_TASK_EXECUTION_FAILURE_COUNTER,
|
|
265
|
-
"workflow_task_execution_failed"
|
|
266
|
-
);
|
|
267
340
|
const WF_TASK_SCHED_TO_START_LATENCY_NAME: &str = "workflow_task_schedule_to_start_latency";
|
|
268
|
-
tm!(
|
|
269
|
-
vr_u64,
|
|
270
|
-
WF_TASK_SCHED_TO_START_LATENCY,
|
|
271
|
-
WF_TASK_SCHED_TO_START_LATENCY_NAME
|
|
272
|
-
);
|
|
273
341
|
const WF_TASK_REPLAY_LATENCY_NAME: &str = "workflow_task_replay_latency";
|
|
274
|
-
tm!(vr_u64, WF_TASK_REPLAY_LATENCY, WF_TASK_REPLAY_LATENCY_NAME);
|
|
275
342
|
const WF_TASK_EXECUTION_LATENCY_NAME: &str = "workflow_task_execution_latency";
|
|
276
|
-
tm!(
|
|
277
|
-
vr_u64,
|
|
278
|
-
WF_TASK_EXECUTION_LATENCY,
|
|
279
|
-
WF_TASK_EXECUTION_LATENCY_NAME
|
|
280
|
-
);
|
|
281
|
-
|
|
282
|
-
tm!(ctr, ACT_POLL_NO_TASK, "activity_poll_no_task");
|
|
283
|
-
tm!(ctr, ACT_EXECUTION_FAILED, "activity_execution_failed");
|
|
284
|
-
// Act task unregistered can't be known by core right now since it's not well defined as an
|
|
285
|
-
// activity result. We could add a flag to the failed activity result if desired.
|
|
286
343
|
const ACT_SCHED_TO_START_LATENCY_NAME: &str = "activity_schedule_to_start_latency";
|
|
287
|
-
tm!(
|
|
288
|
-
vr_u64,
|
|
289
|
-
ACT_SCHED_TO_START_LATENCY,
|
|
290
|
-
ACT_SCHED_TO_START_LATENCY_NAME
|
|
291
|
-
);
|
|
292
344
|
const ACT_EXEC_LATENCY_NAME: &str = "activity_execution_latency";
|
|
293
|
-
tm!(vr_u64, ACT_EXEC_LATENCY, ACT_EXEC_LATENCY_NAME);
|
|
294
|
-
|
|
295
|
-
// name kept as worker start for compat with old sdk / what users expect
|
|
296
|
-
tm!(ctr, WORKER_REGISTERED, "worker_start");
|
|
297
345
|
const NUM_POLLERS_NAME: &str = "num_pollers";
|
|
298
|
-
tm!(vr_u64, NUM_POLLERS, NUM_POLLERS_NAME);
|
|
299
346
|
const TASK_SLOTS_AVAILABLE_NAME: &str = "worker_task_slots_available";
|
|
300
|
-
tm!(vr_u64, TASK_SLOTS_AVAILABLE, TASK_SLOTS_AVAILABLE_NAME);
|
|
301
|
-
|
|
302
|
-
tm!(ctr, STICKY_CACHE_HIT, "sticky_cache_hit");
|
|
303
|
-
tm!(ctr, STICKY_CACHE_MISS, "sticky_cache_miss");
|
|
304
347
|
const STICKY_CACHE_SIZE_NAME: &str = "sticky_cache_size";
|
|
305
|
-
tm!(vr_u64, STICKY_CACHE_SIZE, STICKY_CACHE_SIZE_NAME);
|
|
306
348
|
|
|
307
349
|
/// Artisanal, handcrafted latency buckets for workflow e2e latency which should expose a useful
|
|
308
350
|
/// set of buckets for < 1 day runtime workflows. Beyond that, this metric probably isn't very
|
|
@@ -335,15 +377,18 @@ static WF_TASK_MS_BUCKETS: &[f64] = &[1., 10., 20., 50., 100., 200., 500., 1000.
|
|
|
335
377
|
static ACT_EXE_MS_BUCKETS: &[f64] = &[50., 100., 500., 1000., 5000., 10_000., 60_000.];
|
|
336
378
|
|
|
337
379
|
/// Schedule-to-start latency buckets for both WFT and AT
|
|
338
|
-
static TASK_SCHED_TO_START_MS_BUCKETS: &[f64] =
|
|
380
|
+
static TASK_SCHED_TO_START_MS_BUCKETS: &[f64] =
|
|
381
|
+
&[100., 500., 1000., 5000., 10_000., 100_000., 1_000_000.];
|
|
339
382
|
|
|
340
383
|
/// Default buckets. Should never really be used as they will be meaningless for many things, but
|
|
341
384
|
/// broadly it's trying to represent latencies in millis.
|
|
342
385
|
pub(super) static DEFAULT_MS_BUCKETS: &[f64] = &[50., 100., 500., 1000., 2500., 10_000.];
|
|
343
386
|
|
|
344
387
|
/// Chooses appropriate aggregators for our metrics
|
|
345
|
-
#[derive(Debug)]
|
|
346
|
-
pub struct SDKAggSelector
|
|
388
|
+
#[derive(Debug, Clone)]
|
|
389
|
+
pub struct SDKAggSelector {
|
|
390
|
+
pub metric_prefix: &'static str,
|
|
391
|
+
}
|
|
347
392
|
|
|
348
393
|
impl AggregatorSelector for SDKAggSelector {
|
|
349
394
|
fn aggregator_for(&self, descriptor: &Descriptor) -> Option<Arc<dyn Aggregator + Send + Sync>> {
|
|
@@ -355,7 +400,7 @@ impl AggregatorSelector for SDKAggSelector {
|
|
|
355
400
|
if *descriptor.instrument_kind() == InstrumentKind::Histogram {
|
|
356
401
|
let dname = descriptor
|
|
357
402
|
.name()
|
|
358
|
-
.strip_prefix(metric_prefix
|
|
403
|
+
.strip_prefix(self.metric_prefix)
|
|
359
404
|
.unwrap_or_else(|| descriptor.name());
|
|
360
405
|
// Some recorders are just gauges
|
|
361
406
|
match dname {
|