@temporalio/core-bridge 1.9.2 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +754 -473
- package/Cargo.toml +3 -3
- package/lib/index.d.ts +33 -2
- package/lib/index.js.map +1 -1
- package/package.json +4 -4
- package/releases/aarch64-apple-darwin/index.node +0 -0
- package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
- package/releases/x86_64-apple-darwin/index.node +0 -0
- package/releases/x86_64-pc-windows-msvc/index.node +0 -0
- package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
- package/scripts/build.js +4 -3
- package/sdk-core/.cargo/config.toml +2 -4
- package/sdk-core/.github/workflows/heavy.yml +1 -1
- package/sdk-core/.github/workflows/per-pr.yml +6 -4
- package/sdk-core/Cargo.toml +10 -3
- package/sdk-core/README.md +4 -6
- package/sdk-core/client/Cargo.toml +13 -5
- package/sdk-core/client/src/lib.rs +123 -34
- package/sdk-core/client/src/metrics.rs +70 -18
- package/sdk-core/client/src/proxy.rs +85 -0
- package/sdk-core/client/src/raw.rs +67 -5
- package/sdk-core/client/src/worker_registry/mod.rs +5 -3
- package/sdk-core/client/src/workflow_handle/mod.rs +3 -1
- package/sdk-core/core/Cargo.toml +31 -37
- package/sdk-core/core/src/abstractions/take_cell.rs +3 -3
- package/sdk-core/core/src/abstractions.rs +176 -108
- package/sdk-core/core/src/core_tests/activity_tasks.rs +4 -13
- package/sdk-core/core/src/core_tests/determinism.rs +2 -1
- package/sdk-core/core/src/core_tests/local_activities.rs +3 -3
- package/sdk-core/core/src/core_tests/mod.rs +3 -3
- package/sdk-core/core/src/core_tests/queries.rs +42 -5
- package/sdk-core/core/src/core_tests/workers.rs +2 -3
- package/sdk-core/core/src/core_tests/workflow_tasks.rs +115 -15
- package/sdk-core/core/src/ephemeral_server/mod.rs +109 -136
- package/sdk-core/core/src/internal_flags.rs +8 -8
- package/sdk-core/core/src/lib.rs +16 -11
- package/sdk-core/core/src/pollers/mod.rs +11 -5
- package/sdk-core/core/src/pollers/poll_buffer.rs +48 -29
- package/sdk-core/core/src/protosext/mod.rs +32 -32
- package/sdk-core/core/src/protosext/protocol_messages.rs +14 -24
- package/sdk-core/core/src/retry_logic.rs +2 -2
- package/sdk-core/core/src/telemetry/log_export.rs +10 -9
- package/sdk-core/core/src/telemetry/metrics.rs +233 -330
- package/sdk-core/core/src/telemetry/mod.rs +11 -38
- package/sdk-core/core/src/telemetry/otel.rs +355 -0
- package/sdk-core/core/src/telemetry/prometheus_server.rs +36 -23
- package/sdk-core/core/src/test_help/mod.rs +80 -59
- package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +6 -6
- package/sdk-core/core/src/worker/activities/local_activities.rs +46 -43
- package/sdk-core/core/src/worker/activities.rs +45 -46
- package/sdk-core/core/src/worker/client/mocks.rs +8 -7
- package/sdk-core/core/src/worker/client.rs +40 -39
- package/sdk-core/core/src/worker/mod.rs +72 -42
- package/sdk-core/core/src/worker/slot_provider.rs +28 -28
- package/sdk-core/core/src/worker/slot_supplier.rs +1 -0
- package/sdk-core/core/src/worker/tuner/fixed_size.rs +52 -0
- package/sdk-core/core/src/worker/tuner/resource_based.rs +561 -0
- package/sdk-core/core/src/worker/tuner.rs +122 -0
- package/sdk-core/core/src/worker/workflow/driven_workflow.rs +6 -6
- package/sdk-core/core/src/worker/workflow/history_update.rs +27 -53
- package/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +4 -17
- package/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +1 -10
- package/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +4 -11
- package/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +17 -35
- package/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +0 -8
- package/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +1 -5
- package/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +0 -5
- package/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +0 -5
- package/sdk-core/core/src/worker/workflow/machines/mod.rs +0 -14
- package/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +0 -5
- package/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +0 -5
- package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +1 -10
- package/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +3 -10
- package/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +12 -8
- package/sdk-core/core/src/worker/workflow/machines/update_state_machine.rs +0 -10
- package/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +6 -13
- package/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +27 -37
- package/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +3 -14
- package/sdk-core/core/src/worker/workflow/managed_run.rs +84 -54
- package/sdk-core/core/src/worker/workflow/mod.rs +63 -160
- package/sdk-core/core/src/worker/workflow/run_cache.rs +22 -13
- package/sdk-core/core/src/worker/workflow/wft_extraction.rs +16 -3
- package/sdk-core/core/src/worker/workflow/wft_poller.rs +15 -12
- package/sdk-core/core/src/worker/workflow/workflow_stream.rs +39 -78
- package/sdk-core/core-api/Cargo.toml +6 -5
- package/sdk-core/core-api/src/errors.rs +8 -0
- package/sdk-core/core-api/src/telemetry/metrics.rs +75 -4
- package/sdk-core/core-api/src/telemetry.rs +7 -1
- package/sdk-core/core-api/src/worker.rs +212 -56
- package/sdk-core/fsm/Cargo.toml +3 -0
- package/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +1 -1
- package/sdk-core/sdk/Cargo.toml +5 -7
- package/sdk-core/sdk/src/app_data.rs +3 -3
- package/sdk-core/sdk/src/lib.rs +5 -3
- package/sdk-core/sdk/src/workflow_context/options.rs +1 -1
- package/sdk-core/sdk/src/workflow_context.rs +10 -9
- package/sdk-core/sdk/src/workflow_future.rs +1 -1
- package/sdk-core/sdk-core-protos/Cargo.toml +8 -6
- package/sdk-core/sdk-core-protos/build.rs +1 -10
- package/sdk-core/sdk-core-protos/protos/api_upstream/.github/PULL_REQUEST_TEMPLATE.md +3 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/.github/workflows/ci.yml +26 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/Makefile +42 -20
- package/sdk-core/sdk-core-protos/protos/api_upstream/README.md +2 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/api-linter.yaml +36 -26
- package/sdk-core/sdk-core-protos/protos/api_upstream/buf.lock +2 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/google/protobuf/struct.proto +95 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/openapi/openapiv2.json +9632 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/openapi/openapiv3.yaml +7337 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/openapi/payload_description.txt +2 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/command/v1/message.proto +45 -11
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/common/v1/message.proto +22 -4
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/command_type.proto +2 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/common.proto +44 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/event_type.proto +18 -3
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +20 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +30 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/update.proto +7 -8
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/workflow.proto +23 -5
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/errordetails/v1/message.proto +20 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/failure/v1/message.proto +25 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/history/v1/message.proto +141 -15
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/namespace/v1/message.proto +12 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/nexus/v1/message.proto +193 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +73 -6
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +46 -4
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/schedule/v1/message.proto +4 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/sdk/v1/workflow_metadata.proto +2 -2
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +116 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/workflow/v1/message.proto +134 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +274 -29
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +57 -1
- package/sdk-core/sdk-core-protos/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +10 -12
- package/sdk-core/sdk-core-protos/src/history_builder.rs +1 -1
- package/sdk-core/sdk-core-protos/src/lib.rs +54 -51
- package/sdk-core/sdk-core-protos/src/task_token.rs +11 -2
- package/sdk-core/test-utils/Cargo.toml +7 -4
- package/sdk-core/test-utils/src/histfetch.rs +1 -1
- package/sdk-core/test-utils/src/lib.rs +44 -62
- package/sdk-core/tests/fuzzy_workflow.rs +5 -2
- package/sdk-core/tests/heavy_tests.rs +114 -17
- package/sdk-core/tests/integ_tests/activity_functions.rs +1 -1
- package/sdk-core/tests/integ_tests/client_tests.rs +2 -2
- package/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +38 -26
- package/sdk-core/tests/integ_tests/metrics_tests.rs +126 -17
- package/sdk-core/tests/integ_tests/polling_tests.rs +118 -2
- package/sdk-core/tests/integ_tests/update_tests.rs +3 -5
- package/sdk-core/tests/integ_tests/visibility_tests.rs +3 -3
- package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +1 -1
- package/sdk-core/tests/integ_tests/workflow_tests/appdata_propagation.rs +1 -1
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +1 -1
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +1 -1
- package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +3 -3
- package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +5 -4
- package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +3 -2
- package/sdk-core/tests/integ_tests/workflow_tests/eager.rs +6 -10
- package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +9 -7
- package/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +1 -1
- package/sdk-core/tests/integ_tests/workflow_tests/patches.rs +14 -9
- package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -1
- package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +6 -13
- package/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +9 -6
- package/sdk-core/tests/integ_tests/workflow_tests/timers.rs +5 -5
- package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +1 -1
- package/sdk-core/tests/integ_tests/workflow_tests.rs +115 -11
- package/sdk-core/tests/main.rs +2 -2
- package/src/conversions.rs +57 -0
- package/src/lib.rs +1 -0
- package/src/runtime.rs +51 -35
- package/ts/index.ts +67 -3
- package/sdk-core/core/src/worker/workflow/workflow_stream/saved_wf_inputs.rs +0 -117
- package/sdk-core/core/src/worker/workflow/workflow_stream/tonic_status_serde.rs +0 -24
- package/sdk-core/sdk/src/payload_converter.rs +0 -11
- package/sdk-core/sdk-core-protos/protos/api_upstream/.buildkite/Dockerfile +0 -2
- package/sdk-core/sdk-core-protos/protos/api_upstream/.buildkite/docker-compose.yml +0 -15
- package/sdk-core/sdk-core-protos/protos/api_upstream/.buildkite/pipeline.yml +0 -10
- package/sdk-core/test-utils/src/wf_input_saver.rs +0 -50
- package/sdk-core/tests/wf_input_replay.rs +0 -32
|
@@ -3,27 +3,24 @@
|
|
|
3
3
|
|
|
4
4
|
mod log_export;
|
|
5
5
|
pub(crate) mod metrics;
|
|
6
|
+
#[cfg(feature = "otel")]
|
|
7
|
+
mod otel;
|
|
8
|
+
#[cfg(feature = "otel")]
|
|
6
9
|
mod prometheus_server;
|
|
7
10
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
};
|
|
11
|
+
#[cfg(feature = "otel")]
|
|
12
|
+
pub use metrics::{default_buckets_for, MetricsCallBuffer};
|
|
13
|
+
#[cfg(feature = "otel")]
|
|
14
|
+
pub use otel::{build_otlp_metric_exporter, start_prometheus_metric_exporter};
|
|
12
15
|
|
|
13
16
|
pub use log_export::{CoreLogBuffer, CoreLogBufferedConsumer, CoreLogStreamConsumer};
|
|
14
17
|
|
|
15
18
|
use crate::telemetry::{log_export::CoreLogConsumerLayer, metrics::PrefixedMetricsMeter};
|
|
16
19
|
use itertools::Itertools;
|
|
17
|
-
use once_cell::sync::OnceCell;
|
|
18
|
-
use opentelemetry::KeyValue;
|
|
19
|
-
use opentelemetry_sdk::{
|
|
20
|
-
metrics::{data::Temporality, reader::TemporalitySelector, InstrumentKind},
|
|
21
|
-
Resource,
|
|
22
|
-
};
|
|
23
20
|
use parking_lot::Mutex;
|
|
24
21
|
use std::{
|
|
25
22
|
cell::RefCell,
|
|
26
|
-
collections::
|
|
23
|
+
collections::VecDeque,
|
|
27
24
|
env,
|
|
28
25
|
sync::{
|
|
29
26
|
atomic::{AtomicBool, Ordering},
|
|
@@ -32,7 +29,7 @@ use std::{
|
|
|
32
29
|
};
|
|
33
30
|
use temporal_sdk_core_api::telemetry::{
|
|
34
31
|
metrics::{CoreMeter, MetricKeyValue, NewAttributes, TemporalMeter},
|
|
35
|
-
CoreLog, CoreTelemetry, Logger,
|
|
32
|
+
CoreLog, CoreTelemetry, Logger, TelemetryOptions,
|
|
36
33
|
};
|
|
37
34
|
use tracing::{Level, Subscriber};
|
|
38
35
|
use tracing_subscriber::{layer::SubscriberExt, EnvFilter, Layer};
|
|
@@ -124,7 +121,8 @@ impl TelemetryInstance {
|
|
|
124
121
|
}
|
|
125
122
|
|
|
126
123
|
thread_local! {
|
|
127
|
-
static SUB_GUARD: RefCell<Option<tracing::subscriber::DefaultGuard>> =
|
|
124
|
+
static SUB_GUARD: RefCell<Option<tracing::subscriber::DefaultGuard>> =
|
|
125
|
+
const { RefCell::new(None) };
|
|
128
126
|
}
|
|
129
127
|
/// Set the trace subscriber for the current thread. This must be done in every thread which uses
|
|
130
128
|
/// core stuff, otherwise traces/logs will not be collected on that thread. For example, if using
|
|
@@ -252,31 +250,6 @@ pub fn telemetry_init_global(opts: TelemetryOptions) -> Result<(), anyhow::Error
|
|
|
252
250
|
Ok(())
|
|
253
251
|
}
|
|
254
252
|
|
|
255
|
-
fn default_resource_kvs() -> &'static [KeyValue] {
|
|
256
|
-
static INSTANCE: OnceCell<[KeyValue; 1]> = OnceCell::new();
|
|
257
|
-
INSTANCE.get_or_init(|| [KeyValue::new("service.name", TELEM_SERVICE_NAME)])
|
|
258
|
-
}
|
|
259
|
-
|
|
260
|
-
fn default_resource(override_values: &HashMap<String, String>) -> Resource {
|
|
261
|
-
let override_kvs = override_values
|
|
262
|
-
.iter()
|
|
263
|
-
.map(|(k, v)| KeyValue::new(k.clone(), v.clone()));
|
|
264
|
-
Resource::new(default_resource_kvs().iter().cloned()).merge(&Resource::new(override_kvs))
|
|
265
|
-
}
|
|
266
|
-
|
|
267
|
-
#[derive(Clone)]
|
|
268
|
-
struct ConstantTemporality(Temporality);
|
|
269
|
-
impl TemporalitySelector for ConstantTemporality {
|
|
270
|
-
fn temporality(&self, _: InstrumentKind) -> Temporality {
|
|
271
|
-
self.0
|
|
272
|
-
}
|
|
273
|
-
}
|
|
274
|
-
fn metric_temporality_to_selector(t: MetricTemporality) -> impl TemporalitySelector + Clone {
|
|
275
|
-
match t {
|
|
276
|
-
MetricTemporality::Cumulative => ConstantTemporality(Temporality::Cumulative),
|
|
277
|
-
MetricTemporality::Delta => ConstantTemporality(Temporality::Delta),
|
|
278
|
-
}
|
|
279
|
-
}
|
|
280
253
|
#[cfg(test)]
|
|
281
254
|
pub use test_initters::*;
|
|
282
255
|
|
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
use super::{
|
|
2
|
+
default_buckets_for,
|
|
3
|
+
metrics::{
|
|
4
|
+
ACT_EXEC_LATENCY_NAME, ACT_SCHED_TO_START_LATENCY_NAME, DEFAULT_MS_BUCKETS,
|
|
5
|
+
WF_E2E_LATENCY_NAME, WF_TASK_EXECUTION_LATENCY_NAME, WF_TASK_REPLAY_LATENCY_NAME,
|
|
6
|
+
WF_TASK_SCHED_TO_START_LATENCY_NAME,
|
|
7
|
+
},
|
|
8
|
+
prometheus_server::PromServer,
|
|
9
|
+
TELEM_SERVICE_NAME,
|
|
10
|
+
};
|
|
11
|
+
use crate::{abstractions::dbg_panic, telemetry::metrics::DEFAULT_S_BUCKETS};
|
|
12
|
+
use opentelemetry::{
|
|
13
|
+
self,
|
|
14
|
+
metrics::{Meter, MeterProvider as MeterProviderT, Unit},
|
|
15
|
+
KeyValue,
|
|
16
|
+
};
|
|
17
|
+
use opentelemetry_otlp::WithExportConfig;
|
|
18
|
+
use opentelemetry_sdk::{
|
|
19
|
+
metrics::{
|
|
20
|
+
data::Temporality,
|
|
21
|
+
new_view,
|
|
22
|
+
reader::{AggregationSelector, DefaultAggregationSelector, TemporalitySelector},
|
|
23
|
+
Aggregation, Instrument, InstrumentKind, MeterProviderBuilder, PeriodicReader, View,
|
|
24
|
+
},
|
|
25
|
+
runtime, AttributeSet, Resource,
|
|
26
|
+
};
|
|
27
|
+
use parking_lot::RwLock;
|
|
28
|
+
use std::{collections::HashMap, net::SocketAddr, sync::Arc, time::Duration};
|
|
29
|
+
use temporal_sdk_core_api::telemetry::{
|
|
30
|
+
metrics::{
|
|
31
|
+
CoreMeter, Counter, Gauge, GaugeF64, Histogram, HistogramDuration, HistogramF64,
|
|
32
|
+
MetricAttributes, MetricParameters, NewAttributes,
|
|
33
|
+
},
|
|
34
|
+
MetricTemporality, OtelCollectorOptions, PrometheusExporterOptions,
|
|
35
|
+
};
|
|
36
|
+
use tokio::task::AbortHandle;
|
|
37
|
+
use tonic::metadata::MetadataMap;
|
|
38
|
+
|
|
39
|
+
/// Chooses appropriate aggregators for our metrics
|
|
40
|
+
#[derive(Debug, Clone)]
|
|
41
|
+
struct SDKAggSelector {
|
|
42
|
+
use_seconds: bool,
|
|
43
|
+
default: DefaultAggregationSelector,
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
impl SDKAggSelector {
|
|
47
|
+
fn new(use_seconds: bool) -> Self {
|
|
48
|
+
Self {
|
|
49
|
+
use_seconds,
|
|
50
|
+
default: Default::default(),
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
impl AggregationSelector for SDKAggSelector {
|
|
56
|
+
fn aggregation(&self, kind: InstrumentKind) -> Aggregation {
|
|
57
|
+
match kind {
|
|
58
|
+
InstrumentKind::Histogram => Aggregation::ExplicitBucketHistogram {
|
|
59
|
+
boundaries: if self.use_seconds {
|
|
60
|
+
DEFAULT_S_BUCKETS.to_vec()
|
|
61
|
+
} else {
|
|
62
|
+
DEFAULT_MS_BUCKETS.to_vec()
|
|
63
|
+
},
|
|
64
|
+
record_min_max: true,
|
|
65
|
+
},
|
|
66
|
+
_ => self.default.aggregation(kind),
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
fn histo_view(
|
|
72
|
+
metric_name: &'static str,
|
|
73
|
+
use_seconds: bool,
|
|
74
|
+
) -> opentelemetry::metrics::Result<Box<dyn View>> {
|
|
75
|
+
let buckets = default_buckets_for(metric_name, use_seconds);
|
|
76
|
+
new_view(
|
|
77
|
+
Instrument::new().name(format!("*{metric_name}")),
|
|
78
|
+
opentelemetry_sdk::metrics::Stream::new().aggregation(
|
|
79
|
+
Aggregation::ExplicitBucketHistogram {
|
|
80
|
+
boundaries: buckets.to_vec(),
|
|
81
|
+
record_min_max: true,
|
|
82
|
+
},
|
|
83
|
+
),
|
|
84
|
+
)
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
pub(super) fn augment_meter_provider_with_defaults(
|
|
88
|
+
mpb: MeterProviderBuilder,
|
|
89
|
+
global_tags: &HashMap<String, String>,
|
|
90
|
+
use_seconds: bool,
|
|
91
|
+
) -> opentelemetry::metrics::Result<MeterProviderBuilder> {
|
|
92
|
+
// Some histograms are actually gauges, but we have to use histograms otherwise they forget
|
|
93
|
+
// their value between collections since we don't use callbacks.
|
|
94
|
+
Ok(mpb
|
|
95
|
+
.with_view(histo_view(WF_E2E_LATENCY_NAME, use_seconds)?)
|
|
96
|
+
.with_view(histo_view(WF_TASK_EXECUTION_LATENCY_NAME, use_seconds)?)
|
|
97
|
+
.with_view(histo_view(WF_TASK_REPLAY_LATENCY_NAME, use_seconds)?)
|
|
98
|
+
.with_view(histo_view(
|
|
99
|
+
WF_TASK_SCHED_TO_START_LATENCY_NAME,
|
|
100
|
+
use_seconds,
|
|
101
|
+
)?)
|
|
102
|
+
.with_view(histo_view(ACT_SCHED_TO_START_LATENCY_NAME, use_seconds)?)
|
|
103
|
+
.with_view(histo_view(ACT_EXEC_LATENCY_NAME, use_seconds)?)
|
|
104
|
+
.with_resource(default_resource(global_tags)))
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
/// OTel has no built-in synchronous Gauge. Histograms used to be able to serve that purpose, but
|
|
108
|
+
/// they broke that. Lovely. So, we need to implement one by hand.
|
|
109
|
+
pub(crate) struct MemoryGauge<U> {
|
|
110
|
+
labels_to_values: Arc<RwLock<HashMap<AttributeSet, U>>>,
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
macro_rules! impl_memory_gauge {
|
|
114
|
+
($ty:ty, $gauge_fn:ident, $observe_fn:ident) => {
|
|
115
|
+
impl MemoryGauge<$ty> {
|
|
116
|
+
fn new(params: MetricParameters, meter: &Meter) -> Self {
|
|
117
|
+
let gauge = meter
|
|
118
|
+
.$gauge_fn(params.name)
|
|
119
|
+
.with_unit(Unit::new(params.unit))
|
|
120
|
+
.with_description(params.description)
|
|
121
|
+
.init();
|
|
122
|
+
let map = Arc::new(RwLock::new(HashMap::<AttributeSet, $ty>::new()));
|
|
123
|
+
let map_c = map.clone();
|
|
124
|
+
meter
|
|
125
|
+
.register_callback(&[gauge.as_any()], move |o| {
|
|
126
|
+
// This whole thing is... extra stupid.
|
|
127
|
+
// See https://github.com/open-telemetry/opentelemetry-rust/issues/1181
|
|
128
|
+
// The performance is likely bad here, but, given this is only called when
|
|
129
|
+
// metrics are exported it should be livable for now.
|
|
130
|
+
let map_rlock = map_c.read();
|
|
131
|
+
for (kvs, val) in map_rlock.iter() {
|
|
132
|
+
let kvs: Vec<_> = kvs
|
|
133
|
+
.iter()
|
|
134
|
+
.map(|(k, v)| KeyValue::new(k.clone(), v.clone()))
|
|
135
|
+
.collect();
|
|
136
|
+
o.$observe_fn(&gauge, *val, kvs.as_slice())
|
|
137
|
+
}
|
|
138
|
+
})
|
|
139
|
+
.expect("instrument must exist we just created it");
|
|
140
|
+
MemoryGauge {
|
|
141
|
+
labels_to_values: map,
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
};
|
|
146
|
+
}
|
|
147
|
+
impl_memory_gauge!(u64, u64_observable_gauge, observe_u64);
|
|
148
|
+
impl_memory_gauge!(f64, f64_observable_gauge, observe_f64);
|
|
149
|
+
|
|
150
|
+
impl<U> MemoryGauge<U> {
|
|
151
|
+
fn record(&self, val: U, kvs: &[KeyValue]) {
|
|
152
|
+
self.labels_to_values
|
|
153
|
+
.write()
|
|
154
|
+
.insert(AttributeSet::from(kvs), val);
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
/// Create an OTel meter that can be used as a [CoreMeter] to export metrics over OTLP.
|
|
159
|
+
pub fn build_otlp_metric_exporter(
|
|
160
|
+
opts: OtelCollectorOptions,
|
|
161
|
+
) -> Result<CoreOtelMeter, anyhow::Error> {
|
|
162
|
+
let exporter = opentelemetry_otlp::TonicExporterBuilder::default()
|
|
163
|
+
.with_endpoint(opts.url.to_string())
|
|
164
|
+
.with_metadata(MetadataMap::from_headers((&opts.headers).try_into()?))
|
|
165
|
+
.build_metrics_exporter(
|
|
166
|
+
Box::new(SDKAggSelector::new(opts.use_seconds_for_durations)),
|
|
167
|
+
Box::new(metric_temporality_to_selector(opts.metric_temporality)),
|
|
168
|
+
)?;
|
|
169
|
+
let reader = PeriodicReader::builder(exporter, runtime::Tokio)
|
|
170
|
+
.with_interval(opts.metric_periodicity)
|
|
171
|
+
.build();
|
|
172
|
+
let mp = augment_meter_provider_with_defaults(
|
|
173
|
+
MeterProviderBuilder::default().with_reader(reader),
|
|
174
|
+
&opts.global_tags,
|
|
175
|
+
opts.use_seconds_for_durations,
|
|
176
|
+
)?
|
|
177
|
+
.build();
|
|
178
|
+
Ok::<_, anyhow::Error>(CoreOtelMeter {
|
|
179
|
+
meter: mp.meter(TELEM_SERVICE_NAME),
|
|
180
|
+
use_seconds_for_durations: opts.use_seconds_for_durations,
|
|
181
|
+
})
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
pub struct StartedPromServer {
|
|
185
|
+
pub meter: Arc<CoreOtelMeter>,
|
|
186
|
+
pub bound_addr: SocketAddr,
|
|
187
|
+
pub abort_handle: AbortHandle,
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
/// Builds and runs a prometheus endpoint which can be scraped by prom instances for metrics export.
|
|
191
|
+
/// Returns the meter that can be used as a [CoreMeter].
|
|
192
|
+
///
|
|
193
|
+
/// Requires a Tokio runtime to exist, and will block briefly while binding the server endpoint.
|
|
194
|
+
pub fn start_prometheus_metric_exporter(
|
|
195
|
+
opts: PrometheusExporterOptions,
|
|
196
|
+
) -> Result<StartedPromServer, anyhow::Error> {
|
|
197
|
+
let (srv, exporter) =
|
|
198
|
+
PromServer::new(&opts, SDKAggSelector::new(opts.use_seconds_for_durations))?;
|
|
199
|
+
let meter_provider = augment_meter_provider_with_defaults(
|
|
200
|
+
MeterProviderBuilder::default().with_reader(exporter),
|
|
201
|
+
&opts.global_tags,
|
|
202
|
+
opts.use_seconds_for_durations,
|
|
203
|
+
)?
|
|
204
|
+
.build();
|
|
205
|
+
let bound_addr = srv.bound_addr()?;
|
|
206
|
+
let handle = tokio::spawn(async move { srv.run().await });
|
|
207
|
+
Ok(StartedPromServer {
|
|
208
|
+
meter: Arc::new(CoreOtelMeter {
|
|
209
|
+
meter: meter_provider.meter(TELEM_SERVICE_NAME),
|
|
210
|
+
use_seconds_for_durations: opts.use_seconds_for_durations,
|
|
211
|
+
}),
|
|
212
|
+
bound_addr,
|
|
213
|
+
abort_handle: handle.abort_handle(),
|
|
214
|
+
})
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
#[derive(Debug)]
|
|
218
|
+
pub struct CoreOtelMeter {
|
|
219
|
+
meter: Meter,
|
|
220
|
+
use_seconds_for_durations: bool,
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
impl CoreMeter for CoreOtelMeter {
|
|
224
|
+
fn new_attributes(&self, attribs: NewAttributes) -> MetricAttributes {
|
|
225
|
+
MetricAttributes::OTel {
|
|
226
|
+
kvs: Arc::new(attribs.attributes.into_iter().map(KeyValue::from).collect()),
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
fn extend_attributes(
|
|
231
|
+
&self,
|
|
232
|
+
existing: MetricAttributes,
|
|
233
|
+
attribs: NewAttributes,
|
|
234
|
+
) -> MetricAttributes {
|
|
235
|
+
if let MetricAttributes::OTel { mut kvs } = existing {
|
|
236
|
+
Arc::make_mut(&mut kvs).extend(attribs.attributes.into_iter().map(Into::into));
|
|
237
|
+
MetricAttributes::OTel { kvs }
|
|
238
|
+
} else {
|
|
239
|
+
dbg_panic!("Must use OTel attributes with an OTel metric implementation");
|
|
240
|
+
existing
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
fn counter(&self, params: MetricParameters) -> Arc<dyn Counter> {
|
|
245
|
+
Arc::new(
|
|
246
|
+
self.meter
|
|
247
|
+
.u64_counter(params.name)
|
|
248
|
+
.with_unit(Unit::new(params.unit))
|
|
249
|
+
.with_description(params.description)
|
|
250
|
+
.init(),
|
|
251
|
+
)
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
fn histogram(&self, params: MetricParameters) -> Arc<dyn Histogram> {
|
|
255
|
+
Arc::new(
|
|
256
|
+
self.meter
|
|
257
|
+
.u64_histogram(params.name)
|
|
258
|
+
.with_unit(Unit::new(params.unit))
|
|
259
|
+
.with_description(params.description)
|
|
260
|
+
.init(),
|
|
261
|
+
)
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
fn histogram_f64(&self, params: MetricParameters) -> Arc<dyn HistogramF64> {
|
|
265
|
+
Arc::new(
|
|
266
|
+
self.meter
|
|
267
|
+
.f64_histogram(params.name)
|
|
268
|
+
.with_unit(Unit::new(params.unit))
|
|
269
|
+
.with_description(params.description)
|
|
270
|
+
.init(),
|
|
271
|
+
)
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
fn histogram_duration(&self, mut params: MetricParameters) -> Arc<dyn HistogramDuration> {
|
|
275
|
+
Arc::new(if self.use_seconds_for_durations {
|
|
276
|
+
params.unit = "s".into();
|
|
277
|
+
DurationHistogram::Seconds(self.histogram_f64(params))
|
|
278
|
+
} else {
|
|
279
|
+
params.unit = "ms".into();
|
|
280
|
+
DurationHistogram::Milliseconds(self.histogram(params))
|
|
281
|
+
})
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
fn gauge(&self, params: MetricParameters) -> Arc<dyn Gauge> {
|
|
285
|
+
Arc::new(MemoryGauge::<u64>::new(params, &self.meter))
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
fn gauge_f64(&self, params: MetricParameters) -> Arc<dyn GaugeF64> {
|
|
289
|
+
Arc::new(MemoryGauge::<f64>::new(params, &self.meter))
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
/// A histogram being used to record durations.
|
|
294
|
+
#[derive(Clone)]
|
|
295
|
+
enum DurationHistogram {
|
|
296
|
+
Milliseconds(Arc<dyn Histogram>),
|
|
297
|
+
Seconds(Arc<dyn HistogramF64>),
|
|
298
|
+
}
|
|
299
|
+
impl HistogramDuration for DurationHistogram {
|
|
300
|
+
fn record(&self, value: Duration, attributes: &MetricAttributes) {
|
|
301
|
+
match self {
|
|
302
|
+
DurationHistogram::Milliseconds(h) => h.record(value.as_millis() as u64, attributes),
|
|
303
|
+
DurationHistogram::Seconds(h) => h.record(value.as_secs_f64(), attributes),
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
impl Gauge for MemoryGauge<u64> {
|
|
309
|
+
fn record(&self, value: u64, attributes: &MetricAttributes) {
|
|
310
|
+
if let MetricAttributes::OTel { kvs } = attributes {
|
|
311
|
+
self.record(value, kvs);
|
|
312
|
+
} else {
|
|
313
|
+
dbg_panic!("Must use OTel attributes with an OTel metric implementation");
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
impl GaugeF64 for MemoryGauge<f64> {
|
|
318
|
+
fn record(&self, value: f64, attributes: &MetricAttributes) {
|
|
319
|
+
if let MetricAttributes::OTel { kvs } = attributes {
|
|
320
|
+
self.record(value, kvs);
|
|
321
|
+
} else {
|
|
322
|
+
dbg_panic!("Must use OTel attributes with an OTel metric implementation");
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
fn default_resource_kvs() -> &'static [KeyValue] {
|
|
328
|
+
use once_cell::sync::OnceCell;
|
|
329
|
+
|
|
330
|
+
static INSTANCE: OnceCell<[KeyValue; 1]> = OnceCell::new();
|
|
331
|
+
INSTANCE.get_or_init(|| [KeyValue::new("service.name", TELEM_SERVICE_NAME)])
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
fn default_resource(override_values: &HashMap<String, String>) -> Resource {
|
|
335
|
+
let override_kvs = override_values
|
|
336
|
+
.iter()
|
|
337
|
+
.map(|(k, v)| KeyValue::new(k.clone(), v.clone()));
|
|
338
|
+
Resource::new(default_resource_kvs().iter().cloned()).merge(&Resource::new(override_kvs))
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
#[derive(Clone)]
|
|
342
|
+
struct ConstantTemporality(Temporality);
|
|
343
|
+
|
|
344
|
+
impl TemporalitySelector for ConstantTemporality {
|
|
345
|
+
fn temporality(&self, _: InstrumentKind) -> Temporality {
|
|
346
|
+
self.0
|
|
347
|
+
}
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
fn metric_temporality_to_selector(t: MetricTemporality) -> impl TemporalitySelector + Clone {
|
|
351
|
+
match t {
|
|
352
|
+
MetricTemporality::Cumulative => ConstantTemporality(Temporality::Cumulative),
|
|
353
|
+
MetricTemporality::Delta => ConstantTemporality(Temporality::Delta),
|
|
354
|
+
}
|
|
355
|
+
}
|
|
@@ -1,25 +1,26 @@
|
|
|
1
|
-
use
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
1
|
+
use http_body_util::Full;
|
|
2
|
+
use hyper::{body::Bytes, header::CONTENT_TYPE, service::service_fn, Method, Request, Response};
|
|
3
|
+
use hyper_util::{
|
|
4
|
+
rt::{TokioExecutor, TokioIo},
|
|
5
|
+
server::conn::auto,
|
|
6
6
|
};
|
|
7
7
|
use opentelemetry_prometheus::PrometheusExporter;
|
|
8
8
|
use opentelemetry_sdk::metrics::reader::AggregationSelector;
|
|
9
9
|
use prometheus::{Encoder, Registry, TextEncoder};
|
|
10
|
-
use std::{
|
|
10
|
+
use std::net::{SocketAddr, TcpListener};
|
|
11
11
|
use temporal_sdk_core_api::telemetry::PrometheusExporterOptions;
|
|
12
|
+
use tokio::io;
|
|
12
13
|
|
|
13
14
|
/// Exposes prometheus metrics for scraping
|
|
14
15
|
pub(super) struct PromServer {
|
|
15
|
-
|
|
16
|
+
listener: TcpListener,
|
|
16
17
|
registry: Registry,
|
|
17
18
|
}
|
|
18
19
|
|
|
19
20
|
impl PromServer {
|
|
20
|
-
pub fn new(
|
|
21
|
+
pub(super) fn new(
|
|
21
22
|
opts: &PrometheusExporterOptions,
|
|
22
|
-
aggregation: impl AggregationSelector +
|
|
23
|
+
aggregation: impl AggregationSelector + 'static,
|
|
23
24
|
) -> Result<(Self, PrometheusExporter), anyhow::Error> {
|
|
24
25
|
let registry = Registry::new();
|
|
25
26
|
let exporter = opentelemetry_prometheus::exporter()
|
|
@@ -36,37 +37,49 @@ impl PromServer {
|
|
|
36
37
|
} else {
|
|
37
38
|
exporter
|
|
38
39
|
};
|
|
39
|
-
let bound_addr = AddrIncoming::bind(&opts.socket_addr)?;
|
|
40
40
|
Ok((
|
|
41
41
|
Self {
|
|
42
|
-
|
|
42
|
+
listener: TcpListener::bind(opts.socket_addr)?,
|
|
43
43
|
registry,
|
|
44
44
|
},
|
|
45
45
|
exporter.build()?,
|
|
46
46
|
))
|
|
47
47
|
}
|
|
48
48
|
|
|
49
|
-
pub async fn run(self) ->
|
|
49
|
+
pub(super) async fn run(self) -> Result<(), anyhow::Error> {
|
|
50
50
|
// Spin up hyper server to serve metrics for scraping. We use hyper since we already depend
|
|
51
51
|
// on it via Tonic.
|
|
52
|
-
|
|
52
|
+
self.listener.set_nonblocking(true)?;
|
|
53
|
+
let listener = tokio::net::TcpListener::from_std(self.listener)?;
|
|
54
|
+
loop {
|
|
55
|
+
let (stream, _) = listener.accept().await?;
|
|
56
|
+
let io = TokioIo::new(stream);
|
|
53
57
|
let regclone = self.registry.clone();
|
|
54
|
-
async move {
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
+
tokio::task::spawn(async move {
|
|
59
|
+
let server = auto::Builder::new(TokioExecutor::new());
|
|
60
|
+
if let Err(e) = server
|
|
61
|
+
.serve_connection(
|
|
62
|
+
io,
|
|
63
|
+
service_fn(move |req| metrics_req(req, regclone.clone())),
|
|
64
|
+
)
|
|
65
|
+
.await
|
|
66
|
+
{
|
|
67
|
+
warn!("Error serving metrics connection: {:?}", e);
|
|
68
|
+
}
|
|
69
|
+
});
|
|
70
|
+
}
|
|
58
71
|
}
|
|
59
72
|
|
|
60
|
-
pub fn bound_addr(&self) -> SocketAddr {
|
|
61
|
-
self.
|
|
73
|
+
pub(super) fn bound_addr(&self) -> io::Result<SocketAddr> {
|
|
74
|
+
self.listener.local_addr()
|
|
62
75
|
}
|
|
63
76
|
}
|
|
64
77
|
|
|
65
78
|
/// Serves prometheus metrics in the expected format for scraping
|
|
66
79
|
async fn metrics_req(
|
|
67
|
-
req: Request<
|
|
80
|
+
req: Request<hyper::body::Incoming>,
|
|
68
81
|
registry: Registry,
|
|
69
|
-
) -> Result<Response<
|
|
82
|
+
) -> Result<Response<Full<Bytes>>, hyper::Error> {
|
|
70
83
|
let response = match (req.method(), req.uri().path()) {
|
|
71
84
|
(&Method::GET, "/metrics") => {
|
|
72
85
|
let mut buffer = vec![];
|
|
@@ -77,12 +90,12 @@ async fn metrics_req(
|
|
|
77
90
|
Response::builder()
|
|
78
91
|
.status(200)
|
|
79
92
|
.header(CONTENT_TYPE, encoder.format_type())
|
|
80
|
-
.body(
|
|
93
|
+
.body(buffer.into())
|
|
81
94
|
.unwrap()
|
|
82
95
|
}
|
|
83
96
|
_ => Response::builder()
|
|
84
97
|
.status(404)
|
|
85
|
-
.body(
|
|
98
|
+
.body(vec![].into())
|
|
86
99
|
.expect("Can't fail to construct empty resp"),
|
|
87
100
|
};
|
|
88
101
|
Ok(response)
|