@temporalio/core-bridge 1.8.6 → 1.9.0-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +189 -152
- package/Cargo.toml +1 -0
- package/lib/index.d.ts +17 -44
- package/lib/index.js.map +1 -1
- package/package.json +3 -4
- package/releases/aarch64-apple-darwin/index.node +0 -0
- package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
- package/releases/x86_64-apple-darwin/index.node +0 -0
- package/releases/x86_64-pc-windows-msvc/index.node +0 -0
- package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
- package/sdk-core/.github/workflows/heavy.yml +4 -0
- package/sdk-core/.github/workflows/per-pr.yml +96 -0
- package/sdk-core/ARCHITECTURE.md +1 -1
- package/sdk-core/Cargo.toml +6 -0
- package/sdk-core/README.md +37 -21
- package/sdk-core/client/Cargo.toml +6 -3
- package/sdk-core/client/src/lib.rs +272 -138
- package/sdk-core/client/src/metrics.rs +68 -57
- package/sdk-core/client/src/raw.rs +191 -45
- package/sdk-core/client/src/retry.rs +20 -0
- package/sdk-core/client/src/worker_registry/mod.rs +264 -0
- package/sdk-core/client/src/workflow_handle/mod.rs +2 -1
- package/sdk-core/core/Cargo.toml +16 -18
- package/sdk-core/core/src/core_tests/child_workflows.rs +7 -7
- package/sdk-core/core/src/core_tests/mod.rs +1 -0
- package/sdk-core/core/src/core_tests/replay_flag.rs +29 -39
- package/sdk-core/core/src/core_tests/updates.rs +73 -0
- package/sdk-core/core/src/core_tests/workflow_tasks.rs +52 -1
- package/sdk-core/core/src/ephemeral_server/mod.rs +34 -11
- package/sdk-core/core/src/internal_flags.rs +7 -1
- package/sdk-core/core/src/lib.rs +19 -36
- package/sdk-core/core/src/protosext/mod.rs +11 -3
- package/sdk-core/core/src/protosext/protocol_messages.rs +102 -0
- package/sdk-core/core/src/replay/mod.rs +100 -48
- package/sdk-core/core/src/telemetry/log_export.rs +161 -28
- package/sdk-core/core/src/telemetry/metrics.rs +869 -248
- package/sdk-core/core/src/telemetry/mod.rs +135 -239
- package/sdk-core/core/src/telemetry/prometheus_server.rs +36 -31
- package/sdk-core/core/src/test_help/mod.rs +63 -4
- package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +12 -2
- package/sdk-core/core/src/worker/activities.rs +276 -10
- package/sdk-core/core/src/worker/client/mocks.rs +18 -0
- package/sdk-core/core/src/worker/client.rs +16 -3
- package/sdk-core/core/src/worker/mod.rs +50 -19
- package/sdk-core/core/src/worker/slot_provider.rs +175 -0
- package/sdk-core/core/src/worker/workflow/driven_workflow.rs +27 -34
- package/sdk-core/core/src/worker/workflow/history_update.rs +4 -1
- package/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +36 -94
- package/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +34 -22
- package/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +50 -34
- package/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +106 -92
- package/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +22 -21
- package/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +386 -499
- package/sdk-core/core/src/worker/workflow/machines/mod.rs +12 -2
- package/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +33 -26
- package/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +198 -215
- package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +66 -62
- package/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +88 -119
- package/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +3 -1
- package/sdk-core/core/src/worker/workflow/machines/update_state_machine.rs +411 -0
- package/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +26 -25
- package/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +302 -85
- package/sdk-core/core/src/worker/workflow/managed_run.rs +179 -132
- package/sdk-core/core/src/worker/workflow/mod.rs +121 -46
- package/sdk-core/core/src/worker/workflow/run_cache.rs +8 -12
- package/sdk-core/core/src/worker/workflow/workflow_stream.rs +45 -38
- package/sdk-core/core-api/Cargo.toml +7 -6
- package/sdk-core/core-api/src/lib.rs +4 -12
- package/sdk-core/core-api/src/telemetry/metrics.rs +334 -0
- package/sdk-core/core-api/src/telemetry.rs +53 -42
- package/sdk-core/core-api/src/worker.rs +7 -0
- package/sdk-core/{.buildkite/docker → docker}/docker-compose.yaml +1 -1
- package/sdk-core/etc/dynamic-config.yaml +11 -1
- package/sdk-core/fsm/rustfsm_procmacro/Cargo.toml +1 -1
- package/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +1 -3
- package/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +2 -2
- package/sdk-core/sdk/Cargo.toml +1 -1
- package/sdk-core/sdk/src/lib.rs +85 -7
- package/sdk-core/sdk/src/workflow_context/options.rs +4 -0
- package/sdk-core/sdk/src/workflow_context.rs +43 -15
- package/sdk-core/sdk/src/workflow_future.rs +334 -204
- package/sdk-core/sdk-core-protos/Cargo.toml +2 -2
- package/sdk-core/sdk-core-protos/build.rs +14 -14
- package/sdk-core/sdk-core-protos/protos/api_upstream/.buildkite/Dockerfile +2 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/Makefile +99 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/api-linter.yaml +56 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/buf.gen.yaml +20 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/buf.lock +11 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/buf.yaml +18 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/google/api/annotations.proto +31 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/google/api/http.proto +379 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/google/protobuf/any.proto +162 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/google/protobuf/descriptor.proto +1212 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/google/protobuf/duration.proto +115 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/google/protobuf/empty.proto +51 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/google/protobuf/timestamp.proto +144 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/google/protobuf/wrappers.proto +123 -0
- package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/batch/v1/message.proto +3 -5
- package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/command/v1/message.proto +11 -13
- package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/common/v1/message.proto +2 -4
- package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/enums/v1/failed_cause.proto +2 -0
- package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/enums/v1/reset.proto +1 -1
- package/sdk-core/{protos/api_upstream/build/tools.go → sdk-core-protos/protos/api_upstream/temporal/api/export/v1/message.proto} +22 -6
- package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/filter/v1/message.proto +2 -4
- package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/history/v1/message.proto +21 -23
- package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/namespace/v1/message.proto +2 -4
- package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/operatorservice/v1/request_response.proto +2 -0
- package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/operatorservice/v1/service.proto +4 -0
- package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/replication/v1/message.proto +1 -3
- package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/schedule/v1/message.proto +36 -20
- package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/sdk/v1/task_complete_metadata.proto +13 -0
- package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/taskqueue/v1/message.proto +2 -4
- package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/update/v1/message.proto +1 -1
- package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/version/v1/message.proto +2 -3
- package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/workflow/v1/message.proto +18 -20
- package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/workflowservice/v1/request_response.proto +84 -32
- package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/workflowservice/v1/service.proto +205 -47
- package/sdk-core/{protos → sdk-core-protos/protos}/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +57 -0
- package/sdk-core/{protos → sdk-core-protos/protos}/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +27 -0
- package/sdk-core/sdk-core-protos/src/history_builder.rs +67 -2
- package/sdk-core/sdk-core-protos/src/lib.rs +75 -2
- package/sdk-core/sdk-core-protos/src/utilities.rs +14 -0
- package/sdk-core/test-utils/Cargo.toml +5 -1
- package/sdk-core/test-utils/src/canned_histories.rs +3 -57
- package/sdk-core/test-utils/src/interceptors.rs +46 -0
- package/sdk-core/test-utils/src/lib.rs +106 -38
- package/sdk-core/tests/integ_tests/metrics_tests.rs +110 -15
- package/sdk-core/tests/integ_tests/queries_tests.rs +174 -3
- package/sdk-core/tests/integ_tests/update_tests.rs +908 -0
- package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +44 -1
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +1 -1
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +1 -1
- package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +4 -4
- package/sdk-core/tests/integ_tests/workflow_tests/eager.rs +61 -0
- package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +27 -2
- package/sdk-core/tests/integ_tests/workflow_tests.rs +1 -0
- package/sdk-core/tests/main.rs +2 -1
- package/sdk-core/tests/runner.rs +15 -2
- package/src/conversions.rs +75 -89
- package/src/helpers.rs +74 -0
- package/src/runtime.rs +17 -6
- package/src/worker.rs +14 -61
- package/ts/index.ts +21 -52
- package/sdk-core/.buildkite/docker/Dockerfile +0 -9
- package/sdk-core/.buildkite/docker/build.sh +0 -5
- package/sdk-core/.buildkite/docker/docker-compose-ci.yaml +0 -27
- package/sdk-core/.buildkite/pipeline.yml +0 -57
- package/sdk-core/.github/workflows/semgrep.yml +0 -25
- package/sdk-core/core/src/worker/workflow/bridge.rs +0 -35
- package/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +0 -215
- package/sdk-core/protos/api_upstream/.buildkite/Dockerfile +0 -2
- package/sdk-core/protos/api_upstream/Makefile +0 -80
- package/sdk-core/protos/api_upstream/api-linter.yaml +0 -40
- package/sdk-core/protos/api_upstream/buf.yaml +0 -9
- package/sdk-core/protos/api_upstream/build/go.mod +0 -7
- package/sdk-core/protos/api_upstream/build/go.sum +0 -5
- package/sdk-core/protos/api_upstream/go.mod +0 -6
- package/sdk-core/protos/testsrv_upstream/dependencies/gogoproto/gogo.proto +0 -141
- /package/sdk-core/{.buildkite/docker → docker}/docker-compose-telem.yaml +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/.buildkite/docker-compose.yml +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/.buildkite/pipeline.yml +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/.github/CODEOWNERS +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/.github/workflows/publish-docs.yml +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/.github/workflows/trigger-api-go-update.yml +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/LICENSE +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/README.md +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/enums/v1/batch_operation.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/enums/v1/command_type.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/enums/v1/common.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/enums/v1/event_type.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/enums/v1/namespace.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/enums/v1/query.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/enums/v1/schedule.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/enums/v1/task_queue.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/enums/v1/update.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/enums/v1/workflow.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/errordetails/v1/message.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/failure/v1/message.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/protocol/v1/message.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/api_upstream/temporal/api/query/v1/message.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/google/rpc/status.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/grpc/health/v1/health.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/local/temporal/sdk/core/activity_result/activity_result.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/local/temporal/sdk/core/activity_task/activity_task.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/local/temporal/sdk/core/child_workflow/child_workflow.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/local/temporal/sdk/core/common/common.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/local/temporal/sdk/core/core_interface.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/local/temporal/sdk/core/external_data/external_data.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/testsrv_upstream/Makefile +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/testsrv_upstream/api-linter.yaml +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/testsrv_upstream/buf.yaml +0 -0
- /package/sdk-core/{protos/api_upstream → sdk-core-protos/protos/testsrv_upstream}/dependencies/gogoproto/gogo.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/testsrv_upstream/temporal/api/testservice/v1/request_response.proto +0 -0
- /package/sdk-core/{protos → sdk-core-protos/protos}/testsrv_upstream/temporal/api/testservice/v1/service.proto +0 -0
|
@@ -1,17 +1,37 @@
|
|
|
1
|
-
use crate::
|
|
1
|
+
use crate::{
|
|
2
|
+
abstractions::dbg_panic,
|
|
3
|
+
telemetry::{
|
|
4
|
+
default_resource, metric_temporality_to_selector, prometheus_server::PromServer,
|
|
5
|
+
TelemetryInstance, TELEM_SERVICE_NAME,
|
|
6
|
+
},
|
|
7
|
+
};
|
|
2
8
|
use opentelemetry::{
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
9
|
+
self,
|
|
10
|
+
metrics::{Meter, MeterProvider as MeterProviderT, Unit},
|
|
11
|
+
KeyValue,
|
|
12
|
+
};
|
|
13
|
+
use opentelemetry_otlp::WithExportConfig;
|
|
14
|
+
use opentelemetry_sdk::{
|
|
15
|
+
metrics::{
|
|
16
|
+
new_view,
|
|
17
|
+
reader::{AggregationSelector, DefaultAggregationSelector},
|
|
18
|
+
Aggregation, Instrument, InstrumentKind, MeterProvider, MeterProviderBuilder,
|
|
19
|
+
PeriodicReader, View,
|
|
10
20
|
},
|
|
11
|
-
|
|
21
|
+
runtime, AttributeSet,
|
|
12
22
|
};
|
|
13
|
-
use
|
|
14
|
-
use
|
|
23
|
+
use parking_lot::RwLock;
|
|
24
|
+
use std::{collections::HashMap, fmt::Debug, net::SocketAddr, sync::Arc, time::Duration};
|
|
25
|
+
use temporal_sdk_core_api::telemetry::{
|
|
26
|
+
metrics::{
|
|
27
|
+
BufferAttributes, BufferInstrumentRef, CoreMeter, Counter, Gauge, Histogram,
|
|
28
|
+
LazyBufferInstrument, MetricAttributes, MetricCallBufferer, MetricEvent, MetricKeyValue,
|
|
29
|
+
MetricKind, MetricParameters, MetricUpdateVal, NewAttributes, NoOpCoreMeter,
|
|
30
|
+
},
|
|
31
|
+
OtelCollectorOptions, PrometheusExporterOptions,
|
|
32
|
+
};
|
|
33
|
+
use tokio::task::AbortHandle;
|
|
34
|
+
use tonic::metadata::MetadataMap;
|
|
15
35
|
|
|
16
36
|
/// Used to track context associated with metrics, and record/update them
|
|
17
37
|
///
|
|
@@ -19,111 +39,77 @@ use temporal_client::ClientMetricProvider;
|
|
|
19
39
|
/// appropriate k/vs have already been set.
|
|
20
40
|
#[derive(Clone)]
|
|
21
41
|
pub(crate) struct MetricsContext {
|
|
22
|
-
|
|
23
|
-
kvs:
|
|
42
|
+
meter: Arc<dyn CoreMeter>,
|
|
43
|
+
kvs: MetricAttributes,
|
|
24
44
|
instruments: Arc<Instruments>,
|
|
25
45
|
}
|
|
26
46
|
|
|
27
|
-
/// Wraps OTel's [Meter] to ensure we name our metrics properly, or any other temporal-specific
|
|
28
|
-
/// metrics customizations
|
|
29
|
-
#[derive(derive_more::Constructor)]
|
|
30
|
-
pub struct TemporalMeter<'a> {
|
|
31
|
-
inner: &'a Meter,
|
|
32
|
-
metrics_prefix: &'static str,
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
impl<'a> TemporalMeter<'a> {
|
|
36
|
-
pub(crate) fn counter(&self, name: &'static str) -> Counter<u64> {
|
|
37
|
-
self.inner
|
|
38
|
-
.u64_counter(self.metrics_prefix.to_string() + name)
|
|
39
|
-
.init()
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
pub(crate) fn histogram(&self, name: &'static str) -> Histogram<u64> {
|
|
43
|
-
self.inner
|
|
44
|
-
.u64_histogram(self.metrics_prefix.to_string() + name)
|
|
45
|
-
.init()
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
impl<'a> ClientMetricProvider for TemporalMeter<'a> {
|
|
50
|
-
fn counter(&self, name: &'static str) -> Counter<u64> {
|
|
51
|
-
self.counter(name)
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
fn histogram(&self, name: &'static str) -> Histogram<u64> {
|
|
55
|
-
self.histogram(name)
|
|
56
|
-
}
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
impl<'a> Deref for TemporalMeter<'a> {
|
|
60
|
-
type Target = dyn ClientMetricProvider + 'a;
|
|
61
|
-
|
|
62
|
-
fn deref(&self) -> &Self::Target {
|
|
63
|
-
self as &Self::Target
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
|
-
|
|
67
47
|
struct Instruments {
|
|
68
|
-
wf_completed_counter: Counter
|
|
69
|
-
wf_canceled_counter: Counter
|
|
70
|
-
wf_failed_counter: Counter
|
|
71
|
-
wf_cont_counter: Counter
|
|
72
|
-
wf_e2e_latency: Histogram
|
|
73
|
-
wf_task_queue_poll_empty_counter: Counter
|
|
74
|
-
wf_task_queue_poll_succeed_counter: Counter
|
|
75
|
-
wf_task_execution_failure_counter: Counter
|
|
76
|
-
wf_task_sched_to_start_latency: Histogram
|
|
77
|
-
wf_task_replay_latency: Histogram
|
|
78
|
-
wf_task_execution_latency: Histogram
|
|
79
|
-
act_poll_no_task: Counter
|
|
80
|
-
act_task_received_counter: Counter
|
|
81
|
-
act_execution_failed: Counter
|
|
82
|
-
act_sched_to_start_latency: Histogram
|
|
83
|
-
act_exec_latency: Histogram
|
|
84
|
-
worker_registered: Counter
|
|
85
|
-
num_pollers:
|
|
86
|
-
task_slots_available:
|
|
87
|
-
sticky_cache_hit: Counter
|
|
88
|
-
sticky_cache_miss: Counter
|
|
89
|
-
sticky_cache_size:
|
|
90
|
-
sticky_cache_evictions: Counter
|
|
48
|
+
wf_completed_counter: Arc<dyn Counter>,
|
|
49
|
+
wf_canceled_counter: Arc<dyn Counter>,
|
|
50
|
+
wf_failed_counter: Arc<dyn Counter>,
|
|
51
|
+
wf_cont_counter: Arc<dyn Counter>,
|
|
52
|
+
wf_e2e_latency: Arc<dyn Histogram>,
|
|
53
|
+
wf_task_queue_poll_empty_counter: Arc<dyn Counter>,
|
|
54
|
+
wf_task_queue_poll_succeed_counter: Arc<dyn Counter>,
|
|
55
|
+
wf_task_execution_failure_counter: Arc<dyn Counter>,
|
|
56
|
+
wf_task_sched_to_start_latency: Arc<dyn Histogram>,
|
|
57
|
+
wf_task_replay_latency: Arc<dyn Histogram>,
|
|
58
|
+
wf_task_execution_latency: Arc<dyn Histogram>,
|
|
59
|
+
act_poll_no_task: Arc<dyn Counter>,
|
|
60
|
+
act_task_received_counter: Arc<dyn Counter>,
|
|
61
|
+
act_execution_failed: Arc<dyn Counter>,
|
|
62
|
+
act_sched_to_start_latency: Arc<dyn Histogram>,
|
|
63
|
+
act_exec_latency: Arc<dyn Histogram>,
|
|
64
|
+
worker_registered: Arc<dyn Counter>,
|
|
65
|
+
num_pollers: Arc<dyn Gauge>,
|
|
66
|
+
task_slots_available: Arc<dyn Gauge>,
|
|
67
|
+
sticky_cache_hit: Arc<dyn Counter>,
|
|
68
|
+
sticky_cache_miss: Arc<dyn Counter>,
|
|
69
|
+
sticky_cache_size: Arc<dyn Gauge>,
|
|
70
|
+
sticky_cache_evictions: Arc<dyn Counter>,
|
|
91
71
|
}
|
|
92
72
|
|
|
93
73
|
impl MetricsContext {
|
|
94
74
|
pub(crate) fn no_op() -> Self {
|
|
75
|
+
let meter = Arc::new(NoOpCoreMeter);
|
|
95
76
|
Self {
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
&NoopMeterProvider::new().meter("fakemeter"),
|
|
100
|
-
"fakemetrics",
|
|
101
|
-
))),
|
|
77
|
+
kvs: meter.new_attributes(Default::default()),
|
|
78
|
+
instruments: Arc::new(Instruments::new(meter.as_ref())),
|
|
79
|
+
meter,
|
|
102
80
|
}
|
|
103
81
|
}
|
|
104
82
|
|
|
105
|
-
pub(crate) fn top_level(namespace: String, telemetry: &TelemetryInstance) -> Self {
|
|
106
|
-
let
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
83
|
+
pub(crate) fn top_level(namespace: String, tq: String, telemetry: &TelemetryInstance) -> Self {
|
|
84
|
+
if let Some(mut meter) = telemetry.get_temporal_metric_meter() {
|
|
85
|
+
meter
|
|
86
|
+
.default_attribs
|
|
87
|
+
.attributes
|
|
88
|
+
.push(MetricKeyValue::new(KEY_NAMESPACE, namespace));
|
|
89
|
+
meter.default_attribs.attributes.push(task_queue(tq));
|
|
90
|
+
let kvs = meter.inner.new_attributes(meter.default_attribs);
|
|
91
|
+
Self {
|
|
92
|
+
kvs,
|
|
93
|
+
instruments: Arc::new(Instruments::new(meter.inner.as_ref())),
|
|
94
|
+
meter: meter.inner,
|
|
95
|
+
}
|
|
96
|
+
} else {
|
|
97
|
+
Self::no_op()
|
|
111
98
|
}
|
|
112
99
|
}
|
|
113
100
|
|
|
114
|
-
pub(crate) fn with_task_q(mut self, tq: String) -> Self {
|
|
115
|
-
Arc::make_mut(&mut self.kvs).push(task_queue(tq));
|
|
116
|
-
self
|
|
117
|
-
}
|
|
118
|
-
|
|
119
101
|
/// Extend an existing metrics context with new attributes
|
|
120
|
-
pub(crate) fn with_new_attrs(
|
|
121
|
-
|
|
122
|
-
|
|
102
|
+
pub(crate) fn with_new_attrs(
|
|
103
|
+
&self,
|
|
104
|
+
new_attrs: impl IntoIterator<Item = MetricKeyValue>,
|
|
105
|
+
) -> Self {
|
|
106
|
+
let kvs = self
|
|
107
|
+
.meter
|
|
108
|
+
.extend_attributes(self.kvs.clone(), new_attrs.into());
|
|
123
109
|
Self {
|
|
124
|
-
ctx: Context::current(),
|
|
125
110
|
kvs,
|
|
126
111
|
instruments: self.instruments.clone(),
|
|
112
|
+
meter: self.meter.clone(),
|
|
127
113
|
}
|
|
128
114
|
}
|
|
129
115
|
|
|
@@ -131,113 +117,91 @@ impl MetricsContext {
|
|
|
131
117
|
pub(crate) fn wf_tq_poll_ok(&self) {
|
|
132
118
|
self.instruments
|
|
133
119
|
.wf_task_queue_poll_succeed_counter
|
|
134
|
-
.add(
|
|
120
|
+
.add(1, &self.kvs);
|
|
135
121
|
}
|
|
136
122
|
|
|
137
123
|
/// A workflow task queue poll timed out / had empty response
|
|
138
124
|
pub(crate) fn wf_tq_poll_empty(&self) {
|
|
139
125
|
self.instruments
|
|
140
126
|
.wf_task_queue_poll_empty_counter
|
|
141
|
-
.add(
|
|
127
|
+
.add(1, &self.kvs);
|
|
142
128
|
}
|
|
143
129
|
|
|
144
130
|
/// A workflow task execution failed
|
|
145
131
|
pub(crate) fn wf_task_failed(&self) {
|
|
146
132
|
self.instruments
|
|
147
133
|
.wf_task_execution_failure_counter
|
|
148
|
-
.add(
|
|
134
|
+
.add(1, &self.kvs);
|
|
149
135
|
}
|
|
150
136
|
|
|
151
137
|
/// A workflow completed successfully
|
|
152
138
|
pub(crate) fn wf_completed(&self) {
|
|
153
|
-
self.instruments
|
|
154
|
-
.wf_completed_counter
|
|
155
|
-
.add(&self.ctx, 1, &self.kvs);
|
|
139
|
+
self.instruments.wf_completed_counter.add(1, &self.kvs);
|
|
156
140
|
}
|
|
157
141
|
|
|
158
142
|
/// A workflow ended cancelled
|
|
159
143
|
pub(crate) fn wf_canceled(&self) {
|
|
160
|
-
self.instruments
|
|
161
|
-
.wf_canceled_counter
|
|
162
|
-
.add(&self.ctx, 1, &self.kvs);
|
|
144
|
+
self.instruments.wf_canceled_counter.add(1, &self.kvs);
|
|
163
145
|
}
|
|
164
146
|
|
|
165
147
|
/// A workflow ended failed
|
|
166
148
|
pub(crate) fn wf_failed(&self) {
|
|
167
|
-
self.instruments
|
|
168
|
-
.wf_failed_counter
|
|
169
|
-
.add(&self.ctx, 1, &self.kvs);
|
|
149
|
+
self.instruments.wf_failed_counter.add(1, &self.kvs);
|
|
170
150
|
}
|
|
171
151
|
|
|
172
152
|
/// A workflow continued as new
|
|
173
153
|
pub(crate) fn wf_continued_as_new(&self) {
|
|
174
|
-
self.instruments
|
|
175
|
-
.wf_cont_counter
|
|
176
|
-
.add(&self.ctx, 1, &self.kvs);
|
|
154
|
+
self.instruments.wf_cont_counter.add(1, &self.kvs);
|
|
177
155
|
}
|
|
178
156
|
|
|
179
157
|
/// Record workflow total execution time in milliseconds
|
|
180
158
|
pub(crate) fn wf_e2e_latency(&self, dur: Duration) {
|
|
181
159
|
self.instruments
|
|
182
160
|
.wf_e2e_latency
|
|
183
|
-
.record(
|
|
161
|
+
.record(dur.as_millis() as u64, &self.kvs);
|
|
184
162
|
}
|
|
185
163
|
|
|
186
164
|
/// Record workflow task schedule to start time in millis
|
|
187
165
|
pub(crate) fn wf_task_sched_to_start_latency(&self, dur: Duration) {
|
|
188
|
-
self.instruments
|
|
189
|
-
|
|
190
|
-
dur.as_millis() as u64,
|
|
191
|
-
&self.kvs,
|
|
192
|
-
);
|
|
166
|
+
self.instruments
|
|
167
|
+
.wf_task_sched_to_start_latency
|
|
168
|
+
.record(dur.as_millis() as u64, &self.kvs);
|
|
193
169
|
}
|
|
194
170
|
|
|
195
171
|
/// Record workflow task execution time in milliseconds
|
|
196
172
|
pub(crate) fn wf_task_latency(&self, dur: Duration) {
|
|
197
|
-
self.instruments
|
|
198
|
-
|
|
199
|
-
dur.as_millis() as u64,
|
|
200
|
-
&self.kvs,
|
|
201
|
-
);
|
|
173
|
+
self.instruments
|
|
174
|
+
.wf_task_execution_latency
|
|
175
|
+
.record(dur.as_millis() as u64, &self.kvs);
|
|
202
176
|
}
|
|
203
177
|
|
|
204
178
|
/// Record time it takes to catch up on replaying a WFT
|
|
205
179
|
pub(crate) fn wf_task_replay_latency(&self, dur: Duration) {
|
|
206
|
-
self.instruments
|
|
207
|
-
|
|
208
|
-
dur.as_millis() as u64,
|
|
209
|
-
&self.kvs,
|
|
210
|
-
);
|
|
180
|
+
self.instruments
|
|
181
|
+
.wf_task_replay_latency
|
|
182
|
+
.record(dur.as_millis() as u64, &self.kvs);
|
|
211
183
|
}
|
|
212
184
|
|
|
213
185
|
/// An activity long poll timed out
|
|
214
186
|
pub(crate) fn act_poll_timeout(&self) {
|
|
215
|
-
self.instruments
|
|
216
|
-
.act_poll_no_task
|
|
217
|
-
.add(&self.ctx, 1, &self.kvs);
|
|
187
|
+
self.instruments.act_poll_no_task.add(1, &self.kvs);
|
|
218
188
|
}
|
|
219
189
|
|
|
220
190
|
/// A count of activity tasks received
|
|
221
191
|
pub(crate) fn act_task_received(&self) {
|
|
222
|
-
self.instruments
|
|
223
|
-
.act_task_received_counter
|
|
224
|
-
.add(&self.ctx, 1, &self.kvs);
|
|
192
|
+
self.instruments.act_task_received_counter.add(1, &self.kvs);
|
|
225
193
|
}
|
|
226
194
|
|
|
227
195
|
/// An activity execution failed
|
|
228
196
|
pub(crate) fn act_execution_failed(&self) {
|
|
229
|
-
self.instruments
|
|
230
|
-
.act_execution_failed
|
|
231
|
-
.add(&self.ctx, 1, &self.kvs);
|
|
197
|
+
self.instruments.act_execution_failed.add(1, &self.kvs);
|
|
232
198
|
}
|
|
233
199
|
|
|
234
200
|
/// Record activity task schedule to start time in millis
|
|
235
201
|
pub(crate) fn act_sched_to_start_latency(&self, dur: Duration) {
|
|
236
|
-
self.instruments
|
|
237
|
-
|
|
238
|
-
dur.as_millis() as u64,
|
|
239
|
-
&self.kvs,
|
|
240
|
-
);
|
|
202
|
+
self.instruments
|
|
203
|
+
.act_sched_to_start_latency
|
|
204
|
+
.record(dur.as_millis() as u64, &self.kvs);
|
|
241
205
|
}
|
|
242
206
|
|
|
243
207
|
/// Record time it took to complete activity execution, from the time core generated the
|
|
@@ -245,97 +209,169 @@ impl MetricsContext {
|
|
|
245
209
|
pub(crate) fn act_execution_latency(&self, dur: Duration) {
|
|
246
210
|
self.instruments
|
|
247
211
|
.act_exec_latency
|
|
248
|
-
.record(
|
|
212
|
+
.record(dur.as_millis() as u64, &self.kvs);
|
|
249
213
|
}
|
|
250
214
|
|
|
251
215
|
/// A worker was registered
|
|
252
216
|
pub(crate) fn worker_registered(&self) {
|
|
253
|
-
self.instruments
|
|
254
|
-
.worker_registered
|
|
255
|
-
.add(&self.ctx, 1, &self.kvs);
|
|
217
|
+
self.instruments.worker_registered.add(1, &self.kvs);
|
|
256
218
|
}
|
|
257
219
|
|
|
258
220
|
/// Record current number of available task slots. Context should have worker type set.
|
|
259
221
|
pub(crate) fn available_task_slots(&self, num: usize) {
|
|
260
222
|
self.instruments
|
|
261
223
|
.task_slots_available
|
|
262
|
-
.record(
|
|
224
|
+
.record(num as u64, &self.kvs)
|
|
263
225
|
}
|
|
264
226
|
|
|
265
227
|
/// Record current number of pollers. Context should include poller type / task queue tag.
|
|
266
228
|
pub(crate) fn record_num_pollers(&self, num: usize) {
|
|
267
|
-
self.instruments
|
|
268
|
-
.num_pollers
|
|
269
|
-
.record(&self.ctx, num as u64, &self.kvs);
|
|
229
|
+
self.instruments.num_pollers.record(num as u64, &self.kvs);
|
|
270
230
|
}
|
|
271
231
|
|
|
272
232
|
/// A workflow task found a cached workflow to run against
|
|
273
233
|
pub(crate) fn sticky_cache_hit(&self) {
|
|
274
|
-
self.instruments
|
|
275
|
-
.sticky_cache_hit
|
|
276
|
-
.add(&self.ctx, 1, &self.kvs);
|
|
234
|
+
self.instruments.sticky_cache_hit.add(1, &self.kvs);
|
|
277
235
|
}
|
|
278
236
|
|
|
279
237
|
/// A workflow task did not find a cached workflow
|
|
280
238
|
pub(crate) fn sticky_cache_miss(&self) {
|
|
281
|
-
self.instruments
|
|
282
|
-
.sticky_cache_miss
|
|
283
|
-
.add(&self.ctx, 1, &self.kvs);
|
|
239
|
+
self.instruments.sticky_cache_miss.add(1, &self.kvs);
|
|
284
240
|
}
|
|
285
241
|
|
|
286
242
|
/// Record current cache size (in number of wfs, not bytes)
|
|
287
243
|
pub(crate) fn cache_size(&self, size: u64) {
|
|
288
|
-
self.instruments
|
|
289
|
-
.sticky_cache_size
|
|
290
|
-
.record(&self.ctx, size, &self.kvs);
|
|
244
|
+
self.instruments.sticky_cache_size.record(size, &self.kvs);
|
|
291
245
|
}
|
|
292
246
|
|
|
293
247
|
/// Count a workflow being evicted from the cache
|
|
294
248
|
pub(crate) fn cache_eviction(&self) {
|
|
295
|
-
self.instruments
|
|
296
|
-
.sticky_cache_evictions
|
|
297
|
-
.add(&self.ctx, 1, &self.kvs);
|
|
249
|
+
self.instruments.sticky_cache_evictions.add(1, &self.kvs);
|
|
298
250
|
}
|
|
299
251
|
}
|
|
300
252
|
|
|
301
253
|
impl Instruments {
|
|
302
|
-
fn new(
|
|
303
|
-
let no_op_meter: Meter;
|
|
304
|
-
let meter = if let Some(meter) = telem.get_metric_meter() {
|
|
305
|
-
meter
|
|
306
|
-
} else {
|
|
307
|
-
no_op_meter = NoopMeterProvider::default().meter("no_op");
|
|
308
|
-
TemporalMeter::new(&no_op_meter, "fakemetrics")
|
|
309
|
-
};
|
|
310
|
-
Self::new_explicit(meter)
|
|
311
|
-
}
|
|
312
|
-
|
|
313
|
-
fn new_explicit(meter: TemporalMeter) -> Self {
|
|
254
|
+
fn new(meter: &dyn CoreMeter) -> Self {
|
|
314
255
|
Self {
|
|
315
|
-
wf_completed_counter: meter.counter(
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
256
|
+
wf_completed_counter: meter.counter(MetricParameters {
|
|
257
|
+
name: "workflow_completed".into(),
|
|
258
|
+
description: "Count of successfully completed workflows".into(),
|
|
259
|
+
unit: "".into(),
|
|
260
|
+
}),
|
|
261
|
+
wf_canceled_counter: meter.counter(MetricParameters {
|
|
262
|
+
name: "workflow_canceled".into(),
|
|
263
|
+
description: "Count of canceled workflows".into(),
|
|
264
|
+
unit: "".into(),
|
|
265
|
+
}),
|
|
266
|
+
wf_failed_counter: meter.counter(MetricParameters {
|
|
267
|
+
name: "workflow_failed".into(),
|
|
268
|
+
description: "Count of failed workflows".into(),
|
|
269
|
+
unit: "".into(),
|
|
270
|
+
}),
|
|
271
|
+
wf_cont_counter: meter.counter(MetricParameters {
|
|
272
|
+
name: "workflow_continue_as_new".into(),
|
|
273
|
+
description: "Count of continued-as-new workflows".into(),
|
|
274
|
+
unit: "".into(),
|
|
275
|
+
}),
|
|
276
|
+
wf_e2e_latency: meter.histogram(MetricParameters {
|
|
277
|
+
name: WF_E2E_LATENCY_NAME.into(),
|
|
278
|
+
unit: "ms".into(),
|
|
279
|
+
description: "Histogram of total workflow execution latencies".into(),
|
|
280
|
+
}),
|
|
281
|
+
wf_task_queue_poll_empty_counter: meter.counter(MetricParameters {
|
|
282
|
+
name: "workflow_task_queue_poll_empty".into(),
|
|
283
|
+
description: "Count of workflow task queue poll timeouts (no new task)".into(),
|
|
284
|
+
unit: "".into(),
|
|
285
|
+
}),
|
|
286
|
+
wf_task_queue_poll_succeed_counter: meter.counter(MetricParameters {
|
|
287
|
+
name: "workflow_task_queue_poll_succeed".into(),
|
|
288
|
+
description: "Count of workflow task queue poll successes".into(),
|
|
289
|
+
unit: "".into(),
|
|
290
|
+
}),
|
|
291
|
+
wf_task_execution_failure_counter: meter.counter(MetricParameters {
|
|
292
|
+
name: "workflow_task_execution_failed".into(),
|
|
293
|
+
description: "Count of workflow task execution failures".into(),
|
|
294
|
+
unit: "".into(),
|
|
295
|
+
}),
|
|
296
|
+
wf_task_sched_to_start_latency: meter.histogram(MetricParameters {
|
|
297
|
+
name: WF_TASK_SCHED_TO_START_LATENCY_NAME.into(),
|
|
298
|
+
unit: "ms".into(),
|
|
299
|
+
description: "Histogram of workflow task schedule-to-start latencies".into(),
|
|
300
|
+
}),
|
|
301
|
+
wf_task_replay_latency: meter.histogram(MetricParameters {
|
|
302
|
+
name: WF_TASK_REPLAY_LATENCY_NAME.into(),
|
|
303
|
+
unit: "ms".into(),
|
|
304
|
+
description: "Histogram of workflow task replay latencies".into(),
|
|
305
|
+
}),
|
|
306
|
+
wf_task_execution_latency: meter.histogram(MetricParameters {
|
|
307
|
+
name: WF_TASK_EXECUTION_LATENCY_NAME.into(),
|
|
308
|
+
unit: "ms".into(),
|
|
309
|
+
description: "Histogram of workflow task execution (not replay) latencies".into(),
|
|
310
|
+
}),
|
|
311
|
+
act_poll_no_task: meter.counter(MetricParameters {
|
|
312
|
+
name: "activity_poll_no_task".into(),
|
|
313
|
+
description: "Count of activity task queue poll timeouts (no new task)".into(),
|
|
314
|
+
unit: "".into(),
|
|
315
|
+
}),
|
|
316
|
+
act_task_received_counter: meter.counter(MetricParameters {
|
|
317
|
+
name: "activity_task_received".into(),
|
|
318
|
+
description: "Count of activity task queue poll successes".into(),
|
|
319
|
+
unit: "".into(),
|
|
320
|
+
}),
|
|
321
|
+
act_execution_failed: meter.counter(MetricParameters {
|
|
322
|
+
name: "activity_execution_failed".into(),
|
|
323
|
+
description: "Count of activity task execution failures".into(),
|
|
324
|
+
unit: "".into(),
|
|
325
|
+
}),
|
|
326
|
+
act_sched_to_start_latency: meter.histogram(MetricParameters {
|
|
327
|
+
name: ACT_SCHED_TO_START_LATENCY_NAME.into(),
|
|
328
|
+
unit: "ms".into(),
|
|
329
|
+
description: "Histogram of activity schedule-to-start latencies".into(),
|
|
330
|
+
}),
|
|
331
|
+
act_exec_latency: meter.histogram(MetricParameters {
|
|
332
|
+
name: ACT_EXEC_LATENCY_NAME.into(),
|
|
333
|
+
unit: "ms".into(),
|
|
334
|
+
description: "Histogram of activity execution latencies".into(),
|
|
335
|
+
}),
|
|
331
336
|
// name kept as worker start for compat with old sdk / what users expect
|
|
332
|
-
worker_registered: meter.counter(
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
337
|
+
worker_registered: meter.counter(MetricParameters {
|
|
338
|
+
name: "worker_start".into(),
|
|
339
|
+
description: "Count of the number of initialized workers".into(),
|
|
340
|
+
unit: "".into(),
|
|
341
|
+
}),
|
|
342
|
+
num_pollers: meter.gauge(MetricParameters {
|
|
343
|
+
name: NUM_POLLERS_NAME.into(),
|
|
344
|
+
description: "Current number of active pollers per queue type".into(),
|
|
345
|
+
unit: "".into(),
|
|
346
|
+
}),
|
|
347
|
+
task_slots_available: meter.gauge(MetricParameters {
|
|
348
|
+
name: TASK_SLOTS_AVAILABLE_NAME.into(),
|
|
349
|
+
description: "Current number of available slots per task type".into(),
|
|
350
|
+
unit: "".into(),
|
|
351
|
+
}),
|
|
352
|
+
sticky_cache_hit: meter.counter(MetricParameters {
|
|
353
|
+
name: "sticky_cache_hit".into(),
|
|
354
|
+
description: "Count of times the workflow cache was used for a new workflow task"
|
|
355
|
+
.into(),
|
|
356
|
+
unit: "".into(),
|
|
357
|
+
}),
|
|
358
|
+
sticky_cache_miss: meter.counter(MetricParameters {
|
|
359
|
+
name: "sticky_cache_miss".into(),
|
|
360
|
+
description:
|
|
361
|
+
"Count of times the workflow cache was missing a workflow for a sticky task"
|
|
362
|
+
.into(),
|
|
363
|
+
unit: "".into(),
|
|
364
|
+
}),
|
|
365
|
+
sticky_cache_size: meter.gauge(MetricParameters {
|
|
366
|
+
name: STICKY_CACHE_SIZE_NAME.into(),
|
|
367
|
+
description: "Current number of cached workflows".into(),
|
|
368
|
+
unit: "".into(),
|
|
369
|
+
}),
|
|
370
|
+
sticky_cache_evictions: meter.counter(MetricParameters {
|
|
371
|
+
name: "sticky_cache_total_forced_eviction".into(),
|
|
372
|
+
description: "Count of evictions of cached workflows".into(),
|
|
373
|
+
unit: "".into(),
|
|
374
|
+
}),
|
|
339
375
|
}
|
|
340
376
|
}
|
|
341
377
|
}
|
|
@@ -348,35 +384,35 @@ const KEY_POLLER_TYPE: &str = "poller_type";
|
|
|
348
384
|
const KEY_WORKER_TYPE: &str = "worker_type";
|
|
349
385
|
const KEY_EAGER: &str = "eager";
|
|
350
386
|
|
|
351
|
-
pub(crate) fn workflow_poller() ->
|
|
352
|
-
|
|
387
|
+
pub(crate) fn workflow_poller() -> MetricKeyValue {
|
|
388
|
+
MetricKeyValue::new(KEY_POLLER_TYPE, "workflow_task")
|
|
353
389
|
}
|
|
354
|
-
pub(crate) fn workflow_sticky_poller() ->
|
|
355
|
-
|
|
390
|
+
pub(crate) fn workflow_sticky_poller() -> MetricKeyValue {
|
|
391
|
+
MetricKeyValue::new(KEY_POLLER_TYPE, "sticky_workflow_task")
|
|
356
392
|
}
|
|
357
|
-
pub(crate) fn activity_poller() ->
|
|
358
|
-
|
|
393
|
+
pub(crate) fn activity_poller() -> MetricKeyValue {
|
|
394
|
+
MetricKeyValue::new(KEY_POLLER_TYPE, "activity_task")
|
|
359
395
|
}
|
|
360
|
-
pub(crate) fn task_queue(tq: String) ->
|
|
361
|
-
|
|
396
|
+
pub(crate) fn task_queue(tq: String) -> MetricKeyValue {
|
|
397
|
+
MetricKeyValue::new(KEY_TASK_QUEUE, tq)
|
|
362
398
|
}
|
|
363
|
-
pub(crate) fn activity_type(ty: String) ->
|
|
364
|
-
|
|
399
|
+
pub(crate) fn activity_type(ty: String) -> MetricKeyValue {
|
|
400
|
+
MetricKeyValue::new(KEY_ACT_TYPE, ty)
|
|
365
401
|
}
|
|
366
|
-
pub(crate) fn workflow_type(ty: String) ->
|
|
367
|
-
|
|
402
|
+
pub(crate) fn workflow_type(ty: String) -> MetricKeyValue {
|
|
403
|
+
MetricKeyValue::new(KEY_WF_TYPE, ty)
|
|
368
404
|
}
|
|
369
|
-
pub(crate) fn workflow_worker_type() ->
|
|
370
|
-
|
|
405
|
+
pub(crate) fn workflow_worker_type() -> MetricKeyValue {
|
|
406
|
+
MetricKeyValue::new(KEY_WORKER_TYPE, "WorkflowWorker")
|
|
371
407
|
}
|
|
372
|
-
pub(crate) fn activity_worker_type() ->
|
|
373
|
-
|
|
408
|
+
pub(crate) fn activity_worker_type() -> MetricKeyValue {
|
|
409
|
+
MetricKeyValue::new(KEY_WORKER_TYPE, "ActivityWorker")
|
|
374
410
|
}
|
|
375
|
-
pub(crate) fn local_activity_worker_type() ->
|
|
376
|
-
|
|
411
|
+
pub(crate) fn local_activity_worker_type() -> MetricKeyValue {
|
|
412
|
+
MetricKeyValue::new(KEY_WORKER_TYPE, "LocalActivityWorker")
|
|
377
413
|
}
|
|
378
|
-
pub(crate) fn eager(is_eager: bool) ->
|
|
379
|
-
|
|
414
|
+
pub(crate) fn eager(is_eager: bool) -> MetricKeyValue {
|
|
415
|
+
MetricKeyValue::new(KEY_EAGER, is_eager)
|
|
380
416
|
}
|
|
381
417
|
|
|
382
418
|
const WF_E2E_LATENCY_NAME: &str = "workflow_endtoend_latency";
|
|
@@ -427,45 +463,630 @@ static TASK_SCHED_TO_START_MS_BUCKETS: &[f64] =
|
|
|
427
463
|
/// broadly it's trying to represent latencies in millis.
|
|
428
464
|
pub(super) static DEFAULT_MS_BUCKETS: &[f64] = &[50., 100., 500., 1000., 2500., 10_000.];
|
|
429
465
|
|
|
466
|
+
/// Returns the default histogram buckets that lang should use for a given metric name if they
|
|
467
|
+
/// have not been overridden by the user.
|
|
468
|
+
///
|
|
469
|
+
/// The name must *not* be prefixed with `temporal_`
|
|
470
|
+
pub fn default_buckets_for(histo_name: &str) -> &'static [f64] {
|
|
471
|
+
match histo_name {
|
|
472
|
+
WF_E2E_LATENCY_NAME => WF_LATENCY_MS_BUCKETS,
|
|
473
|
+
WF_TASK_EXECUTION_LATENCY_NAME | WF_TASK_REPLAY_LATENCY_NAME => WF_TASK_MS_BUCKETS,
|
|
474
|
+
WF_TASK_SCHED_TO_START_LATENCY_NAME | ACT_SCHED_TO_START_LATENCY_NAME => {
|
|
475
|
+
TASK_SCHED_TO_START_MS_BUCKETS
|
|
476
|
+
}
|
|
477
|
+
ACT_EXEC_LATENCY_NAME => ACT_EXE_MS_BUCKETS,
|
|
478
|
+
_ => DEFAULT_MS_BUCKETS,
|
|
479
|
+
}
|
|
480
|
+
}
|
|
481
|
+
|
|
430
482
|
/// Chooses appropriate aggregators for our metrics
|
|
431
|
-
#[derive(Debug, Clone)]
|
|
483
|
+
#[derive(Debug, Clone, Default)]
|
|
432
484
|
pub struct SDKAggSelector {
|
|
433
|
-
|
|
485
|
+
default: DefaultAggregationSelector,
|
|
434
486
|
}
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
487
|
+
impl AggregationSelector for SDKAggSelector {
|
|
488
|
+
fn aggregation(&self, kind: InstrumentKind) -> Aggregation {
|
|
489
|
+
match kind {
|
|
490
|
+
InstrumentKind::Histogram => Aggregation::ExplicitBucketHistogram {
|
|
491
|
+
boundaries: DEFAULT_MS_BUCKETS.to_vec(),
|
|
492
|
+
record_min_max: true,
|
|
493
|
+
},
|
|
494
|
+
_ => self.default.aggregation(kind),
|
|
441
495
|
}
|
|
496
|
+
}
|
|
497
|
+
}
|
|
442
498
|
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
499
|
+
fn histo_view(
|
|
500
|
+
metric_name: &'static str,
|
|
501
|
+
buckets: &[f64],
|
|
502
|
+
) -> opentelemetry::metrics::Result<Box<dyn View>> {
|
|
503
|
+
new_view(
|
|
504
|
+
Instrument::new().name(format!("*{metric_name}")),
|
|
505
|
+
opentelemetry_sdk::metrics::Stream::new().aggregation(
|
|
506
|
+
Aggregation::ExplicitBucketHistogram {
|
|
507
|
+
boundaries: buckets.to_vec(),
|
|
508
|
+
record_min_max: true,
|
|
509
|
+
},
|
|
510
|
+
),
|
|
511
|
+
)
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
pub(super) fn augment_meter_provider_with_defaults(
|
|
515
|
+
mpb: MeterProviderBuilder,
|
|
516
|
+
global_tags: &HashMap<String, String>,
|
|
517
|
+
) -> opentelemetry::metrics::Result<MeterProviderBuilder> {
|
|
518
|
+
// Some histograms are actually gauges, but we have to use histograms otherwise they forget
|
|
519
|
+
// their value between collections since we don't use callbacks.
|
|
520
|
+
Ok(mpb
|
|
521
|
+
.with_view(histo_view(WF_E2E_LATENCY_NAME, WF_LATENCY_MS_BUCKETS)?)
|
|
522
|
+
.with_view(histo_view(
|
|
523
|
+
WF_TASK_EXECUTION_LATENCY_NAME,
|
|
524
|
+
WF_TASK_MS_BUCKETS,
|
|
525
|
+
)?)
|
|
526
|
+
.with_view(histo_view(WF_TASK_REPLAY_LATENCY_NAME, WF_TASK_MS_BUCKETS)?)
|
|
527
|
+
.with_view(histo_view(
|
|
528
|
+
WF_TASK_SCHED_TO_START_LATENCY_NAME,
|
|
529
|
+
TASK_SCHED_TO_START_MS_BUCKETS,
|
|
530
|
+
)?)
|
|
531
|
+
.with_view(histo_view(
|
|
532
|
+
ACT_SCHED_TO_START_LATENCY_NAME,
|
|
533
|
+
TASK_SCHED_TO_START_MS_BUCKETS,
|
|
534
|
+
)?)
|
|
535
|
+
.with_view(histo_view(ACT_EXEC_LATENCY_NAME, ACT_EXE_MS_BUCKETS)?)
|
|
536
|
+
.with_resource(default_resource(global_tags)))
|
|
537
|
+
}
|
|
455
538
|
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
539
|
+
/// OTel has no built-in synchronous Gauge. Histograms used to be able to serve that purpose, but
|
|
540
|
+
/// they broke that. Lovely. So, we need to implement one by hand.
|
|
541
|
+
pub(crate) struct MemoryGaugeU64 {
|
|
542
|
+
labels_to_values: Arc<RwLock<HashMap<AttributeSet, u64>>>,
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
impl MemoryGaugeU64 {
|
|
546
|
+
fn new(params: MetricParameters, meter: &Meter) -> Self {
|
|
547
|
+
let gauge = meter
|
|
548
|
+
.u64_observable_gauge(params.name)
|
|
549
|
+
.with_unit(Unit::new(params.unit))
|
|
550
|
+
.with_description(params.description)
|
|
551
|
+
.init();
|
|
552
|
+
let map = Arc::new(RwLock::new(HashMap::<AttributeSet, u64>::new()));
|
|
553
|
+
let map_c = map.clone();
|
|
554
|
+
meter
|
|
555
|
+
.register_callback(&[gauge.as_any()], move |o| {
|
|
556
|
+
// This whole thing is... extra stupid.
|
|
557
|
+
// See https://github.com/open-telemetry/opentelemetry-rust/issues/1181
|
|
558
|
+
// The performance is likely bad here, but, given this is only called when metrics
|
|
559
|
+
// are exported it should be livable for now.
|
|
560
|
+
let map_rlock = map_c.read();
|
|
561
|
+
for (kvs, val) in map_rlock.iter() {
|
|
562
|
+
let kvs: Vec<_> = kvs
|
|
563
|
+
.iter()
|
|
564
|
+
.map(|(k, v)| KeyValue::new(k.clone(), v.clone()))
|
|
565
|
+
.collect();
|
|
566
|
+
o.observe_u64(&gauge, *val, kvs.as_slice())
|
|
462
567
|
}
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
568
|
+
})
|
|
569
|
+
.expect("instrument must exist we just created it");
|
|
570
|
+
MemoryGaugeU64 {
|
|
571
|
+
labels_to_values: map,
|
|
467
572
|
}
|
|
573
|
+
}
|
|
574
|
+
fn record(&self, val: u64, kvs: &[KeyValue]) {
|
|
575
|
+
self.labels_to_values
|
|
576
|
+
.write()
|
|
577
|
+
.insert(AttributeSet::from(kvs), val);
|
|
578
|
+
}
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
/// Create an OTel meter that can be used as a [CoreMeter] to export metrics over OTLP.
|
|
582
|
+
pub fn build_otlp_metric_exporter(
|
|
583
|
+
opts: OtelCollectorOptions,
|
|
584
|
+
) -> Result<CoreOtelMeter, anyhow::Error> {
|
|
585
|
+
let exporter = opentelemetry_otlp::TonicExporterBuilder::default()
|
|
586
|
+
.with_endpoint(opts.url.to_string())
|
|
587
|
+
.with_metadata(MetadataMap::from_headers((&opts.headers).try_into()?))
|
|
588
|
+
.build_metrics_exporter(
|
|
589
|
+
Box::<SDKAggSelector>::default(),
|
|
590
|
+
Box::new(metric_temporality_to_selector(opts.metric_temporality)),
|
|
591
|
+
)?;
|
|
592
|
+
let reader = PeriodicReader::builder(exporter, runtime::Tokio)
|
|
593
|
+
.with_interval(opts.metric_periodicity)
|
|
594
|
+
.build();
|
|
595
|
+
let mp = augment_meter_provider_with_defaults(
|
|
596
|
+
MeterProvider::builder().with_reader(reader),
|
|
597
|
+
&opts.global_tags,
|
|
598
|
+
)?
|
|
599
|
+
.build();
|
|
600
|
+
Ok::<_, anyhow::Error>(CoreOtelMeter(mp.meter(TELEM_SERVICE_NAME)))
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
pub struct StartedPromServer {
|
|
604
|
+
pub meter: Arc<CoreOtelMeter>,
|
|
605
|
+
pub bound_addr: SocketAddr,
|
|
606
|
+
pub abort_handle: AbortHandle,
|
|
607
|
+
}
|
|
608
|
+
|
|
609
|
+
/// Builds and runs a prometheus endpoint which can be scraped by prom instances for metrics export.
|
|
610
|
+
/// Returns the meter that can be used as a [CoreMeter].
|
|
611
|
+
pub fn start_prometheus_metric_exporter(
|
|
612
|
+
opts: PrometheusExporterOptions,
|
|
613
|
+
) -> Result<StartedPromServer, anyhow::Error> {
|
|
614
|
+
let (srv, exporter) = PromServer::new(&opts, SDKAggSelector::default())?;
|
|
615
|
+
let meter_provider = augment_meter_provider_with_defaults(
|
|
616
|
+
MeterProvider::builder().with_reader(exporter),
|
|
617
|
+
&opts.global_tags,
|
|
618
|
+
)?
|
|
619
|
+
.build();
|
|
620
|
+
let bound_addr = srv.bound_addr();
|
|
621
|
+
let handle = tokio::spawn(async move { srv.run().await });
|
|
622
|
+
Ok(StartedPromServer {
|
|
623
|
+
meter: Arc::new(CoreOtelMeter(meter_provider.meter(TELEM_SERVICE_NAME))),
|
|
624
|
+
bound_addr,
|
|
625
|
+
abort_handle: handle.abort_handle(),
|
|
626
|
+
})
|
|
627
|
+
}
|
|
468
628
|
|
|
469
|
-
|
|
629
|
+
/// Buffers [MetricEvent]s for periodic consumption by lang
|
|
630
|
+
#[derive(Debug)]
|
|
631
|
+
pub struct MetricsCallBuffer<I>
|
|
632
|
+
where
|
|
633
|
+
I: BufferInstrumentRef,
|
|
634
|
+
{
|
|
635
|
+
calls_rx: crossbeam::channel::Receiver<MetricEvent<I>>,
|
|
636
|
+
calls_tx: LogErrOnFullSender<MetricEvent<I>>,
|
|
637
|
+
}
|
|
638
|
+
#[derive(Clone, Debug)]
|
|
639
|
+
struct LogErrOnFullSender<I>(crossbeam::channel::Sender<I>);
|
|
640
|
+
impl<I> LogErrOnFullSender<I> {
|
|
641
|
+
fn send(&self, v: I) {
|
|
642
|
+
if let Err(crossbeam::channel::TrySendError::Full(_)) = self.0.try_send(v) {
|
|
643
|
+
error!(
|
|
644
|
+
"Core's metrics buffer is full! Dropping call to record metrics. \
|
|
645
|
+
Make sure you drain the metric buffer often!"
|
|
646
|
+
);
|
|
647
|
+
}
|
|
648
|
+
}
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
impl<I> MetricsCallBuffer<I>
|
|
652
|
+
where
|
|
653
|
+
I: Clone + BufferInstrumentRef,
|
|
654
|
+
{
|
|
655
|
+
/// Create a new buffer with the given capacity
|
|
656
|
+
pub fn new(buffer_size: usize) -> Self {
|
|
657
|
+
let (calls_tx, calls_rx) = crossbeam::channel::bounded(buffer_size);
|
|
658
|
+
MetricsCallBuffer {
|
|
659
|
+
calls_rx,
|
|
660
|
+
calls_tx: LogErrOnFullSender(calls_tx),
|
|
661
|
+
}
|
|
662
|
+
}
|
|
663
|
+
fn new_instrument(&self, params: MetricParameters, kind: MetricKind) -> BufferInstrument<I> {
|
|
664
|
+
let hole = LazyBufferInstrument::hole();
|
|
665
|
+
self.calls_tx.send(MetricEvent::Create {
|
|
666
|
+
params,
|
|
667
|
+
kind,
|
|
668
|
+
populate_into: hole.clone(),
|
|
669
|
+
});
|
|
670
|
+
BufferInstrument {
|
|
671
|
+
kind,
|
|
672
|
+
instrument_ref: hole,
|
|
673
|
+
tx: self.calls_tx.clone(),
|
|
674
|
+
}
|
|
675
|
+
}
|
|
676
|
+
}
|
|
677
|
+
|
|
678
|
+
impl<I> CoreMeter for MetricsCallBuffer<I>
|
|
679
|
+
where
|
|
680
|
+
I: BufferInstrumentRef + Debug + Send + Sync + Clone + 'static,
|
|
681
|
+
{
|
|
682
|
+
fn new_attributes(&self, opts: NewAttributes) -> MetricAttributes {
|
|
683
|
+
let ba = BufferAttributes::hole();
|
|
684
|
+
self.calls_tx.send(MetricEvent::CreateAttributes {
|
|
685
|
+
populate_into: ba.clone(),
|
|
686
|
+
append_from: None,
|
|
687
|
+
attributes: opts.attributes,
|
|
688
|
+
});
|
|
689
|
+
MetricAttributes::Buffer(ba)
|
|
690
|
+
}
|
|
691
|
+
|
|
692
|
+
fn extend_attributes(
|
|
693
|
+
&self,
|
|
694
|
+
existing: MetricAttributes,
|
|
695
|
+
attribs: NewAttributes,
|
|
696
|
+
) -> MetricAttributes {
|
|
697
|
+
if let MetricAttributes::Buffer(ol) = existing {
|
|
698
|
+
let ba = BufferAttributes::hole();
|
|
699
|
+
self.calls_tx.send(MetricEvent::CreateAttributes {
|
|
700
|
+
populate_into: ba.clone(),
|
|
701
|
+
append_from: Some(ol),
|
|
702
|
+
attributes: attribs.attributes,
|
|
703
|
+
});
|
|
704
|
+
MetricAttributes::Buffer(ba)
|
|
705
|
+
} else {
|
|
706
|
+
dbg_panic!("Must use buffer attributes with a buffer metric implementation");
|
|
707
|
+
existing
|
|
708
|
+
}
|
|
709
|
+
}
|
|
710
|
+
|
|
711
|
+
fn counter(&self, params: MetricParameters) -> Arc<dyn Counter> {
|
|
712
|
+
Arc::new(self.new_instrument(params, MetricKind::Counter))
|
|
713
|
+
}
|
|
714
|
+
|
|
715
|
+
fn histogram(&self, params: MetricParameters) -> Arc<dyn Histogram> {
|
|
716
|
+
Arc::new(self.new_instrument(params, MetricKind::Histogram))
|
|
717
|
+
}
|
|
718
|
+
|
|
719
|
+
fn gauge(&self, params: MetricParameters) -> Arc<dyn Gauge> {
|
|
720
|
+
Arc::new(self.new_instrument(params, MetricKind::Gauge))
|
|
721
|
+
}
|
|
722
|
+
}
|
|
723
|
+
impl<I> MetricCallBufferer<I> for MetricsCallBuffer<I>
|
|
724
|
+
where
|
|
725
|
+
I: Send + Sync + BufferInstrumentRef,
|
|
726
|
+
{
|
|
727
|
+
fn retrieve(&self) -> Vec<MetricEvent<I>> {
|
|
728
|
+
self.calls_rx.try_iter().collect()
|
|
729
|
+
}
|
|
730
|
+
}
|
|
731
|
+
|
|
732
|
+
struct BufferInstrument<I: BufferInstrumentRef> {
|
|
733
|
+
kind: MetricKind,
|
|
734
|
+
instrument_ref: LazyBufferInstrument<I>,
|
|
735
|
+
tx: LogErrOnFullSender<MetricEvent<I>>,
|
|
736
|
+
}
|
|
737
|
+
impl<I> BufferInstrument<I>
|
|
738
|
+
where
|
|
739
|
+
I: Clone + BufferInstrumentRef,
|
|
740
|
+
{
|
|
741
|
+
fn send(&self, value: u64, attributes: &MetricAttributes) {
|
|
742
|
+
let attributes = match attributes {
|
|
743
|
+
MetricAttributes::Buffer(l) => l.clone(),
|
|
744
|
+
_ => panic!("MetricsCallBuffer only works with MetricAttributes::Lang"),
|
|
745
|
+
};
|
|
746
|
+
self.tx.send(MetricEvent::Update {
|
|
747
|
+
instrument: self.instrument_ref.clone(),
|
|
748
|
+
update: match self.kind {
|
|
749
|
+
MetricKind::Counter => MetricUpdateVal::Delta(value),
|
|
750
|
+
MetricKind::Gauge | MetricKind::Histogram => MetricUpdateVal::Value(value),
|
|
751
|
+
},
|
|
752
|
+
attributes: attributes.clone(),
|
|
753
|
+
});
|
|
754
|
+
}
|
|
755
|
+
}
|
|
756
|
+
impl<I> Counter for BufferInstrument<I>
|
|
757
|
+
where
|
|
758
|
+
I: BufferInstrumentRef + Send + Sync + Clone,
|
|
759
|
+
{
|
|
760
|
+
fn add(&self, value: u64, attributes: &MetricAttributes) {
|
|
761
|
+
self.send(value, attributes)
|
|
762
|
+
}
|
|
763
|
+
}
|
|
764
|
+
impl<I> Gauge for BufferInstrument<I>
|
|
765
|
+
where
|
|
766
|
+
I: BufferInstrumentRef + Send + Sync + Clone,
|
|
767
|
+
{
|
|
768
|
+
fn record(&self, value: u64, attributes: &MetricAttributes) {
|
|
769
|
+
self.send(value, attributes)
|
|
770
|
+
}
|
|
771
|
+
}
|
|
772
|
+
impl<I> Histogram for BufferInstrument<I>
|
|
773
|
+
where
|
|
774
|
+
I: BufferInstrumentRef + Send + Sync + Clone,
|
|
775
|
+
{
|
|
776
|
+
fn record(&self, value: u64, attributes: &MetricAttributes) {
|
|
777
|
+
self.send(value, attributes)
|
|
778
|
+
}
|
|
779
|
+
}
|
|
780
|
+
|
|
781
|
+
#[derive(Debug)]
|
|
782
|
+
pub struct CoreOtelMeter(Meter);
|
|
783
|
+
impl CoreMeter for CoreOtelMeter {
|
|
784
|
+
fn new_attributes(&self, attribs: NewAttributes) -> MetricAttributes {
|
|
785
|
+
MetricAttributes::OTel {
|
|
786
|
+
kvs: Arc::new(attribs.attributes.into_iter().map(KeyValue::from).collect()),
|
|
787
|
+
}
|
|
788
|
+
}
|
|
789
|
+
|
|
790
|
+
fn extend_attributes(
|
|
791
|
+
&self,
|
|
792
|
+
existing: MetricAttributes,
|
|
793
|
+
attribs: NewAttributes,
|
|
794
|
+
) -> MetricAttributes {
|
|
795
|
+
if let MetricAttributes::OTel { mut kvs } = existing {
|
|
796
|
+
Arc::make_mut(&mut kvs).extend(attribs.attributes.into_iter().map(Into::into));
|
|
797
|
+
MetricAttributes::OTel { kvs }
|
|
798
|
+
} else {
|
|
799
|
+
dbg_panic!("Must use OTel attributes with an OTel metric implementation");
|
|
800
|
+
existing
|
|
801
|
+
}
|
|
802
|
+
}
|
|
803
|
+
|
|
804
|
+
fn counter(&self, params: MetricParameters) -> Arc<dyn Counter> {
|
|
805
|
+
Arc::new(
|
|
806
|
+
self.0
|
|
807
|
+
.u64_counter(params.name)
|
|
808
|
+
.with_unit(Unit::new(params.unit))
|
|
809
|
+
.with_description(params.description)
|
|
810
|
+
.init(),
|
|
811
|
+
)
|
|
812
|
+
}
|
|
813
|
+
|
|
814
|
+
fn histogram(&self, params: MetricParameters) -> Arc<dyn Histogram> {
|
|
815
|
+
Arc::new(
|
|
816
|
+
self.0
|
|
817
|
+
.u64_histogram(params.name)
|
|
818
|
+
.with_unit(Unit::new(params.unit))
|
|
819
|
+
.with_description(params.description)
|
|
820
|
+
.init(),
|
|
821
|
+
)
|
|
822
|
+
}
|
|
823
|
+
|
|
824
|
+
fn gauge(&self, params: MetricParameters) -> Arc<dyn Gauge> {
|
|
825
|
+
Arc::new(MemoryGaugeU64::new(params, &self.0))
|
|
826
|
+
}
|
|
827
|
+
}
|
|
828
|
+
|
|
829
|
+
impl Gauge for MemoryGaugeU64 {
|
|
830
|
+
fn record(&self, value: u64, attributes: &MetricAttributes) {
|
|
831
|
+
if let MetricAttributes::OTel { kvs } = attributes {
|
|
832
|
+
self.record(value, kvs);
|
|
833
|
+
} else {
|
|
834
|
+
dbg_panic!("Must use OTel attributes with an OTel metric implementation");
|
|
835
|
+
}
|
|
836
|
+
}
|
|
837
|
+
}
|
|
838
|
+
|
|
839
|
+
#[derive(Debug, derive_more::Constructor)]
|
|
840
|
+
pub(crate) struct PrefixedMetricsMeter<CM> {
|
|
841
|
+
prefix: String,
|
|
842
|
+
meter: CM,
|
|
843
|
+
}
|
|
844
|
+
impl<CM: CoreMeter> CoreMeter for PrefixedMetricsMeter<CM> {
|
|
845
|
+
fn new_attributes(&self, attribs: NewAttributes) -> MetricAttributes {
|
|
846
|
+
self.meter.new_attributes(attribs)
|
|
847
|
+
}
|
|
848
|
+
|
|
849
|
+
fn extend_attributes(
|
|
850
|
+
&self,
|
|
851
|
+
existing: MetricAttributes,
|
|
852
|
+
attribs: NewAttributes,
|
|
853
|
+
) -> MetricAttributes {
|
|
854
|
+
self.meter.extend_attributes(existing, attribs)
|
|
855
|
+
}
|
|
856
|
+
|
|
857
|
+
fn counter(&self, mut params: MetricParameters) -> Arc<dyn Counter> {
|
|
858
|
+
params.name = (self.prefix.clone() + &*params.name).into();
|
|
859
|
+
self.meter.counter(params)
|
|
860
|
+
}
|
|
861
|
+
|
|
862
|
+
fn histogram(&self, mut params: MetricParameters) -> Arc<dyn Histogram> {
|
|
863
|
+
params.name = (self.prefix.clone() + &*params.name).into();
|
|
864
|
+
self.meter.histogram(params)
|
|
865
|
+
}
|
|
866
|
+
|
|
867
|
+
fn gauge(&self, mut params: MetricParameters) -> Arc<dyn Gauge> {
|
|
868
|
+
params.name = (self.prefix.clone() + &*params.name).into();
|
|
869
|
+
self.meter.gauge(params)
|
|
870
|
+
}
|
|
871
|
+
}
|
|
872
|
+
|
|
873
|
+
#[cfg(test)]
mod tests {
    use super::*;
    use std::any::Any;
    use temporal_sdk_core_api::telemetry::{
        metrics::{BufferInstrumentRef, CustomMetricAttributes},
        METRIC_PREFIX,
    };
    use tracing::subscriber::NoSubscriber;

    /// Stand-in for lang-side attributes; the wrapped number identifies the attribute set.
    #[derive(Debug)]
    struct DummyCustomAttrs(usize);
    impl CustomMetricAttributes for DummyCustomAttrs {
        fn as_any(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
            self
        }
    }
    impl DummyCustomAttrs {
        /// Extracts the identifying number from populated buffer attributes.
        fn as_id(ba: &BufferAttributes) -> usize {
            ba.get()
                .clone()
                .as_any()
                .downcast::<DummyCustomAttrs>()
                .unwrap()
                .0
        }
    }

    /// Stand-in for a lang-side instrument; the wrapped number identifies the instrument.
    #[derive(Debug, Clone)]
    struct DummyInstrumentRef(usize);
    impl BufferInstrumentRef for DummyInstrumentRef {}

    #[test]
    fn test_buffered_core_context() {
        let subscriber = Arc::new(NoSubscriber::new());
        let buffer = Arc::new(MetricsCallBuffer::new(100));
        let telemetry = TelemetryInstance::new(
            Some(subscriber),
            None,
            METRIC_PREFIX.to_string(),
            Some(buffer.clone()),
            true,
        );
        let mc = MetricsContext::top_level("foo".to_string(), "q".to_string(), &telemetry);
        mc.cache_eviction();
        let events = buffer.retrieve();

        // The first event creates the top-level attributes.
        let a1 = assert_matches!(
            &events[0],
            MetricEvent::CreateAttributes {
                populate_into,
                append_from: None,
                attributes,
            }
            if attributes[0].key == "service_name" &&
               attributes[1].key == "namespace" &&
               attributes[2].key == "task_queue"
            => populate_into
        );
        a1.set(Arc::new(DummyCustomAttrs(1))).unwrap();

        // Verify all metrics are created. This number will need to get updated any time a metric
        // is added.
        let num_metrics = 23;
        // Creation events occupy positions 1..=num_metrics; tag each with its position.
        for (event, metric_num) in events[1..=num_metrics].iter().zip(1usize..) {
            let hole = assert_matches!(event,
                MetricEvent::Create { populate_into, .. }
                => populate_into
            );
            hole.set(Arc::new(DummyInstrumentRef(metric_num))).unwrap();
        }
        assert_matches!(
            &events[num_metrics + 1], // +1 for attrib creation (at start), then this update
            MetricEvent::Update {
                instrument,
                attributes,
                update: MetricUpdateVal::Delta(1)
            }
            if DummyCustomAttrs::as_id(attributes) == 1 && instrument.get().0 == num_metrics
        );

        // Verify creating a new context with new attributes merges them properly
        let mc2 = mc.with_new_attrs([MetricKeyValue::new("gotta", "go fast")]);
        mc2.wf_task_latency(Duration::from_secs(1));
        let later_events = buffer.retrieve();
        let a2 = assert_matches!(
            &later_events[0],
            MetricEvent::CreateAttributes {
                populate_into,
                append_from: Some(eh),
                attributes
            }
            if attributes[0].key == "gotta" && DummyCustomAttrs::as_id(eh) == 1
            => populate_into
        );
        a2.set(Arc::new(DummyCustomAttrs(2))).unwrap();
        assert_matches!(
            &later_events[1],
            MetricEvent::Update {
                instrument,
                attributes,
                update: MetricUpdateVal::Value(1000) // milliseconds
            }
            if DummyCustomAttrs::as_id(attributes) == 2 && instrument.get().0 == 11
        );
    }

    #[test]
    fn metric_buffer() {
        let buffer = MetricsCallBuffer::new(10);
        let ctr = buffer.counter(MetricParameters {
            name: "ctr".into(),
            description: "a counter".into(),
            unit: "grognaks".into(),
        });
        let histo = buffer.histogram(MetricParameters {
            name: "histo".into(),
            description: "a histogram".into(),
            unit: "flubarbs".into(),
        });
        let gauge = buffer.gauge(MetricParameters {
            name: "gauge".into(),
            description: "a counter".into(),
            unit: "bleezles".into(),
        });
        let attrs_1 = buffer.new_attributes(NewAttributes {
            attributes: vec![MetricKeyValue::new("hi", "yo")],
        });
        let attrs_2 = buffer.new_attributes(NewAttributes {
            attributes: vec![MetricKeyValue::new("run", "fast")],
        });
        ctr.add(1, &attrs_1);
        histo.record(2, &attrs_1);
        gauge.record(3, &attrs_2);

        // Walk the buffered events oldest-first.
        let mut calls = buffer.retrieve().into_iter();
        let ctr_1 = assert_matches!(
            calls.next(),
            Some(MetricEvent::Create {
                params,
                populate_into,
                kind: MetricKind::Counter
            })
            if params.name == "ctr"
            => populate_into
        );
        ctr_1.set(Arc::new(DummyInstrumentRef(1))).unwrap();
        let hist_2 = assert_matches!(
            calls.next(),
            Some(MetricEvent::Create {
                params,
                populate_into,
                kind: MetricKind::Histogram
            })
            if params.name == "histo"
            => populate_into
        );
        hist_2.set(Arc::new(DummyInstrumentRef(2))).unwrap();
        let gauge_3 = assert_matches!(
            calls.next(),
            Some(MetricEvent::Create {
                params,
                populate_into,
                kind: MetricKind::Gauge
            })
            if params.name == "gauge"
            => populate_into
        );
        gauge_3.set(Arc::new(DummyInstrumentRef(3))).unwrap();
        let a1 = assert_matches!(
            calls.next(),
            Some(MetricEvent::CreateAttributes {
                populate_into,
                append_from: None,
                attributes
            })
            if attributes[0].key == "hi"
            => populate_into
        );
        a1.set(Arc::new(DummyCustomAttrs(1))).unwrap();
        let a2 = assert_matches!(
            calls.next(),
            Some(MetricEvent::CreateAttributes {
                populate_into,
                append_from: None,
                attributes
            })
            if attributes[0].key == "run"
            => populate_into
        );
        a2.set(Arc::new(DummyCustomAttrs(2))).unwrap();
        assert_matches!(
            calls.next(),
            Some(MetricEvent::Update {
                instrument,
                attributes,
                update: MetricUpdateVal::Delta(1)
            })
            if DummyCustomAttrs::as_id(&attributes) == 1 && instrument.get().0 == 1
        );
        assert_matches!(
            calls.next(),
            Some(MetricEvent::Update {
                instrument,
                attributes,
                update: MetricUpdateVal::Value(2)
            })
            if DummyCustomAttrs::as_id(&attributes) == 1 && instrument.get().0 == 2
        );
        assert_matches!(
            calls.next(),
            Some(MetricEvent::Update {
                instrument,
                attributes,
                update: MetricUpdateVal::Value(3)
            })
            if DummyCustomAttrs::as_id(&attributes) == 2 && instrument.get().0 == 3
        );
    }
}
|