@temporalio/core-bridge 1.12.0 → 1.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +64 -119
- package/Cargo.toml +1 -1
- package/index.js +3 -2
- package/package.json +3 -3
- package/releases/aarch64-apple-darwin/index.node +0 -0
- package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
- package/releases/x86_64-apple-darwin/index.node +0 -0
- package/releases/x86_64-pc-windows-msvc/index.node +0 -0
- package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
- package/sdk-core/.cargo/config.toml +1 -2
- package/sdk-core/.github/workflows/per-pr.yml +2 -0
- package/sdk-core/AGENTS.md +7 -0
- package/sdk-core/Cargo.toml +9 -5
- package/sdk-core/README.md +6 -5
- package/sdk-core/client/Cargo.toml +3 -2
- package/sdk-core/client/src/lib.rs +17 -8
- package/sdk-core/client/src/metrics.rs +57 -23
- package/sdk-core/client/src/raw.rs +33 -15
- package/sdk-core/core/Cargo.toml +11 -9
- package/sdk-core/core/benches/workflow_replay.rs +114 -15
- package/sdk-core/core/src/core_tests/activity_tasks.rs +18 -18
- package/sdk-core/core/src/core_tests/child_workflows.rs +4 -4
- package/sdk-core/core/src/core_tests/determinism.rs +6 -6
- package/sdk-core/core/src/core_tests/local_activities.rs +20 -20
- package/sdk-core/core/src/core_tests/mod.rs +40 -5
- package/sdk-core/core/src/core_tests/queries.rs +25 -16
- package/sdk-core/core/src/core_tests/replay_flag.rs +3 -3
- package/sdk-core/core/src/core_tests/updates.rs +3 -3
- package/sdk-core/core/src/core_tests/workers.rs +9 -7
- package/sdk-core/core/src/core_tests/workflow_tasks.rs +40 -42
- package/sdk-core/core/src/ephemeral_server/mod.rs +1 -19
- package/sdk-core/core/src/lib.rs +10 -1
- package/sdk-core/core/src/pollers/poll_buffer.rs +2 -2
- package/sdk-core/core/src/replay/mod.rs +3 -3
- package/sdk-core/core/src/telemetry/metrics.rs +306 -152
- package/sdk-core/core/src/telemetry/mod.rs +11 -4
- package/sdk-core/core/src/telemetry/otel.rs +134 -131
- package/sdk-core/core/src/telemetry/prometheus_meter.rs +885 -0
- package/sdk-core/core/src/telemetry/prometheus_server.rs +48 -28
- package/sdk-core/core/src/test_help/mod.rs +27 -12
- package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +7 -7
- package/sdk-core/core/src/worker/activities.rs +4 -4
- package/sdk-core/core/src/worker/client/mocks.rs +10 -3
- package/sdk-core/core/src/worker/client.rs +68 -5
- package/sdk-core/core/src/worker/heartbeat.rs +229 -0
- package/sdk-core/core/src/worker/mod.rs +35 -14
- package/sdk-core/core/src/worker/tuner/resource_based.rs +4 -4
- package/sdk-core/core/src/worker/workflow/history_update.rs +71 -19
- package/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +1 -2
- package/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +1 -1
- package/sdk-core/core/src/worker/workflow/machines/nexus_operation_state_machine.rs +31 -48
- package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +1 -2
- package/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +3 -3
- package/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +4 -1
- package/sdk-core/core/src/worker/workflow/managed_run.rs +1 -1
- package/sdk-core/core/src/worker/workflow/mod.rs +15 -15
- package/sdk-core/core-api/Cargo.toml +2 -2
- package/sdk-core/core-api/src/envconfig.rs +204 -99
- package/sdk-core/core-api/src/lib.rs +9 -0
- package/sdk-core/core-api/src/telemetry/metrics.rs +548 -100
- package/sdk-core/core-api/src/worker.rs +11 -5
- package/sdk-core/core-c-bridge/Cargo.toml +49 -0
- package/sdk-core/core-c-bridge/build.rs +26 -0
- package/sdk-core/core-c-bridge/include/temporal-sdk-core-c-bridge.h +817 -0
- package/sdk-core/core-c-bridge/src/client.rs +679 -0
- package/sdk-core/core-c-bridge/src/lib.rs +245 -0
- package/sdk-core/core-c-bridge/src/metric.rs +682 -0
- package/sdk-core/core-c-bridge/src/random.rs +61 -0
- package/sdk-core/core-c-bridge/src/runtime.rs +445 -0
- package/sdk-core/core-c-bridge/src/testing.rs +282 -0
- package/sdk-core/core-c-bridge/src/tests/context.rs +644 -0
- package/sdk-core/core-c-bridge/src/tests/mod.rs +178 -0
- package/sdk-core/core-c-bridge/src/tests/utils.rs +108 -0
- package/sdk-core/core-c-bridge/src/worker.rs +1069 -0
- package/sdk-core/etc/deps.svg +64 -64
- package/sdk-core/sdk/src/activity_context.rs +6 -4
- package/sdk-core/sdk/src/lib.rs +49 -27
- package/sdk-core/sdk/src/workflow_future.rs +18 -25
- package/sdk-core/sdk-core-protos/protos/api_upstream/README.md +4 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/buf.yaml +0 -2
- package/sdk-core/sdk-core-protos/protos/api_upstream/openapi/openapiv2.json +630 -83
- package/sdk-core/sdk-core-protos/protos/api_upstream/openapi/openapiv3.yaml +632 -78
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/batch/v1/message.proto +4 -4
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/command/v1/message.proto +6 -4
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/common/v1/message.proto +2 -2
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/deployment/v1/message.proto +32 -2
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/common.proto +10 -1
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/deployment.proto +26 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +2 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/reset.proto +4 -4
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/failure/v1/message.proto +2 -2
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/history/v1/message.proto +47 -31
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/nexus/v1/message.proto +4 -4
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/schedule/v1/message.proto +7 -1
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/worker/v1/message.proto +134 -0
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/workflow/v1/message.proto +14 -11
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +148 -37
- package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +21 -0
- package/sdk-core/sdk-core-protos/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +4 -4
- package/sdk-core/sdk-core-protos/src/history_builder.rs +9 -5
- package/sdk-core/sdk-core-protos/src/lib.rs +96 -6
- package/sdk-core/test-utils/src/lib.rs +11 -3
- package/sdk-core/tests/cloud_tests.rs +3 -3
- package/sdk-core/tests/heavy_tests.rs +11 -3
- package/sdk-core/tests/integ_tests/client_tests.rs +12 -13
- package/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +1 -1
- package/sdk-core/tests/integ_tests/metrics_tests.rs +188 -83
- package/sdk-core/tests/integ_tests/polling_tests.rs +1 -1
- package/sdk-core/tests/integ_tests/queries_tests.rs +56 -40
- package/sdk-core/tests/integ_tests/update_tests.rs +2 -7
- package/sdk-core/tests/integ_tests/worker_tests.rs +3 -4
- package/sdk-core/tests/integ_tests/worker_versioning_tests.rs +3 -7
- package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +3 -5
- package/sdk-core/tests/integ_tests/workflow_tests/nexus.rs +24 -17
- package/src/client.rs +6 -0
- package/src/metrics.rs +6 -6
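
Much of the test churn in the diff below comes from two telemetry changes: the Prometheus exporter gained a `global_tags` option, and metric attributes are now created through the `CoreMeter` itself via `new_attributes` instead of by constructing backend-specific `MetricAttributes::OTel` values. The following is a minimal sketch of that usage pattern, pieced together from calls visible in the diff; it is not part of the published package, and the `prom_metrics`, `get_text`, and `ANY_PORT` test helpers are assumed to come from sdk-core's test utilities.

// Sketch only: illustrates the global_tags + CoreMeter::new_attributes usage seen in the test diffs.
// `prom_metrics`, `get_text`, and `ANY_PORT` are assumed test helpers; the test name is illustrative.
use std::collections::HashMap;
use temporal_sdk_core_api::telemetry::{
    PrometheusExporterOptionsBuilder,
    metrics::{CoreMeter, MetricKeyValue, MetricParameters, NewAttributes},
};
use temporal_sdk_core_test_utils::{ANY_PORT, get_text, prom_metrics};

#[tokio::test]
async fn global_tags_and_meter_attributes_sketch() {
    // Exporter options now accept global tags that are appended to every exported label set.
    let mut opts = PrometheusExporterOptionsBuilder::default();
    opts.global_tags(HashMap::from([("global".to_string(), "hi!".to_string())]))
        .socket_addr(ANY_PORT.parse().unwrap());
    let (telemopts, addr, _aborter) = prom_metrics(Some(opts.build().unwrap()));
    let meter = telemopts.metrics.clone().unwrap();

    // Attributes are built through the meter rather than as OTel-specific values.
    let attrs = meter.new_attributes(NewAttributes::from([MetricKeyValue::new("thing", "foo")]));
    let ctr = meter.counter(MetricParameters::from("some_counter"));
    ctr.add(1, &attrs);

    // Scrape the exporter and check that both the custom label and the global tag show up.
    let body = get_text(format!("http://{addr}/metrics")).await;
    assert!(body.contains("some_counter"));
    assert!(body.contains("global=\"hi!\""));
}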
@@ -1,7 +1,13 @@
 use crate::integ_tests::mk_nexus_endpoint;
 use anyhow::anyhow;
 use assert_matches::assert_matches;
-use std::{
+use std::{
+    collections::HashMap,
+    env,
+    string::ToString,
+    sync::{Arc, OnceLock},
+    time::Duration,
+};
 use temporal_client::{
     REQUEST_LATENCY_HISTOGRAM_NAME, WorkflowClientTrait, WorkflowOptions, WorkflowService,
 };
@@ -19,7 +25,10 @@ use temporal_sdk_core_api::{
     telemetry::{
         HistogramBucketOverrides, OtelCollectorOptionsBuilder, OtlpProtocol,
         PrometheusExporterOptionsBuilder, TelemetryOptionsBuilder,
-        metrics::{
+        metrics::{
+            CoreMeter, CounterBase, Gauge, GaugeBase, HistogramBase, MetricKeyValue,
+            MetricParameters, MetricParametersBuilder, NewAttributes,
+        },
     },
     worker::{
         PollerBehavior, SlotKind, SlotMarkUsedContext, SlotReleaseContext, SlotReservationContext,
@@ -72,6 +81,7 @@ async fn prometheus_metrics_exported(
 ) {
     let mut opts_builder = PrometheusExporterOptionsBuilder::default();
     opts_builder
+        .global_tags(HashMap::from([("global".to_string(), "hi!".to_string())]))
        .socket_addr(ANY_PORT.parse().unwrap())
        .use_seconds_for_durations(use_seconds_latency);
     if custom_buckets {
@@ -99,25 +109,25 @@ async fn prometheus_metrics_exported(
 
     let body = get_text(format!("http://{addr}/metrics")).await;
     assert!(body.contains(
-        "temporal_request_latency_count{operation=\"ListNamespaces\",service_name=\"temporal-core-sdk\"} 1"
+        "temporal_request_latency_count{operation=\"ListNamespaces\",service_name=\"temporal-core-sdk\",global=\"hi!\"} 1"
     ));
     assert!(body.contains(
-        "temporal_request_latency_count{operation=\"GetSystemInfo\",service_name=\"temporal-core-sdk\"} 1"
+        "temporal_request_latency_count{operation=\"GetSystemInfo\",service_name=\"temporal-core-sdk\",global=\"hi!\"} 1"
     ));
     if custom_buckets {
         assert!(body.contains(
             "temporal_request_latency_bucket{\
-            operation=\"GetSystemInfo\",service_name=\"temporal-core-sdk\",le=\"1337\"}"
+            operation=\"GetSystemInfo\",service_name=\"temporal-core-sdk\",global=\"hi!\",le=\"1337\"}"
         ));
     } else if use_seconds_latency {
         assert!(body.contains(
             "temporal_request_latency_bucket{\
-            operation=\"GetSystemInfo\",service_name=\"temporal-core-sdk\",le=\"0.05\"}"
+            operation=\"GetSystemInfo\",service_name=\"temporal-core-sdk\",global=\"hi!\",le=\"0.05\"}"
        ));
     } else {
         assert!(body.contains(
             "temporal_request_latency_bucket{\
-            operation=\"GetSystemInfo\",service_name=\"temporal-core-sdk\",le=\"50\"}"
+            operation=\"GetSystemInfo\",service_name=\"temporal-core-sdk\",global=\"hi!\",le=\"50\"}"
         ));
     }
     // Verify counter names are appropriate (don't end w/ '_total')
@@ -125,15 +135,10 @@ async fn prometheus_metrics_exported(
     // Verify non-temporal metrics meter does not prefix
     let mm = rt.telemetry().get_metric_meter().unwrap();
     let g = mm.inner.gauge(MetricParameters::from("mygauge"));
-
-
-        &MetricAttributes::OTel {
-            kvs: Arc::new(vec![]),
-        },
-    );
+    let attrs = mm.inner.new_attributes(NewAttributes::new(vec![]));
+    g.record(42, &attrs);
     let body = get_text(format!("http://{addr}/metrics")).await;
-
-    assert!(body.contains("\nmygauge 42"));
+    assert!(body.contains("\nmygauge{global=\"hi!\"} 42"));
 }
 
 #[tokio::test]
@@ -688,7 +693,7 @@ async fn docker_metrics_with_prometheus(
 )]
     otel_collector: (&str, OtlpProtocol),
 ) {
-    if
+    if env::var("DOCKER_PROMETHEUS_RUNNING").is_err() {
         return;
     }
     let (otel_collector_addr, otel_protocol) = otel_collector;
@@ -730,7 +735,7 @@ async fn docker_metrics_with_prometheus(
     client.list_namespaces().await.unwrap();
 
     // Give Prometheus time to scrape metrics
-    tokio::time::sleep(
+    tokio::time::sleep(Duration::from_secs(2)).await;
 
     // Query Prometheus API for metrics
     let client = reqwest::Client::new();
@@ -767,6 +772,9 @@ async fn activity_metrics() {
     let rt = CoreRuntime::new_assume_tokio(telemopts).unwrap();
     let wf_name = "activity_metrics";
    let mut starter = CoreWfStarter::new_with_runtime(wf_name, rt);
+    starter
+        .worker_config
+        .graceful_shutdown_period(Duration::from_secs(1));
     let task_queue = starter.get_task_queue().to_owned();
     let mut worker = starter.worker().await;
 
@@ -777,11 +785,6 @@ async fn activity_metrics() {
             start_to_close_timeout: Some(Duration::from_secs(1)),
             ..Default::default()
         });
-        let local_act_pass = ctx.local_activity(LocalActivityOptions {
-            activity_type: "pass_fail_act".to_string(),
-            input: "pass".as_json_payload().expect("serializes fine"),
-            ..Default::default()
-        });
         let normal_act_fail = ctx.activity(ActivityOptions {
             activity_type: "pass_fail_act".to_string(),
             input: "fail".as_json_payload().expect("serializes fine"),
@@ -792,6 +795,12 @@ async fn activity_metrics() {
             }),
             ..Default::default()
         });
+        join!(normal_act_pass, normal_act_fail);
+        let local_act_pass = ctx.local_activity(LocalActivityOptions {
+            activity_type: "pass_fail_act".to_string(),
+            input: "pass".as_json_payload().expect("serializes fine"),
+            ..Default::default()
+        });
         let local_act_fail = ctx.local_activity(LocalActivityOptions {
             activity_type: "pass_fail_act".to_string(),
             input: "fail".as_json_payload().expect("serializes fine"),
@@ -810,12 +819,8 @@ async fn activity_metrics() {
             },
             ..Default::default()
         });
-        join!(
-
-            local_act_pass,
-            normal_act_fail,
-            local_act_fail
-        );
+        join!(local_act_pass, local_act_fail);
+        // TODO: Currently takes a WFT b/c of https://github.com/temporalio/sdk-core/issues/856
         local_act_cancel.cancel(&ctx);
         local_act_cancel.await;
         Ok(().into())
@@ -824,7 +829,6 @@ async fn activity_metrics() {
         match i.as_str() {
             "pass" => Ok("pass"),
             "cancel" => {
-                // TODO: Cancel is taking until shutdown to come through :|
                 ctx.cancelled().await;
                 Err(ActivityError::cancelled())
             }
@@ -925,35 +929,29 @@ async fn nexus_metrics() {
                 .await
             },
             async {
-                ctx
-
-
-
-
-
-                    .result()
-                    .await
+                let _ = ctx
+                    .start_nexus_operation(NexusOperationOptions {
+                        input: Some("fail".into()),
+                        ..partial_op.clone()
+                    })
+                    .await;
             },
             async {
-                ctx
-
-
-
-
-
-                    .result()
-                    .await
+                let _ = ctx
+                    .start_nexus_operation(NexusOperationOptions {
+                        input: Some("handler-fail".into()),
+                        ..partial_op.clone()
+                    })
+                    .await;
             },
             async {
-                ctx
-
-
-
-
-
-
-                    .result()
-                    .await
+                let _ = ctx
+                    .start_nexus_operation(NexusOperationOptions {
+                        input: Some("timeout".into()),
+                        schedule_to_close_timeout: Some(Duration::from_secs(2)),
+                        ..partial_op.clone()
+                    })
+                    .await;
             }
         );
         Ok(().into())
@@ -1111,6 +1109,7 @@ async fn evict_on_complete_does_not_count_as_forced_eviction() {
 
 struct MetricRecordingSlotSupplier<SK> {
     inner: FixedSizeSlotSupplier<SK>,
+    metrics: OnceLock<(Gauge, Gauge, Gauge)>,
 }
 
 #[async_trait::async_trait]
@@ -1121,16 +1120,18 @@ where
     type SlotKind = SK;
 
     async fn reserve_slot(&self, ctx: &dyn SlotReservationContext) -> SlotSupplierPermit {
-        let g =
+        let (g, _, _) = self.metrics.get_or_init(|| {
+            let meter = ctx.get_metrics_meter().unwrap();
+            let g1 = meter.gauge(MetricParameters::from("custom_reserve"));
+            let g2 = meter.gauge(MetricParameters::from("custom_mark_used"));
+            let g3 = meter.gauge(MetricParameters::from("custom_release"));
+            (g1, g2, g3)
+        });
+        let attrs = ctx
            .get_metrics_meter()
            .unwrap()
-            .
-        g.record(
-            1,
-            &MetricAttributes::OTel {
-                kvs: Arc::new(vec![]),
-            },
-        );
+            .new_attributes(NewAttributes::new(vec![]));
+        g.record(1, &attrs);
         self.inner.reserve_slot(ctx).await
     }
 
@@ -1139,30 +1140,18 @@ where
     }
 
     fn mark_slot_used(&self, ctx: &dyn SlotMarkUsedContext<SlotKind = Self::SlotKind>) {
-        let
-
-
-
-        g.record(
-            1,
-            &MetricAttributes::OTel {
-                kvs: Arc::new(vec![]),
-            },
-        );
+        let meter = ctx.get_metrics_meter().unwrap();
+        let attrs = meter.new_attributes(NewAttributes::new(vec![]));
+        let (_, g, _) = self.metrics.get().unwrap();
+        g.record(1, &attrs);
         self.inner.mark_slot_used(ctx);
     }
 
     fn release_slot(&self, ctx: &dyn SlotReleaseContext<SlotKind = Self::SlotKind>) {
-        let
-
-
-
-        g.record(
-            1,
-            &MetricAttributes::OTel {
-                kvs: Arc::new(vec![]),
-            },
-        );
+        let meter = ctx.get_metrics_meter().unwrap();
+        let attrs = meter.new_attributes(NewAttributes::new(vec![]));
+        let (_, _, g) = self.metrics.get().unwrap();
+        g.record(1, &attrs);
         self.inner.release_slot(ctx);
     }
 
@@ -1182,6 +1171,7 @@ async fn metrics_available_from_custom_slot_supplier() {
     let mut tb = TunerBuilder::default();
     tb.workflow_slot_supplier(Arc::new(MetricRecordingSlotSupplier::<WorkflowSlotKind> {
         inner: FixedSizeSlotSupplier::new(5),
+        metrics: OnceLock::new(),
     }));
     starter.worker_config.tuner(Arc::new(tb.build()));
     let mut worker = starter.worker().await;
@@ -1202,9 +1192,124 @@ async fn metrics_available_from_custom_slot_supplier() {
         .unwrap();
     worker.run_until_done().await.unwrap();
 
-    tokio::time::sleep(Duration::from_millis(100)).await;
     let body = get_text(format!("http://{addr}/metrics")).await;
     assert!(body.contains("custom_reserve"));
     assert!(body.contains("custom_mark_used"));
     assert!(body.contains("custom_release"));
 }
+
+#[tokio::test]
+async fn test_prometheus_endpoint_integration() {
+    let (telemopts, addr, _aborter) = prom_metrics(None);
+    let meter = telemopts.metrics.unwrap();
+
+    let counter = meter.counter(MetricParameters {
+        name: "test_requests_total".into(),
+        description: "Total number of test requests".into(),
+        unit: "".into(),
+    });
+    let histogram = meter.histogram(MetricParameters {
+        name: "test_request_duration_ms".into(),
+        description: "Duration of test requests in milliseconds".into(),
+        unit: "ms".into(),
+    });
+    let gauge = meter.gauge(MetricParameters {
+        name: "test_active_connections".into(),
+        description: "Number of active test connections".into(),
+        unit: "".into(),
+    });
+
+    counter.adds(5);
+    histogram.records(100);
+    gauge.records(10);
+
+    let url = format!("http://{addr}/metrics");
+    let response = tokio::time::timeout(Duration::from_secs(10), reqwest::get(&url))
+        .await
+        .expect("Request timed out")
+        .expect("Request failed");
+
+    assert!(response.status().is_success());
+
+    let content_type = response
+        .headers()
+        .get("content-type")
+        .unwrap()
+        .to_str()
+        .unwrap();
+    assert!(content_type.contains("text/plain"));
+
+    let body = response.text().await.expect("Failed to read response body");
+
+    assert!(body.contains("test_requests_total"),);
+    assert!(body.contains("test_request_duration_ms"),);
+    assert!(body.contains("test_active_connections"),);
+    assert!(body.contains("test_requests_total 5"),);
+    assert!(body.contains("test_active_connections 10"),);
+    assert!(body.contains("test_request_duration_ms_count 1"),);
+    assert!(body.contains("test_request_duration_ms_sum 100"),);
+}
+
+#[tokio::test]
+async fn test_prometheus_metric_format_consistency() {
+    let (telemopts, addr, _aborter) = prom_metrics(None);
+    let meter = telemopts.metrics.unwrap();
+
+    let workflow_counter = meter.counter(MetricParameters {
+        name: "temporal_workflow_completed_total".into(),
+        description: "Total number of completed workflows".into(),
+        unit: "".into(),
+    });
+    let activity_histogram = meter.histogram_duration(MetricParameters {
+        name: "temporal_activity_execution_latency".into(),
+        description: "Duration of activity execution".into(),
+        unit: "ms".into(),
+    });
+
+    let attrs = meter.new_attributes(NewAttributes::new(vec![]));
+
+    workflow_counter.add(1, &attrs);
+    activity_histogram.record(Duration::from_millis(150), &attrs);
+
+    let url = format!("http://{addr}/metrics");
+    let response = tokio::time::timeout(Duration::from_secs(10), reqwest::get(&url))
+        .await
+        .expect("Request timed out")
+        .expect("Request failed");
+
+    let body = response.text().await.expect("Failed to read response body");
+
+    assert!(body.contains("# HELP temporal_workflow_completed_total"),);
+    assert!(body.contains("# TYPE temporal_workflow_completed_total counter"),);
+    assert!(body.contains("# HELP temporal_activity_execution_latency"),);
+    assert!(body.contains("# TYPE temporal_activity_execution_latency histogram"),);
+    assert!(body.contains("temporal_workflow_completed_total 1"),);
+    assert!(body.contains("temporal_activity_execution_latency_count 1"),);
+    assert!(body.contains("temporal_activity_execution_latency_bucket"),);
+    assert!(body.contains("le=\""));
+}
+
+#[tokio::test]
+async fn prometheus_label_nonsense() {
+    let mut opts_builder = PrometheusExporterOptionsBuilder::default();
+    opts_builder.socket_addr(ANY_PORT.parse().unwrap());
+    let (telemopts, addr, _aborter) = prom_metrics(Some(opts_builder.build().unwrap()));
+    let meter = telemopts.metrics.clone().unwrap();
+
+    let ctr = meter.counter(
+        MetricParametersBuilder::default()
+            .name("some_counter")
+            .build()
+            .unwrap(),
+    );
+    let a1 = meter.new_attributes(NewAttributes::from([MetricKeyValue::new("thing", "foo")]));
+    let a2 = meter.new_attributes(NewAttributes::from([MetricKeyValue::new("blerp", "baz")]));
+    ctr.add(1, &a1);
+    ctr.add(1, &a2);
+    ctr.add(1, &a2);
+    ctr.add(1, &a1);
+
+    let body = get_text(format!("http://{addr}/metrics")).await;
+    assert!(body.contains("some_counter{thing=\"foo\"} 2"));
+    assert!(body.contains("some_counter{blerp=\"baz\"} 2"));
+}
@@ -169,7 +169,7 @@ async fn switching_worker_client_changes_poll() {
 
     // Create a worker only on the first server
     let worker = init_worker(
-        init_integ_telem(),
+        init_integ_telem().unwrap(),
         integ_worker_config("my-task-queue")
             // We want a cache so we don't get extra remove-job activations
            .max_cached_workflows(100_usize)
@@ -12,6 +12,7 @@ use temporal_sdk_core_protos::{
 };
 use temporal_sdk_core_test_utils::{
     CoreWfStarter, WorkerTestHelpers, drain_pollers_and_shutdown, init_core_and_create_wf,
+    start_timer_cmd,
 };
 use tokio::join;
 
@@ -124,30 +125,29 @@ async fn query_after_execution_complete(#[case] do_evict: bool) {
         let task = core.poll_workflow_activation().await.unwrap();
 
         // When we see the query, handle it.
-        if go_until_query
-
+        if go_until_query
+            && let [
                 WorkflowActivationJob {
                     variant: Some(workflow_activation_job::Variant::QueryWorkflow(query)),
                 },
             ] = task.jobs.as_slice()
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        }
+        {
+            core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
+                task.run_id,
+                QueryResult {
+                    query_id: query.query_id.clone(),
+                    variant: Some(
+                        QuerySuccess {
+                            response: Some(query_resp.into()),
+                        }
+                        .into(),
+                    ),
+                }
+                .into(),
+            ))
+            .await
+            .unwrap();
+            break "".to_string();
         }
 
         if matches!(
@@ -213,8 +213,11 @@ async fn query_after_execution_complete(#[case] do_evict: bool) {
     drain_pollers_and_shutdown(core).await;
 }
 
+#[rstest]
+#[case::withou_nde(false)]
+#[case::with_nde(true)]
 #[tokio::test]
-async fn fail_legacy_query() {
+async fn fail_legacy_query(#[case] with_nde: bool) {
     let query_err = "oh no broken";
     let mut starter = CoreWfStarter::new("fail_legacy_query");
     let core = starter.get_worker().await;
@@ -245,28 +248,41 @@ async fn fail_legacy_query() {
     let query_responder = async {
         // Have to replay first since we've evicted
         let task = core.poll_workflow_activation().await.unwrap();
-
-
-
-
-
-
-
-
-
-            task.
-
-
-
-
-
-
-
-
+        if with_nde {
+            core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
+                task.run_id,
+                start_timer_cmd(1, Duration::from_millis(1)),
+            ))
+            .await
+            .unwrap();
+        } else {
+            core.complete_execution(&task.run_id).await;
+            let task = core.poll_workflow_activation().await.unwrap();
+            assert_matches!(
+                task.jobs.as_slice(),
+                [WorkflowActivationJob {
+                    variant: Some(workflow_activation_job::Variant::QueryWorkflow(q)),
+                }] => q
+            );
+            core.complete_workflow_activation(WorkflowActivationCompletion::fail(
+                task.run_id,
+                Failure {
+                    message: query_err.to_string(),
+                    ..Default::default()
+                },
+                None,
+            ))
+            .await
+            .unwrap();
+        }
     };
     let (q_resp, _) = join!(query_fut, query_responder);
     // Ensure query response is a failure and has the right message
-
+    if with_nde {
+        assert!(q_resp.message().contains("TMPRL1100"));
+    } else {
+        assert_eq!(q_resp.message(), query_err);
+    }
 }
 
 #[tokio::test]
@@ -110,6 +110,7 @@ async fn reapplied_updates_due_to_reset() {
     let mut client_mut = client.clone();
     let reset_response = WorkflowService::reset_workflow_execution(
         Arc::make_mut(&mut client_mut),
+        #[allow(deprecated)]
         ResetWorkflowExecutionRequest {
             namespace: client.namespace().into(),
             workflow_execution: Some(WorkflowExecution {
@@ -154,12 +155,6 @@ async fn reapplied_updates_due_to_reset() {
     let with_id = HistoryForReplay::new(history, workflow_id.to_string());
 
     let replay_worker = init_core_replay_preloaded(workflow_id, [with_id]);
-    // Init workflow comes by itself
-    let act = replay_worker.poll_workflow_activation().await.unwrap();
-    replay_worker
-        .complete_workflow_activation(WorkflowActivationCompletion::empty(act.run_id))
-        .await
-        .unwrap();
     // We now recapitulate the actions that the worker took on first execution above, pretending
     // that we always followed the post-reset history.
     // First, we handled the post-reset reapplied update and did not complete the workflow.
@@ -167,7 +162,7 @@ async fn reapplied_updates_due_to_reset() {
         FailUpdate::No,
         CompleteWorkflow::No,
         replay_worker.as_ref(),
-
+        2,
     )
     .await;
     // Then the timer fires
@@ -15,13 +15,13 @@ use temporal_sdk_core_api::{
     errors::WorkerValidationError,
     worker::{PollerBehavior, WorkerConfigBuilder, WorkerVersioningStrategy},
 };
+use temporal_sdk_core_protos::temporal::api::enums::v1::WorkflowTaskFailedCause::GrpcMessageTooLarge;
 use temporal_sdk_core_protos::{
     coresdk::workflow_completion::{
         Failure, WorkflowActivationCompletion, workflow_activation_completion::Status,
     },
     temporal::api::{
-        enums::v1::
-        failure::v1::Failure as InnerFailure,
+        enums::v1::EventType, failure::v1::Failure as InnerFailure,
         history::v1::history_event::Attributes::WorkflowTaskFailedEventAttributes,
     },
 };
@@ -188,8 +188,7 @@ async fn oversize_grpc_message() {
     assert!(starter.get_history().await.events.iter().any(|e| {
         e.event_type == EventType::WorkflowTaskFailed as i32
             && if let WorkflowTaskFailedEventAttributes(attr) = e.attributes.as_ref().unwrap() {
-
-                attr.cause == WorkflowWorkerUnhandledFailure as i32
+                attr.cause == GrpcMessageTooLarge as i32
                     && attr.failure.as_ref().unwrap().message == "GRPC Message too large"
             } else {
                 false
@@ -131,13 +131,9 @@ async fn sets_deployment_info_on_task_responses(#[values(true, false)] use_defau
         );
     }
     assert_eq!(wft_complete.worker_deployment_name, deploy_name);
-
-
-
-        wft_complete.worker_deployment_version,
-        format!("{deploy_name}.1.0")
-    );
-}
+    let dv = wft_complete.deployment_version.unwrap();
+    assert_eq!(dv.deployment_name, deploy_name);
+    assert_eq!(dv.build_id, "1.0");
 }
 
 #[tokio::test]
@@ -270,12 +270,10 @@ impl WorkerInterceptor for LACancellerInterceptor {
         if let Some(workflow_activation_completion::Status::Successful(
             workflow_completion::Success { commands, .. },
         )) = completion.status.as_ref()
-
-            if let Some(&Variant::CompleteWorkflowExecution(_)) =
+            && let Some(&Variant::CompleteWorkflowExecution(_)) =
                 commands.last().and_then(|v| v.variant.as_ref())
-
-
-            }
+        {
+            self.token.cancel();
         }
     }
     fn on_shutdown(&self, _: &temporal_sdk::Worker) {