gruf-prometheus 2.2.0 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -1
- data/README.md +22 -20
- data/lib/gruf/prometheus/client/collector.rb +30 -0
- data/lib/gruf/prometheus/client/interceptor.rb +1 -0
- data/lib/gruf/prometheus/client/type_collector.rb +2 -0
- data/lib/gruf/prometheus/server/collector.rb +30 -0
- data/lib/gruf/prometheus/server/interceptor.rb +1 -0
- data/lib/gruf/prometheus/server/type_collector.rb +2 -0
- data/lib/gruf/prometheus/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cd4a02507521f0e59c1242e42396a2ce66a33c1d6152268c1479239abed3f00b
|
4
|
+
data.tar.gz: 01b68639aa687e63d9fd525c69fe968100e00c7d75fcf61d7a0f21d0e47247d9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 99ba9ca706e42f165d08464d94ba92fc3c7450f68e91062b7717d66124c0f8108d6aaa3d119d1193ae68aeb2898a6d7b1266576919e757fe799d5f7e8f64b191
|
7
|
+
data.tar.gz: 8a05f7a59227f5ac0f428cd4b8e4d4fc384363337f5e7d93db2326c56be2de4effb6493c5f1e42ad6b35793bbb9a99e6098a86382723936909e1c908052aaebf
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,11 @@ Changelog for the gruf-prometheus gem.
|
|
2
2
|
|
3
3
|
### Pending Release
|
4
4
|
|
5
|
+
### 2.3.0
|
6
|
+
|
7
|
+
- Add server collector and interceptor for measuring server failures
|
8
|
+
- Add client collector and interceptor for measuring client failures
|
9
|
+
|
5
10
|
### 2.2.0
|
6
11
|
|
7
12
|
- Add Ruby 3.1 support
|
@@ -44,7 +49,7 @@ Changelog for the gruf-prometheus gem.
|
|
44
49
|
|
45
50
|
### 1.0.0
|
46
51
|
|
47
|
-
- *Breaking Changes* Move all prometheus core dependencies to bc-prometheus-ruby
|
52
|
+
- *Breaking Changes* Move all prometheus core dependencies to bc-prometheus-ruby
|
48
53
|
|
49
54
|
### 0.0.2
|
50
55
|
|
data/README.md
CHANGED
@@ -51,16 +51,17 @@ This will output the following metrics:
|
|
51
51
|
|Name|Type|Description|
|
52
52
|
|---|---|---|
|
53
53
|
|ruby_grpc_server_started_total|counter|Total number of RPCs started on the server|
|
54
|
+
|ruby_grpc_server_failed_total|counter|Total number of RPCs that throw an unknown, internal, data loss, failed precondition, unavailable, deadline exceeded, or cancelled exception on the server|
|
54
55
|
|ruby_grpc_server_handled_total|counter|Total number of RPCs completed on the server, regardless of success or failure|
|
55
56
|
|ruby_grpc_server_handled_latency_seconds|histogram|Histogram of response latency of RPCs handled by the server, in seconds|
|
56
57
|
|
57
58
|
Note that the histogram is disabled by default - you'll have to turn it on either through the `server_measure_latency`
|
58
59
|
configuration yielded in `Gruf::Prometheus.configure`, or through the `PROMETHEUS_SERVER_MEASURE_LATENCY` environment
|
59
|
-
variable. Also, the `measure_latency: true` option can be passed as a second argument to `Gruf.interceptors.use` to
|
60
|
+
variable. Also, the `measure_latency: true` option can be passed as a second argument to `Gruf.interceptors.use` to
|
60
61
|
configure this directly in the interceptor.
|
61
62
|
|
62
63
|
The precedence order for this is, from first to last, with last taking precedence:
|
63
|
-
1) `measure_latency: true` passed into the interceptor
|
64
|
+
1) `measure_latency: true` passed into the interceptor
|
64
65
|
2) `Gruf::Prometheus.configure` explicit setting globally
|
65
66
|
3) `PROMETHEUS_SERVER_MEASURE_LATENCY` ENV var globally. This is the only value set by default - to `false` - and will
|
66
67
|
be the default unless other methods are invoked.
|
@@ -76,40 +77,41 @@ Gruf::Client.new(
|
|
76
77
|
interceptors: [Gruf::Prometheus::Client::Interceptor.new]
|
77
78
|
}
|
78
79
|
)
|
79
|
-
```
|
80
|
+
```
|
80
81
|
|
81
82
|
|Name|Type|Description|
|
82
83
|
|---|---|---|
|
83
84
|
|ruby_grpc_client_started_total|counter|Total number of RPCs started by the client|
|
85
|
+
|ruby_grpc_client_failed_total|counter|Total number of RPCs that throw an unknown, internal, data loss, failed precondition, unavailable, deadline exceeded, or cancelled exception by the client|
|
84
86
|
|ruby_grpc_client_completed|counter|Total number of RPCs completed by the client, regardless of success or failure|
|
85
87
|
|ruby_grpc_client_completed_latency_seconds|histogram|Histogram of response latency of RPCs completed by the client, in seconds|
|
86
88
|
|
87
89
|
Note that the histogram is disabled by default - you'll have to turn it on either through the `client_measure_latency`
|
88
90
|
configuration yielded in `Gruf::Prometheus.configure`, or through the `PROMETHEUS_CLIENT_MEASURE_LATENCY` environment
|
89
|
-
variable. Optionally, you can pass in `measure_latency: true` into the Interceptor directly as an option argument in the
|
90
|
-
initializer.
|
91
|
+
variable. Optionally, you can pass in `measure_latency: true` into the Interceptor directly as an option argument in the
|
92
|
+
initializer.
|
91
93
|
|
92
94
|
The precedence order for this is, from first to last, with last taking precedence:
|
93
|
-
1) `measure_latency: true` passed into the interceptor
|
95
|
+
1) `measure_latency: true` passed into the interceptor
|
94
96
|
2) `Gruf::Prometheus.configure` explicit setting globally
|
95
97
|
3) `PROMETHEUS_CLIENT_MEASURE_LATENCY` ENV var globally. This is the only value set by default - to `false` - and will
|
96
|
-
be the default unless other methods are invoked.
|
98
|
+
be the default unless other methods are invoked.
|
97
99
|
|
98
100
|
### Running the Client Interceptor in Non-gRPC Processes
|
99
101
|
|
100
102
|
One caveat is that you _must_ have the appropriate Type Collector setup in whatever process you are running in. If
|
101
|
-
you are already doing this in a gruf gRPC service that is using the hook provided by this gem above, no further
|
103
|
+
you are already doing this in a gruf gRPC service that is using the hook provided by this gem above, no further
|
102
104
|
configuration is needed. Otherwise, in whatever bc-prometheus-ruby configuration you have setup, you'll need to ensure
|
103
|
-
the type collector is loaded:
|
105
|
+
the type collector is loaded:
|
104
106
|
|
105
107
|
```ruby
|
106
108
|
# prometheus_server is whatever `::Bigcommerce::Prometheus::Server` instance you are using in the current process
|
107
109
|
# Often hooks into these are exposed as configuration options, e.g. `web_collectors`, `resque_collectors`, etc
|
108
|
-
prometheus_server.add_type_collector(::Gruf::Prometheus::Client::TypeCollector.new)
|
110
|
+
prometheus_server.add_type_collector(::Gruf::Prometheus::Client::TypeCollector.new)
|
109
111
|
```
|
110
112
|
|
111
113
|
Note that you don't need to do this for the `Gruf::Prometheus::Client::Collector`, as it is an on-demand collector
|
112
|
-
that does not run in a threaded loop.
|
114
|
+
that does not run in a threaded loop.
|
113
115
|
|
114
116
|
See [bc-prometheus-ruby](https://github.com/bigcommerce/bc-prometheus-ruby#custom-server-integrations)'s documentation
|
115
117
|
on custom server integrations for more information.
|
@@ -129,7 +131,7 @@ where the options available are:
|
|
129
131
|
| Option | Description | Default | ENV Name |
|
130
132
|
| ------ | ----------- | ------- | -------- |
|
131
133
|
| process_label | The label to use for metric prefixing | grpc | PROMETHEUS_PROCESS_LABEL |
|
132
|
-
| process_name | Label to use for process name in logging | grpc | PROMETHEUS_PROCESS_NAME |
|
134
|
+
| process_name | Label to use for process name in logging | grpc | PROMETHEUS_PROCESS_NAME |
|
133
135
|
| collection_frequency | The period in seconds in which to collect metrics | 30 | PROMETHEUS_COLLECTION_FREQUENCY |
|
134
136
|
| collectors | Any collectors you would like to start with the server. Passed as a hash of collector class => options | {} | |
|
135
137
|
| type_collectors | Any type collectors you would like to start with the server. Passed as an array of collector objects | [] | |
|
@@ -138,17 +140,17 @@ where the options available are:
|
|
138
140
|
|
139
141
|
## License
|
140
142
|
|
141
|
-
Copyright (c) 2019-present, BigCommerce Pty. Ltd. All rights reserved
|
143
|
+
Copyright (c) 2019-present, BigCommerce Pty. Ltd. All rights reserved
|
142
144
|
|
143
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
144
|
-
documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
|
145
|
-
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
|
145
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
146
|
+
documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
|
147
|
+
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
|
146
148
|
persons to whom the Software is furnished to do so, subject to the following conditions:
|
147
149
|
|
148
|
-
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
|
150
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
|
149
151
|
Software.
|
150
152
|
|
151
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
|
152
|
-
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
153
|
-
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
153
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
|
154
|
+
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
155
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
154
156
|
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
@@ -23,6 +23,15 @@ module Gruf
|
|
23
23
|
#
|
24
24
|
class Collector < Bigcommerce::Prometheus::Collectors::Base
|
25
25
|
RESPONSE_CODE_OK = 'OK'
|
26
|
+
FAILURE_CLASSES = %w[
|
27
|
+
GRPC::Unknown
|
28
|
+
GRPC::Internal
|
29
|
+
GRPC::DataLoss
|
30
|
+
GRPC::FailedPrecondition
|
31
|
+
GRPC::Unavailable
|
32
|
+
GRPC::DeadlineExceeded
|
33
|
+
GRPC::Cancelled
|
34
|
+
].freeze
|
26
35
|
|
27
36
|
##
|
28
37
|
# @param [Gruf::Outbound::RequestContext] request_context
|
@@ -34,6 +43,19 @@ module Gruf
|
|
34
43
|
)
|
35
44
|
end
|
36
45
|
|
46
|
+
##
|
47
|
+
# @param [Gruf::Controller::RequestContext] request_context
|
48
|
+
# @param [Gruf::Interceptors::Timer::Result] result
|
49
|
+
#
|
50
|
+
def failed_total(request_context:, result:)
|
51
|
+
return unless failure?(result)
|
52
|
+
|
53
|
+
push(
|
54
|
+
grpc_client_failed_total: 1,
|
55
|
+
custom_labels: custom_labels(request_context: request_context)
|
56
|
+
)
|
57
|
+
end
|
58
|
+
|
37
59
|
##
|
38
60
|
# @param [Gruf::Controller::RequestContext] request_context
|
39
61
|
# @param [Gruf::Interceptors::Timer::Result] result
|
@@ -101,6 +123,14 @@ module Gruf
|
|
101
123
|
Gruf::Prometheus::RequestTypes::UNARY
|
102
124
|
end
|
103
125
|
end
|
126
|
+
|
127
|
+
##
|
128
|
+
# @param [Gruf::Interceptors::Timer::Result] result
|
129
|
+
# @return [Boolean]
|
130
|
+
#
|
131
|
+
def failure?(result)
|
132
|
+
FAILURE_CLASSES.include?(result.message_class_name)
|
133
|
+
end
|
104
134
|
end
|
105
135
|
end
|
106
136
|
end
|
@@ -43,6 +43,7 @@ module Gruf
|
|
43
43
|
#
|
44
44
|
def send_metrics(request_context:, result:)
|
45
45
|
prometheus_collector.started_total(request_context: request_context)
|
46
|
+
prometheus_collector.failed_total(request_context: request_context, result: result) unless result.successful?
|
46
47
|
prometheus_collector.completed(request_context: request_context, result: result)
|
47
48
|
prometheus_collector.completed_latency_seconds(request_context: request_context, result: result) if measure_latency?
|
48
49
|
rescue StandardError => e
|
@@ -34,6 +34,7 @@ module Gruf
|
|
34
34
|
def build_metrics
|
35
35
|
metrics = {
|
36
36
|
grpc_client_started_total: PrometheusExporter::Metric::Counter.new('grpc_client_started_total', 'Total number of RPCs started by the client'),
|
37
|
+
grpc_client_failed_total: PrometheusExporter::Metric::Counter.new('grpc_client_failed_total', 'Total number of RPCs failed by the client'),
|
37
38
|
grpc_client_completed: PrometheusExporter::Metric::Counter.new('grpc_client_completed', 'Total number of RPCs completed by the client, regardless of success or failure')
|
38
39
|
}
|
39
40
|
metrics[:grpc_client_completed_latency_seconds] = PrometheusExporter::Metric::Histogram.new('grpc_client_completed_latency_seconds', 'Histogram of response latency of RPCs completed by the client, in seconds') if measure_latency?
|
@@ -45,6 +46,7 @@ module Gruf
|
|
45
46
|
#
|
46
47
|
def collect_metrics(data: {}, labels: {})
|
47
48
|
metric(:grpc_client_started_total)&.observe(data['grpc_client_started_total'].to_i, labels)
|
49
|
+
metric(:grpc_client_failed_total)&.observe(data['grpc_client_failed_total'].to_i, labels)
|
48
50
|
metric(:grpc_client_completed)&.observe(data['grpc_client_completed'].to_i, labels)
|
49
51
|
metric(:grpc_client_completed_latency_seconds)&.observe(data['grpc_client_completed_latency_seconds'].to_f, labels) if measure_latency?
|
50
52
|
end
|
@@ -23,6 +23,15 @@ module Gruf
|
|
23
23
|
#
|
24
24
|
class Collector < Bigcommerce::Prometheus::Collectors::Base
|
25
25
|
RESPONSE_CODE_OK = 'OK'
|
26
|
+
FAILURE_CLASSES = %w[
|
27
|
+
GRPC::Unknown
|
28
|
+
GRPC::Internal
|
29
|
+
GRPC::DataLoss
|
30
|
+
GRPC::FailedPrecondition
|
31
|
+
GRPC::Unavailable
|
32
|
+
GRPC::DeadlineExceeded
|
33
|
+
GRPC::Cancelled
|
34
|
+
].freeze
|
26
35
|
|
27
36
|
##
|
28
37
|
# @param [Gruf::Controller::Request] request
|
@@ -34,6 +43,19 @@ module Gruf
|
|
34
43
|
)
|
35
44
|
end
|
36
45
|
|
46
|
+
##
|
47
|
+
# @param [Gruf::Controller::Request] request
|
48
|
+
# @param [Gruf::Interceptors::Timer::Result] result
|
49
|
+
#
|
50
|
+
def failed_total(request:, result:)
|
51
|
+
return unless failure?(result)
|
52
|
+
|
53
|
+
push(
|
54
|
+
grpc_server_failed_total: 1,
|
55
|
+
custom_labels: custom_labels(request: request)
|
56
|
+
)
|
57
|
+
end
|
58
|
+
|
37
59
|
##
|
38
60
|
# @param [Gruf::Controller::Request] request
|
39
61
|
# @param [Gruf::Interceptors::Timer::Result] result
|
@@ -116,6 +138,14 @@ module Gruf
|
|
116
138
|
Gruf::Prometheus::RequestTypes::UNARY
|
117
139
|
end
|
118
140
|
end
|
141
|
+
|
142
|
+
##
|
143
|
+
# @param [Gruf::Interceptors::Timer::Result] result
|
144
|
+
# @return [Boolean]
|
145
|
+
#
|
146
|
+
def failure?(result)
|
147
|
+
FAILURE_CLASSES.include?(result.message_class_name)
|
148
|
+
end
|
119
149
|
end
|
120
150
|
end
|
121
151
|
end
|
@@ -42,6 +42,7 @@ module Gruf
|
|
42
42
|
#
|
43
43
|
def send_metrics(result)
|
44
44
|
prometheus_collector.started_total(request: request)
|
45
|
+
prometheus_collector.failed_total(request: request, result: result) unless result.successful?
|
45
46
|
prometheus_collector.handled_total(request: request, result: result)
|
46
47
|
prometheus_collector.handled_latency_seconds(request: request, result: result) if measure_latency?
|
47
48
|
rescue StandardError => e
|
@@ -34,6 +34,7 @@ module Gruf
|
|
34
34
|
def build_metrics
|
35
35
|
metrics = {
|
36
36
|
grpc_server_started_total: PrometheusExporter::Metric::Counter.new('grpc_server_started_total', 'Total number of RPCs started on the server'),
|
37
|
+
grpc_server_failed_total: PrometheusExporter::Metric::Counter.new('grpc_server_failed_total', 'Total number of RPCs failed on the server'),
|
37
38
|
grpc_server_handled_total: PrometheusExporter::Metric::Counter.new('grpc_server_handled_total', 'Total number of RPCs completed on the server, regardless of success or failure')
|
38
39
|
}
|
39
40
|
metrics[:grpc_server_handled_latency_seconds] = PrometheusExporter::Metric::Histogram.new('grpc_server_handled_latency_seconds', 'Histogram of response latency of RPCs handled by the server, in seconds') if measure_latency?
|
@@ -45,6 +46,7 @@ module Gruf
|
|
45
46
|
#
|
46
47
|
def collect_metrics(data: {}, labels: {})
|
47
48
|
metric(:grpc_server_started_total)&.observe(data['grpc_server_started_total'].to_i, labels)
|
49
|
+
metric(:grpc_server_failed_total)&.observe(data['grpc_server_failed_total'].to_i, labels)
|
48
50
|
metric(:grpc_server_handled_total)&.observe(data['grpc_server_handled_total'].to_i, labels)
|
49
51
|
metric(:grpc_server_handled_latency_seconds)&.observe(data['grpc_server_handled_latency_seconds'].to_f, labels) if measure_latency?
|
50
52
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gruf-prometheus
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.0
|
4
|
+
version: 2.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shaun McCormick
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bc-prometheus-ruby
|