prometheus-client 0.9.0 → 0.10.0.pre.alpha.1
- checksums.yaml +4 -4
- data/README.md +230 -19
- data/lib/prometheus/client.rb +5 -0
- data/lib/prometheus/client/config.rb +15 -0
- data/lib/prometheus/client/counter.rb +2 -8
- data/lib/prometheus/client/data_stores/README.md +306 -0
- data/lib/prometheus/client/data_stores/direct_file_store.rb +313 -0
- data/lib/prometheus/client/data_stores/single_threaded.rb +58 -0
- data/lib/prometheus/client/data_stores/synchronized.rb +64 -0
- data/lib/prometheus/client/formats/text.rb +8 -14
- data/lib/prometheus/client/gauge.rb +6 -12
- data/lib/prometheus/client/histogram.rb +82 -34
- data/lib/prometheus/client/label_set_validator.rb +17 -13
- data/lib/prometheus/client/metric.rb +41 -22
- data/lib/prometheus/client/registry.rb +27 -9
- data/lib/prometheus/client/summary.rb +26 -35
- data/lib/prometheus/client/version.rb +1 -1
- data/lib/prometheus/middleware/collector.rb +32 -29
- metadata +36 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 58aea5e1eb91b3b53326f8c183c65187d5fcd6d87fc908c107f73d65257809aa
+  data.tar.gz: 8d78ea6b70ebc78ed1deba5229b5e2f8b608b11511d8a4278be794dadf945284
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 07573f45a2555b063ad4987bd26640bfe7388f98e3f1d11e1b21c65ecf7a853aa372c32326fe944ae656efb9d6837171039ee465712f67dc50513c0f62c0baed
+  data.tar.gz: ee75b0ad0b4b2664d3d54bb3bcb29f6fe26d4e6b3801c70db37148343c73cc53b769125ad8fb56ad820d24355a2c862fe3e70832961ee7486a2b748357d205cb
data/README.md
CHANGED
@@ -19,12 +19,12 @@ require 'prometheus/client'
 prometheus = Prometheus::Client.registry

 # create a new counter metric
-http_requests = Prometheus::Client::Counter.new(:http_requests, 'A counter of HTTP requests made')
+http_requests = Prometheus::Client::Counter.new(:http_requests, docstring: 'A counter of HTTP requests made')
 # register the metric
 prometheus.register(http_requests)

 # equivalent helper function
-http_requests = prometheus.counter(:http_requests, 'A counter of HTTP requests made')
+http_requests = prometheus.counter(:http_requests, docstring: 'A counter of HTTP requests made')

 # start using the counter
 http_requests.increment
@@ -99,16 +99,16 @@ The following metric types are currently supported.
 Counter is a metric that exposes merely a sum or tally of things.

 ```ruby
-counter = Prometheus::Client::Counter.new(:service_requests_total, '...')
+counter = Prometheus::Client::Counter.new(:service_requests_total, docstring: '...', labels: [:service])

 # increment the counter for a given label set
-counter.increment({ service: 'foo' })
+counter.increment(labels: { service: 'foo' })

 # increment by a given value
-counter.increment({ service: 'bar' }
+counter.increment(by: 5, labels: { service: 'bar' })

 # get current value for a given label set
-counter.get({ service: 'bar' })
+counter.get(labels: { service: 'bar' })
 # => 5
 ```

@@ -118,21 +118,21 @@ Gauge is a metric that exposes merely an instantaneous value or some snapshot
 thereof.

 ```ruby
-gauge = Prometheus::Client::Gauge.new(:room_temperature_celsius, '...')
+gauge = Prometheus::Client::Gauge.new(:room_temperature_celsius, docstring: '...', labels: [:room])

 # set a value
-gauge.set({ room: 'kitchen' }
+gauge.set(21.534, labels: { room: 'kitchen' })

 # retrieve the current value for a given label set
-gauge.get({ room: 'kitchen' })
+gauge.get(labels: { room: 'kitchen' })
 # => 21.534

 # increment the value (default is 1)
-gauge.increment({ room: 'kitchen' })
+gauge.increment(labels: { room: 'kitchen' })
 # => 22.534

 # decrement the value by a given value
-gauge.decrement({ room: 'kitchen' }
+gauge.decrement(by: 5, labels: { room: 'kitchen' })
 # => 17.534
 ```

@@ -143,13 +143,13 @@ response sizes) and counts them in configurable buckets. It also provides a sum
 of all observed values.

 ```ruby
-histogram = Prometheus::Client::Histogram.new(:service_latency_seconds, '...')
+histogram = Prometheus::Client::Histogram.new(:service_latency_seconds, docstring: '...', labels: [:service])

 # record a value
-histogram.observe(
+histogram.observe(Benchmark.realtime { service.call(arg) }, labels: { service: 'users' })

 # retrieve the current bucket values
-histogram.get({ service: 'users' })
+histogram.get(labels: { service: 'users' })
 # => { 0.005 => 3, 0.01 => 15, 0.025 => 18, ..., 2.5 => 42, 5 => 42, 10 = >42 }
 ```

@@ -158,17 +158,228 @@ histogram.get({ service: 'users' })
 Summary, similar to histograms, is an accumulator for samples. It captures
 Numeric data and provides an efficient percentile calculation mechanism.

+For now, only `sum` and `total` (count of observations) are supported, no actual quantiles.
+
 ```ruby
-summary = Prometheus::Client::Summary.new(:service_latency_seconds, '...')
+summary = Prometheus::Client::Summary.new(:service_latency_seconds, docstring: '...', labels: [:service])

 # record a value
-summary.observe(
+summary.observe(Benchmark.realtime { service.call() }, labels: { service: 'database' })
+
+# retrieve the current sum and total values
+summary_value = summary.get(labels: { service: 'database' })
+summary_value.sum # => 123.45
+summary_value.count # => 100
+```
+
+## Labels
+
+All metrics can have labels, allowing grouping of related time series.
+
+Labels are an extremely powerful feature, but one that must be used with care.
+Refer to the best practices on [naming](https://prometheus.io/docs/practices/naming/) and
+[labels](https://prometheus.io/docs/practices/instrumentation/#use-labels).
+
+Most importantly, avoid labels that can have a large number of possible values (high
+cardinality). For example, an HTTP Status Code is a good label. A User ID is **not**.
+
+Labels are specified optionally when updating metrics, as a hash of `label_name => value`.
+Refer to [the Prometheus documentation](https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels)
+as to what's a valid `label_name`.
+
+In order for a metric to accept labels, their names must be specified when first initializing
+the metric. Then, when the metric is updated, all the specified labels must be present.
+
+Example:
+
+```ruby
+https_requests_total = Counter.new(:http_requests_total, docstring: '...', labels: [:service, :status_code])

-#
-
-
+# increment the counter for a given label set
+https_requests_total.increment(labels: { service: "my_service", status_code: response.status_code })
+```
+
+### Pre-set Label Values
+
+You can also "pre-set" some of these label values, if they'll always be the same, so you don't
+need to specify them every time:
+
+```ruby
+https_requests_total = Counter.new(:http_requests_total,
+                                   docstring: '...',
+                                   labels: [:service, :status_code],
+                                   preset_labels: { service: "my_service" })
+
+# increment the counter for a given label set
+https_requests_total.increment(labels: { status_code: response.status_code })
 ```

+### `with_labels`
+
+Similar to pre-setting labels, you can get a new instance of an existing metric object,
+with a subset (or full set) of labels set, so that you can increment / observe the metric
+without having to specify the labels for every call.
+
+Moreover, if all the labels the metric can take have been pre-set, validation of the labels
+is done on the call to `with_labels`, and then skipped for each observation, which can
+lead to performance improvements. If you are incrementing a counter in a fast loop, you
+definitely want to be doing this.
+
+
+Examples:
+
+**Pre-setting labels for ease of use:**
+
+```ruby
+# in the metric definition:
+records_processed_total = registry.counter.new(:records_processed_total,
+                                               docstring: '...',
+                                               labels: [:service, :component],
+                                               preset_labels: { service: "my_service" })
+
+# in one-off calls, you'd specify the missing labels (component in this case)
+records_processed_total.increment(labels: { component: 'a_component' })
+
+# you can also have a "view" on this metric for a specific component where this label is
+# pre-set:
+class MyComponent
+  def metric
+    @metric ||= records_processed_total.with_labels(component: "my_component")
+  end
+
+  def process
+    records.each do |record|
+      # process the record
+      metric.increment
+    end
+  end
+end
+```
+
+
+## Data Stores
+
+The data for all the metrics (the internal counters associated with each labelset)
+is stored in a global Data Store object, rather than in the metric objects themselves.
+(This "storage" is ephemeral, generally in-memory, it's not "long-term storage")
+
+The main reason to do this is that different applications may have different requirements
+for their metrics storage. Application running in pre-fork servers (like Unicorn, for
+example), require a shared store between all the processes, to be able to report coherent
+numbers. At the same time, other applications may not have this requirement but be very
+sensitive to performance, and would prefer instead a simpler, faster store.
+
+By having a standardized and simple interface that metrics use to access this store,
+we abstract away the details of storing the data from the specific needs of each metric.
+This allows us to then simply swap around the stores based on the needs of different
+applications, with no changes to the rest of the client.
+
+The client provides 3 built-in stores, but if neither of these is ideal for your
+requirements, you can easily make your own store and use that instead. More on this below.
+
+### Configuring which store to use.
+
+By default, the Client uses the `Synchronized` store, which is a simple, thread-safe Store
+for single-process scenarios.
+
+If you need to use a different store, set it in the Client Config:
+
+```ruby
+Prometheus::Client.config.data_store = Prometheus::Client::DataStores::DataStore.new(store_specific_params)
+```
+
+NOTE: You **must** make sure to set the `data_store` before initializing any metrics.
+If using Rails, you probably want to set up your Data Store on `config/application.rb`,
+or `config/environments/*`, both of which run before `config/initializers/*`
+
+Also note that `config.data_store` is set to an *instance* of a `DataStore`, not to the
+class. This is so that the stores can receive parameters. Most of the built-in stores
+don't require any, but `DirectFileStore` does, for example.
+
+When instantiating metrics, there is an optional `store_settings` attribute. This is used
+to set up store-specific settings for each metric. For most stores, this is not used, but
+for multi-process stores, this is used to specify how to aggregate the values of each
+metric across multiple processes. For the most part, this is used for Gauges, to specify
+whether you want to report the `SUM`, `MAX` or `MIN` value observed across all processes.
+For almost all other cases, you'd leave the default (`SUM`). More on this on the
+*Aggregation* section below.
+
+Other custom stores may also accept extra parameters besides `:aggregation`. See the
+documentation of each store for more details.
+
+### Built-in stores
+
+There are 3 built-in stores, with different trade-offs:
+
+- **Synchronized**: Default store. Thread safe, but not suitable for multi-process
+  scenarios (e.g. pre-fork servers, like Unicorn). Stores data in Hashes, with all accesses
+  protected by Mutexes.
+- **SingleThreaded**: Fastest store, but only suitable for single-threaded scenarios.
+  This store does not make any effort to synchronize access to its internal hashes, so
+  it's absolutely not thread safe.
+- **DirectFileStore**: Stores data in binary files, one file per process and per metric.
+  This is generally the recommended store to use with pre-fork servers and other
+  "multi-process" scenarios.
+
+  Each metric gets a file for each process, and manages its contents by storing keys and
+  binary floats next to them, and updating the offsets of those Floats directly. When
+  exporting metrics, it will find all the files that apply to each metric, read them,
+  and aggregate them.
+
+  In order to do this, each Metric needs an `:aggregation` setting, specifying how
+  to aggregate the multiple possible values we can get for each labelset. By default,
+  they are `SUM`med, which is what most use-cases call for (counters and histograms,
+  for example). However, for Gauges, it's possible to set `MAX` or `MIN` as aggregation,
+  to get the highest/lowest value of all the processes / threads.
+
+  Even though this store saves data on disk, it's still much faster than would probably be
+  expected, because the files are never actually `fsync`ed, so the store never blocks
+  while waiting for disk. The kernel's page cache is incredibly efficient in this regard.
+
+  If in doubt, check the benchmark scripts described in the documentation for creating
+  your own stores and run them in your particular runtime environment to make sure this
+  provides adequate performance.
+
+### Building your own store, and stores other than the built-in ones.
+
+If none of these stores is suitable for your requirements, you can easily make your own.
+
+The interface and requirements of Stores are specified in detail in the `README.md`
+in the `client/data_stores` directory. This thoroughly documents how to make your own
+store.
+
+There are also links there to non-built-in stores created by others that may be useful,
+either as they are, or as a starting point for making your own.
+
+### Aggregation settings for multi-process stores
+
+If you are in a multi-process environment (such as pre-fork servers like Unicorn), each
+process will probably keep their own counters, which need to be aggregated when receiving
+a Prometheus scrape, to report coherent total numbers.
+
+For Counters and Histograms (and quantile-less Summaries), this is simply a matter of
+summing the values of each process.
+
+For Gauges, however, this may not be the right thing to do, depending on what they're
+measuring. You might want to take the maximum or minimum value observed in any process,
+rather than the sum of all of them.
+
+In those cases, you should use the `store_settings` parameter when registering the
+metric, to specify an `:aggregation` setting.
+
+```ruby
+free_disk_space = registry.gauge(:free_disk_space_bytes,
+                                 docstring: "Free disk space, in bytes",
+                                 store_settings: { aggregation: :max })
+```
+
+NOTE: This will only work if the store you're using supports the `:aggregation` setting.
+Of the built-in stores, only `DirectFileStore` does.
+
+Also note that the `:aggregation` setting works for all metric types, not just for gauges.
+It would be unusual to use it for anything other than gauges, but if your use-case
+requires it, the store will respect your aggregation wishes.
+
 ## Tests

 Install necessary development gems with `bundle install` and run tests with
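Taken together, the README changes above replace the 0.9.0 positional arguments with a keyword-argument API (`docstring:`, `labels:`, `by:`, `preset_labels:`) and introduce `with_labels`. The sketch below pulls those examples into one place; the metric name and label values are illustrative, and it assumes the `registry.counter` helper forwards the same keyword arguments as `Counter.new`.

```ruby
require 'prometheus/client'

registry = Prometheus::Client.registry

# Label names are declared up front; the docstring is now a keyword argument.
http_requests_total = registry.counter(:http_requests_total,
                                       docstring: 'A counter of HTTP requests made',
                                       labels: [:service, :status_code],
                                       preset_labels: { service: 'my_service' })

# Only the labels that weren't pre-set have to be passed on each update.
http_requests_total.increment(labels: { status_code: '200' })
http_requests_total.increment(by: 5, labels: { status_code: '500' })

# A view with every label pre-set skips label validation on each call,
# which matters when incrementing in a tight loop.
ok_requests = http_requests_total.with_labels(status_code: '200')
ok_requests.increment
```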
data/lib/prometheus/client.rb
CHANGED
@@ -1,6 +1,7 @@
 # encoding: UTF-8

 require 'prometheus/client/registry'
+require 'prometheus/client/config'

 module Prometheus
   # Client is a ruby implementation for a Prometheus compatible client.
@@ -9,5 +10,9 @@ module Prometheus
     def self.registry
       @registry ||= Registry.new
     end
+
+    def self.config
+      @config ||= Config.new
+    end
   end
 end
data/lib/prometheus/client/config.rb
ADDED
@@ -0,0 +1,15 @@
+# encoding: UTF-8
+
+require 'prometheus/client/data_stores/synchronized'
+
+module Prometheus
+  module Client
+    class Config
+      attr_accessor :data_store
+
+      def initialize
+        @data_store = Prometheus::Client::DataStores::Synchronized.new
+      end
+    end
+  end
+end
data/lib/prometheus/client/counter.rb
CHANGED
@@ -10,17 +10,11 @@ module Prometheus
         :counter
       end

-      def increment(labels
+      def increment(by: 1, labels: {})
         raise ArgumentError, 'increment must be a non-negative number' if by < 0

         label_set = label_set_for(labels)
-
-      end
-
-      private
-
-      def default
-        0.0
+        @store.increment(labels: label_set, by: by)
       end
     end
   end
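A minimal sketch of the call sites implied by the new `Counter#increment(by: 1, labels: {})` signature above; the metric name and label values are illustrative.

```ruby
requests = Prometheus::Client::Counter.new(:service_requests_total,
                                           docstring: 'Requests seen',
                                           labels: [:service])

requests.increment(labels: { service: 'api' })         # `by` defaults to 1
requests.increment(by: 5, labels: { service: 'api' })  # explicit amount

begin
  requests.increment(by: -1, labels: { service: 'api' })
rescue ArgumentError
  # negative increments are rejected by the guard added in this version
end
```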
data/lib/prometheus/client/data_stores/README.md
ADDED
@@ -0,0 +1,306 @@
+# Custom Data Stores
+
+Stores are basically an abstraction over a Hash, whose keys are in turn a Hash of labels
+plus a metric name. The intention behind having different data stores is solving
+different requirements for different production scenarios, or performance trade-offs.
+
+The most common of these scenarios are pre-fork servers like Unicorn, which have multiple
+separate processes gathering metrics. If each of these had their own store, the metrics
+reported on each Prometheus scrape would be different, depending on which process handles
+the request. Solving this requires some sort of shared storage between these processes,
+and there are many ways to solve this problem, each with their own trade-offs.
+
+This abstraction allows us to easily plug in the most adequate store for each scenario.
+
+## Interface
+
+`Store` exposes a `for_metric` method, which returns a store-specific and metric-specific
+`MetricStore` object, which represents a "view" onto the actual internal storage for one
+particular metric. Each metric / collector object will have a references to this
+`MetricStore` and interact with it directly.
+
+The `MetricStore` class must expose `synchronize`, `set`, `increment`, `get` and `all_values`
+methods, which are explained in the code sample below. Its initializer should be called
+only by `Store#for_metric`, not directly.
+
+All values stored are `Float`s.
+
+Internally, a `Store` can store the data however it needs to, based on its requirements.
+For example, a store that needs to work in a multi-process environment needs to have a
+shared section of memory, via either Files, an MMap, an external database, or whatever the
+implementor chooses for their particular use case.
+
+Each `Store` / `MetricStore` will also choose how to divide responsibilities over the
+storage of values. For some use cases, each `MetricStore` may have their own individual
+storage, whereas for others, the `Store` may own a central storage, and `MetricStore`
+objects will access it through the `Store`. This depends on the design choices of each `Store`.
+
+`Store` and `MetricStore` MUST be thread safe. This applies not only to operations on
+stored values (`set`, `increment`), but `MetricStore` must also expose a `synchronize`
+method that would allow a Metric to increment multiple values atomically (Histograms need
+this, for example).
+
+Ideally, multiple keys should be modifiable simultaneously, but this is not a
+hard requirement.
+
+This is what the interface looks like, in practice:
+
+```ruby
+module Prometheus
+  module Client
+    module DataStores
+      class CustomStore
+
+        # Return a MetricStore, which provides a view of the internal data store,
+        # catering specifically to that metric.
+        #
+        # - `metric_settings` specifies configuration parameters for this metric
+        #   specifically. These may or may not be necessary, depending on each specific
+        #   store and metric. The most obvious example of this is for gauges in
+        #   multi-process environments, where the developer needs to choose how those
+        #   gauges will get aggregated between all the per-process values.
+        #
+        #   The settings that the store will accept, and what it will do with them, are
+        #   100% Store-specific. Each store should document what settings it will accept
+        #   and how to use them, so the developer using that store can pass the appropriate
+        #   instantiating the Store itself, and the Metrics they declare.
+        #
+        # - `metric_type` is specified in case a store wants to validate that the settings
+        #   are valid for the metric being set up. It may go unused by most Stores
+        #
+        # Even if your store doesn't need these two parameters, the Store must expose them
+        # to make them swappable.
+        def for_metric(metric_name, metric_type:, metric_settings: {})
+          # Generally, here a Store would validate that the settings passed in are valid,
+          # and raise if they aren't.
+          validate_metric_settings(metric_type: metric_type,
+                                   metric_settings: metric_settings)
+          MetricStore.new(store: self,
+                          metric_name: metric_name,
+                          metric_type: metric_type,
+                          metric_settings: metric_settings)
+        end
+
+
+        # MetricStore manages the data for one specific metric. It's generally a view onto
+        # the central store shared by all metrics, but it could also hold the data itself
+        # if that's better for the specific scenario
+        class MetricStore
+          # This constructor is internal to this store, so the signature doesn't need to
+          # be this. No one other than the Store should be creating MetricStores
+          def initialize(store:, metric_name:, metric_type:, metric_settings:)
+          end
+
+          # Metrics may need to modify multiple values at once (Histograms do this, for
+          # example). MetricStore needs to provide a way to synchronize those, in addition
+          # to all of the value modifications being thread-safe without a need for simple
+          # Metrics to call `synchronize`
+          def synchronize
+            raise NotImplementedError
+          end
+
+          # Store a value for this metric and a set of labels
+          # Internally, may add extra "labels" to disambiguate values between,
+          # for example, different processes
+          def set(labels:, val:)
+            raise NotImplementedError
+          end
+
+          def increment(labels:, by: 1)
+            raise NotImplementedError
+          end
+
+          # Return a value for a set of labels
+          # Will return the same value stored by `set`, as opposed to `all_values`, which
+          # may aggregate multiple values.
+          #
+          # For example, in a multi-process scenario, `set` may add an extra internal
+          # label tagging the value with the process id. `get` will return the value for
+          # "this" process ID. `all_values` will return an aggregated value for all
+          # process IDs.
+          def get(labels:)
+            raise NotImplementedError
+          end
+
+          # Returns all the sets of labels seen by the Store, and the aggregated value for
+          # each.
+          #
+          # In some cases, this is just a matter of returning the stored value.
+          #
+          # In other cases, the store may need to aggregate multiple values for the same
+          # set of labels. For example, in a multiple process it may need to `sum` the
+          # values of counters from each process. Or for `gauges`, it may need to take the
+          # `max`. This is generally specified in `metric_settings` when calling
+          # `Store#for_metric`.
+          def all_values
+            raise NotImplementedError
+          end
+        end
+      end
+    end
+  end
+end
+```
+
+## Conventions
+
+- Your store MAY require or accept extra settings for each metric on the call to `for_metric`.
+- You SHOULD validate these parameters to make sure they are correct, and raise if they aren't.
+- If your store needs to aggregate multiple values for the same metric (for example, in
+  a multi-process scenario), you MUST accept a setting to define how values are aggregated.
+  - This setting MUST be called `:aggregation`
+  - It MUST support, at least, `:sum`, `:max` and `:min`.
+  - It MAY support other aggregation modes that may apply to your requirements.
+  - It MUST default to `:sum`
+
+## Testing your Store
+
+In order to make it easier to test your store, the basic functionality is tested using
+`shared_examples`:
+
+`it_behaves_like Prometheus::Client::DataStores`
+
+Follow the simple structure in `synchronized_spec.rb` for a starting point.
+
+Note that if your store stores data somewhere other than in-memory (in files, Redis,
+databases, etc), you will need to do cleanup between tests in a `before` block.
+
+The tests for `DirectFileStore` have a good example at the top of the file. This file also
+has some examples on testing multi-process stores, checking that aggregation between
+processes works correctly.
+
+## Benchmarking your custom data store
+
+If you are developing your own data store, you probably want to benchmark it to see how
+it compares to the built-in ones, and to make sure it achieves the performance you want.
+
+The Prometheus Ruby Client includes some benchmarks (in the `spec/benchmarks` directory)
+to help you with this, and also with validating that your store works correctly.
+
+The `README` in that directory contains more information what these benchmarks are for,
+and how to use them.
+
+## Extra Stores and Research
+
+In the process of abstracting stores away, and creating the built-in ones, GoCardless
+has created a good amount of research, benchmarks, and experimental stores, which
+weren't useful to include in this repo, but may be a useful resource or starting point
+if you are building your own store.
+
+Check out the [GoCardless Data Stores Experiments](gocardless/prometheus-client-ruby-data-stores-experiments)
+repository for these.
+
+## Sample, imaginary multi-process Data Store
+
+This is just an example of how one could implement a data store, and a clarification on
+the "aggregation" point
+
+Important: This is a **toy example**, intended simply to show how this could work / how to
+implement these interfaces.
+
+There are some key pieces of code missing, which are fairly uninteresting, this only shows
+the parts that illustrate the idea of storing multiple different values, and aggregating
+them
+
+```ruby
+module Prometheus
+  module Client
+    module DataStores
+      # Stores all the data in a magic data structure that keeps cross-process data, in a
+      # way that all processes can read it, but each can write only to their own set of
+      # keys.
+      # It doesn't care how that works, this is not an actual solution to anything,
+      # just an example of how the interface would work with something like that.
+      #
+      # Metric Settings have one possible key, `aggregation`, which must be one of
+      # `AGGREGATION_MODES`
+      class SampleMagicMultiprocessStore
+        AGGREGATION_MODES = [MAX = :max, MIN = :min, SUM = :sum]
+        DEFAULT_METRIC_SETTINGS = { aggregation: SUM }
+
+        def initialize
+          @internal_store = MagicHashSharedBetweenProcesses.new # PStore, for example
+        end
+
+        def for_metric(metric_name, metric_type:, metric_settings: {})
+          settings = DEFAULT_METRIC_SETTINGS.merge(metric_settings)
+          validate_metric_settings(metric_settings: settings)
+          MetricStore.new(store: self,
+                          metric_name: metric_name,
+                          metric_type: metric_type,
+                          metric_settings: settings)
+        end
+
+        private
+
+        def validate_metric_settings(metric_settings:)
+          raise unless metric_settings.has_key?(:aggregation)
+          raise unless metric_settings[:aggregation].in?(AGGREGATION_MODES)
+        end
+
+        class MetricStore
+          def initialize(store:, metric_name:, metric_type:, metric_settings:)
+            @store = store
+            @internal_store = store.internal_store
+            @metric_name = metric_name
+            @aggregation_mode = metric_settings[:aggregation]
+          end
+
+          def set(labels:, val:)
+            @internal_store[store_key(labels)] = val.to_f
+          end
+
+          def get(labels:)
+            @internal_store[store_key(labels)]
+          end
+
+          def all_values
+            non_aggregated_values = all_store_values.each_with_object({}) do |(labels, v), acc|
+              if labels["__metric_name"] == @metric_name
+                label_set = labels.reject { |k,_| k.in?("__metric_name", "__pid") }
+                acc[label_set] ||= []
+                acc[label_set] << v
+              end
+            end
+
+            # Aggregate all the different values for each label_set
+            non_aggregated_values.each_with_object({}) do |(label_set, values), acc|
+              acc[label_set] = aggregate(values)
+            end
+          end
+
+          private
+
+          def all_store_values
+            # This assumes there's a something common that all processes can write to, and
+            # it's magically synchronized (which is not true of a PStore, for example, but
+            # would of some sort of external data store like Redis, Memcached, SQLite)
+
+            # This could also have some sort of:
+            # file_list = Dir.glob(File.join(path, '*.db')).sort
+            # which reads all the PStore files / MMapped files, etc, and returns a hash
+            # with all of them together, which then `values` and `label_sets` can use
+          end
+
+          # This method holds most of the key to how this Store works. Adding `_pid` as
+          # one of the labels, we hold each process's value separately, which we can
+          # aggregate later
+          def store_key(labels)
+            labels.merge(
+              {
+                "__metric_name" => @metric_name,
+                "__pid" => Process.pid
+              }
+            )
+          end
+
+          def aggregate(values)
+            # This is a horrible way to do this, just illustrating the point
+            values.send(@aggregation_mode)
+          end
+        end
+      end
+    end
+  end
+end
+```
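As a usage sketch of the interface documented above, the snippet below drives the built-in `Synchronized` store through `for_metric` and the `MetricStore` methods. The label hashes and the exact shape of the `all_values` return value follow the documented semantics but are assumptions, not verified output.

```ruby
require 'prometheus/client/data_stores/synchronized'

store = Prometheus::Client::DataStores::Synchronized.new

# Per-metric view; Synchronized is assumed to need no metric_settings.
metric_store = store.for_metric(:jobs_processed_total, metric_type: :counter)

metric_store.increment(labels: { queue: 'default' })           # by defaults to 1
metric_store.increment(labels: { queue: 'default' }, by: 2.0)
metric_store.set(labels: { queue: 'slow' }, val: 7.0)

metric_store.get(labels: { queue: 'default' })
# => 3.0
metric_store.all_values
# => assumed: { { queue: 'default' } => 3.0, { queue: 'slow' } => 7.0 }
```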