prometheus_exporter 0.7.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +298 -35
  3. data/README.md +276 -53
  4. data/{bin → exe}/prometheus_exporter +20 -7
  5. data/lib/prometheus_exporter/client.rb +41 -32
  6. data/lib/prometheus_exporter/instrumentation/active_record.rb +29 -35
  7. data/lib/prometheus_exporter/instrumentation/delayed_job.rb +28 -13
  8. data/lib/prometheus_exporter/instrumentation/good_job.rb +28 -0
  9. data/lib/prometheus_exporter/instrumentation/hutch.rb +1 -1
  10. data/lib/prometheus_exporter/instrumentation/method_profiler.rb +67 -27
  11. data/lib/prometheus_exporter/instrumentation/periodic_stats.rb +54 -0
  12. data/lib/prometheus_exporter/instrumentation/process.rb +25 -27
  13. data/lib/prometheus_exporter/instrumentation/puma.rb +36 -27
  14. data/lib/prometheus_exporter/instrumentation/resque.rb +33 -0
  15. data/lib/prometheus_exporter/instrumentation/shoryuken.rb +6 -7
  16. data/lib/prometheus_exporter/instrumentation/sidekiq.rb +51 -23
  17. data/lib/prometheus_exporter/instrumentation/sidekiq_process.rb +45 -0
  18. data/lib/prometheus_exporter/instrumentation/sidekiq_queue.rb +38 -33
  19. data/lib/prometheus_exporter/instrumentation/sidekiq_stats.rb +32 -0
  20. data/lib/prometheus_exporter/instrumentation/unicorn.rb +12 -17
  21. data/lib/prometheus_exporter/instrumentation.rb +5 -0
  22. data/lib/prometheus_exporter/metric/base.rb +20 -17
  23. data/lib/prometheus_exporter/metric/counter.rb +1 -3
  24. data/lib/prometheus_exporter/metric/gauge.rb +6 -6
  25. data/lib/prometheus_exporter/metric/histogram.rb +15 -5
  26. data/lib/prometheus_exporter/metric/summary.rb +5 -14
  27. data/lib/prometheus_exporter/middleware.rb +72 -38
  28. data/lib/prometheus_exporter/server/active_record_collector.rb +16 -14
  29. data/lib/prometheus_exporter/server/collector.rb +29 -17
  30. data/lib/prometheus_exporter/server/collector_base.rb +0 -2
  31. data/lib/prometheus_exporter/server/delayed_job_collector.rb +76 -33
  32. data/lib/prometheus_exporter/server/good_job_collector.rb +52 -0
  33. data/lib/prometheus_exporter/server/hutch_collector.rb +19 -11
  34. data/lib/prometheus_exporter/server/metrics_container.rb +66 -0
  35. data/lib/prometheus_exporter/server/process_collector.rb +15 -14
  36. data/lib/prometheus_exporter/server/puma_collector.rb +21 -18
  37. data/lib/prometheus_exporter/server/resque_collector.rb +50 -0
  38. data/lib/prometheus_exporter/server/runner.rb +49 -13
  39. data/lib/prometheus_exporter/server/shoryuken_collector.rb +22 -17
  40. data/lib/prometheus_exporter/server/sidekiq_collector.rb +22 -14
  41. data/lib/prometheus_exporter/server/sidekiq_process_collector.rb +47 -0
  42. data/lib/prometheus_exporter/server/sidekiq_queue_collector.rb +12 -12
  43. data/lib/prometheus_exporter/server/sidekiq_stats_collector.rb +49 -0
  44. data/lib/prometheus_exporter/server/type_collector.rb +2 -0
  45. data/lib/prometheus_exporter/server/unicorn_collector.rb +32 -33
  46. data/lib/prometheus_exporter/server/web_collector.rb +48 -31
  47. data/lib/prometheus_exporter/server/web_server.rb +70 -48
  48. data/lib/prometheus_exporter/server.rb +4 -0
  49. data/lib/prometheus_exporter/version.rb +1 -1
  50. data/lib/prometheus_exporter.rb +12 -13
  51. metadata +19 -206
  52. data/.github/workflows/ci.yml +0 -42
  53. data/.gitignore +0 -13
  54. data/.rubocop.yml +0 -7
  55. data/Appraisals +0 -10
  56. data/CODE_OF_CONDUCT.md +0 -74
  57. data/Gemfile +0 -8
  58. data/Guardfile +0 -8
  59. data/Rakefile +0 -12
  60. data/bench/bench.rb +0 -45
  61. data/examples/custom_collector.rb +0 -27
  62. data/gemfiles/.bundle/config +0 -2
  63. data/gemfiles/ar_60.gemfile +0 -5
  64. data/gemfiles/ar_61.gemfile +0 -7
  65. data/prometheus_exporter.gemspec +0 -46
data/README.md CHANGED
@@ -5,6 +5,7 @@ Prometheus Exporter allows you to aggregate custom metrics from multiple process
  To learn more see [Instrumenting Rails with Prometheus](https://samsaffron.com/archive/2018/02/02/instrumenting-rails-with-prometheus) (it has pretty pictures!)

  * [Requirements](#requirements)
+ * [Migrating from v0.x](#migrating-from-v0x)
  * [Installation](#installation)
  * [Usage](#usage)
  * [Single process mode](#single-process-mode)
@@ -19,21 +20,33 @@ To learn more see [Instrumenting Rails with Prometheus](https://samsaffron.com/a
  * [Hutch metrics](#hutch-message-processing-tracer)
  * [Puma metrics](#puma-metrics)
  * [Unicorn metrics](#unicorn-process-metrics)
+ * [Resque metrics](#resque-metrics)
+ * [GoodJob metrics](#goodjob-metrics)
  * [Custom type collectors](#custom-type-collectors)
  * [Multi process mode with custom collector](#multi-process-mode-with-custom-collector)
  * [GraphQL support](#graphql-support)
  * [Metrics default prefix / labels](#metrics-default-prefix--labels)
  * [Client default labels](#client-default-labels)
  * [Client default host](#client-default-host)
+ * [Histogram mode](#histogram-mode)
+ * [Histogram - custom buckets](#histogram---custom-buckets)
  * [Transport concerns](#transport-concerns)
  * [JSON generation and parsing](#json-generation-and-parsing)
+ * [Logging](#logging)
+ * [Docker Usage](#docker-usage)
  * [Contributing](#contributing)
  * [License](#license)
  * [Code of Conduct](#code-of-conduct)

  ## Requirements

- Minimum Ruby of version 2.5.0 is required, Ruby 2.4.0 is EOL as of 2020-04-05
+ Minimum Ruby of version 3.0.0 is required; Ruby 2.7 is EOL as of March 31st, 2023.
+
+ ## Migrating from v0.x
+
+ There are some major changes in v1.x compared to v0.x.
+
+ - Some metrics are renamed to match the [Prometheus official guide for metric names](https://prometheus.io/docs/practices/naming/#metric-names). (#184)
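
For orientation, several of the renames referred to above appear later in this diff, for example:

```
http_duration_seconds        ->  http_request_duration_seconds
sidekiq_queue_backlog_total  ->  sidekiq_queue_backlog
puma_workers_total           ->  puma_workers
unicorn_workers_total        ->  unicorn_workers
```

Dashboards and alerts built on the old names will need to be updated accordingly when upgrading.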
 
  ## Installation

@@ -85,8 +98,8 @@ server.collector.register_metric(counter)
  server.collector.register_metric(summary)
  server.collector.register_metric(histogram)

- gauge.observe(get_rss)
- gauge.observe(get_rss)
+ gauge.observe(server.get_rss)
+ gauge.observe(server.get_rss)

  counter.observe(1, route: 'test/route')
  counter.observe(1, route: 'another/route')
@@ -176,7 +189,7 @@ gem 'prometheus_exporter'
  In an initializer:

  ```ruby
- unless Rails.env == "test"
+ unless Rails.env.test?
    require 'prometheus_exporter/middleware'

    # This reports stats per request like HTTP status and timings
@@ -190,15 +203,24 @@ Ensure you run the exporter in a monitored background process:
  $ bundle exec prometheus_exporter
  ```

+ #### Choosing the style of method patching
+
+ By default, `prometheus_exporter` uses `alias_method` to instrument methods used by SQL and Redis as it is the fastest approach (see [this article](https://samsaffron.com/archive/2017/10/18/fastest-way-to-profile-a-method-in-ruby)). You may desire to add additional instrumentation libraries beyond `prometheus_exporter` to your app. This can become problematic if these other libraries instead use `prepend` to instrument methods. To resolve this, you can tell the middleware to instrument using `prepend` by passing an `instrument` option like so:
+
+ ```ruby
+ Rails.application.middleware.unshift PrometheusExporter::Middleware, instrument: :prepend
+ ```
+
  #### Metrics collected by Rails integration middleware

- | Type | Name | Description |
- | --- | --- | --- |
- | Counter | `http_requests_total` | Total HTTP requests from web app |
- | Summary | `http_duration_seconds` | Time spent in HTTP reqs in seconds |
- | Summary | `http_redis_duration_seconds` | Time spent in HTTP reqs in Redis, in seconds |
- | Summary | `http_sql_duration_seconds` | Time spent in HTTP reqs in SQL in seconds |
- | Summary | `http_queue_duration_seconds` | Time spent queueing the request in load balancer in seconds |
+ | Type | Name | Description |
+ | --- | --- | --- |
+ | Counter | `http_requests_total` | Total HTTP requests from web app |
+ | Summary | `http_request_duration_seconds` | Time spent in HTTP reqs in seconds |
+ | Summary | `http_request_redis_duration_seconds`¹ | Time spent in HTTP reqs in Redis, in seconds |
+ | Summary | `http_request_sql_duration_seconds`² | Time spent in HTTP reqs in SQL in seconds |
+ | Summary | `http_request_queue_duration_seconds`³ | Time spent queueing the request in load balancer in seconds |
+ | Summary | `http_request_memcache_duration_seconds`⁴ | Time spent in HTTP reqs in Memcache in seconds |

  All metrics have a `controller` and an `action` label.
  `http_requests_total` additionally has a (HTTP response) `status` label.
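
Putting the initializer and the `prepend` option together, a complete initializer could look like the following sketch (the file path is illustrative; `instrument: :prepend` is only needed when another library patches the same methods via `prepend`):

```ruby
# config/initializers/prometheus.rb (illustrative path)
unless Rails.env.test?
  require 'prometheus_exporter/middleware'

  # This reports stats per request like HTTP status and timings
  Rails.application.middleware.unshift PrometheusExporter::Middleware, instrument: :prepend
end
```
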
@@ -241,12 +263,13 @@ end
  ```
  That way you won't have all metrics labeled with `controller=other` and `action=other`, but have labels such as
  ```
- ruby_http_duration_seconds{path="/api/v1/teams/:id",method="GET",status="200",quantile="0.99"} 0.009880661998977303
+ ruby_http_request_duration_seconds{path="/api/v1/teams/:id",method="GET",status="200",quantile="0.99"} 0.009880661998977303
  ```

  ¹) Only available when Redis is used.
  ²) Only available when Mysql or PostgreSQL are used.
  ³) Only available when [Instrumenting Request Queueing Time](#instrumenting-request-queueing-time) is set up.
+ ⁴) Only available when Dalli is used.

  #### Activerecord Connection Pool Metrics

@@ -321,7 +344,7 @@ You may also be interested in per-process stats. This collects memory and GC sta

  ```ruby
  # in an initializer
- unless Rails.env == "test"
+ unless Rails.env.test?
    require 'prometheus_exporter/instrumentation'

    # this reports basic process stats like RSS and GC info
@@ -350,6 +373,8 @@ end
  | Counter | `major_gc_ops_total` | Major GC operations by process |
  | Counter | `minor_gc_ops_total` | Minor GC operations by process |
  | Counter | `allocated_objects_total` | Total number of allocated objects by process |
+ | Gauge | `marking_time` | Marking time spent (Ruby 3.3 minimum) |
+ | Gauge | `sweeping_time` | Sweeping time spent (Ruby 3.3 minimum) |

  _Metrics marked with * are only collected when `MiniRacer` is defined._
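
The two new gauges map onto GC timing counters that newer Rubies expose; a quick illustrative check (assuming Ruby 3.3+, where `GC.stat` reports these keys in milliseconds):

```ruby
# Ruby 3.3+ only; these keys do not exist on older Rubies.
GC.stat[:marking_time]   # => time spent marking, in milliseconds
GC.stat[:sweeping_time]  # => time spent sweeping, in milliseconds
```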
 
@@ -357,40 +382,49 @@ Metrics collected by Process instrumentation include labels `type` (as given wit

  #### Sidekiq metrics

- Including Sidekiq metrics (how many jobs ran? how many failed? how long did they take? how many are dead? how many were restarted?)
-
- ```ruby
- Sidekiq.configure_server do |config|
-   config.server_middleware do |chain|
-     require 'prometheus_exporter/instrumentation'
-     chain.add PrometheusExporter::Instrumentation::Sidekiq
-   end
-   config.death_handlers << PrometheusExporter::Instrumentation::Sidekiq.death_handler
- end
- ```
-
- To monitor Queue size and latency:
+ There are different kinds of Sidekiq metrics that can be collected. A recommended setup looks like this:

  ```ruby
  Sidekiq.configure_server do |config|
+   require 'prometheus_exporter/instrumentation'
+   config.server_middleware do |chain|
+     chain.add PrometheusExporter::Instrumentation::Sidekiq
+   end
+   config.death_handlers << PrometheusExporter::Instrumentation::Sidekiq.death_handler
    config.on :startup do
-     require 'prometheus_exporter/instrumentation'
+     PrometheusExporter::Instrumentation::Process.start type: 'sidekiq'
+     PrometheusExporter::Instrumentation::SidekiqProcess.start
      PrometheusExporter::Instrumentation::SidekiqQueue.start
+     PrometheusExporter::Instrumentation::SidekiqStats.start
    end
  end
  ```

- To monitor Sidekiq process info:
+ * The middleware and death handler will generate job specific metrics (how many jobs ran? how many failed? how long did they take? how many are dead? how many were restarted?).
+ * The [`Process`](#per-process-stats) metrics provide basic ruby metrics.
+ * The `SidekiqProcess` metrics provide the concurrency and busy metrics for this process.
+ * The `SidekiqQueue` metrics provide size and latency for the queues run by this process.
+ * The `SidekiqStats` metrics provide general, global Sidekiq stats (size of Scheduled, Retries, Dead queues, total number of jobs, etc).
+
+ For `SidekiqQueue`, if you run more than one process for the same queues, note that the same metrics will be exposed by all the processes, just like the `SidekiqStats` will if you run more than one process of any kind. You might want to use `avg` or `max` when consuming their metrics.
+
+ An alternative would be to expose these metrics in a lone, long-lived process. Using a rake task, for example:

  ```ruby
- Sidekiq.configure_server do |config|
-   config.on :startup do
-     require 'prometheus_exporter/instrumentation'
-     PrometheusExporter::Instrumentation::Process.start type: 'sidekiq'
-   end
+ task :sidekiq_metrics do
+   server = PrometheusExporter::Server::WebServer.new
+   server.start
+
+   PrometheusExporter::Client.default = PrometheusExporter::LocalClient.new(collector: server.collector)
+
+   PrometheusExporter::Instrumentation::SidekiqQueue.start(all_queues: true)
+   PrometheusExporter::Instrumentation::SidekiqStats.start
+   sleep
  end
  ```

+ The `all_queues` parameter for `SidekiqQueue` will expose metrics for all queues.
+
  Sometimes the Sidekiq server shuts down before it can send metrics, that were generated right before the shutdown, to the collector. Especially if you care about the `sidekiq_restarted_jobs_total` metric, it is a good idea to explicitly stop the client:

  ```ruby
@@ -401,6 +435,18 @@ Sometimes the Sidekiq server shuts down before it can send metrics, that were ge
  end
  ```

+ Custom labels can be added for individual jobs by defining a class method on the job class. These labels will be added to all Sidekiq metrics written by the job:
+
+ ```ruby
+ class WorkerWithCustomLabels
+   def self.custom_labels
+     { my_label: 'value-here', other_label: 'second-val' }
+   end
+
+   def perform; end
+ end
+ ```
+
  ##### Metrics collected by Sidekiq Instrumentation

  **PrometheusExporter::Instrumentation::Sidekiq**
@@ -423,11 +469,33 @@ This metric has a `job_name` label and a `queue` label.
  **PrometheusExporter::Instrumentation::SidekiqQueue**
  | Type | Name | Description |
  | --- | --- | --- |
- | Gauge | `sidekiq_queue_backlog_total` | Size of the sidekiq queue |
+ | Gauge | `sidekiq_queue_backlog` | Size of the sidekiq queue |
  | Gauge | `sidekiq_queue_latency_seconds` | Latency of the sidekiq queue |

  Both metrics will have a `queue` label with the name of the queue.

+ **PrometheusExporter::Instrumentation::SidekiqProcess**
+ | Type | Name | Description |
+ | --- | --- | --- |
+ | Gauge | `sidekiq_process_busy` | Number of busy workers for this process |
+ | Gauge | `sidekiq_process_concurrency` | Concurrency for this process |
+
+ Both metrics will include the labels `labels`, `queues`, `quiet`, `tag`, `hostname` and `identity`, as returned by the [Sidekiq Processes API](https://github.com/mperham/sidekiq/wiki/API#processes).
+
+ **PrometheusExporter::Instrumentation::SidekiqStats**
+ | Type | Name | Description |
+ | --- | --- | --- |
+ | Gauge | `sidekiq_stats_dead_size` | Size of the dead queue |
+ | Gauge | `sidekiq_stats_enqueued` | Number of enqueued jobs |
+ | Gauge | `sidekiq_stats_failed` | Number of failed jobs |
+ | Gauge | `sidekiq_stats_processed` | Total number of processed jobs |
+ | Gauge | `sidekiq_stats_processes_size` | Number of processes |
+ | Gauge | `sidekiq_stats_retry_size` | Size of the retries queue |
+ | Gauge | `sidekiq_stats_scheduled_size` | Size of the scheduled queue |
+ | Gauge | `sidekiq_stats_workers_size` | Number of jobs actively being processed |
+
+ Based on the [Sidekiq Stats API](https://github.com/mperham/sidekiq/wiki/API#stats).
+
  _See [Metrics collected by Process Instrumentation](#metrics-collected-by-process-instrumentation) for a list of metrics the Process instrumentation will produce._

  #### Shoryuken metrics
@@ -459,7 +527,7 @@ All metrics have labels for `job_name` and `queue_name`.
  In an initializer:

  ```ruby
- unless Rails.env == "test"
+ unless Rails.env.test?
    require 'prometheus_exporter/instrumentation'
    PrometheusExporter::Instrumentation::DelayedJob.register_plugin
  end
@@ -470,6 +538,7 @@ end
  | Type | Name | Description | Labels |
  | --- | --- | --- | --- |
  | Counter | `delayed_job_duration_seconds` | Total time spent in delayed jobs | `job_name` |
+ | Counter | `delayed_job_latency_seconds_total` | Total delayed jobs latency | `job_name` |
  | Counter | `delayed_jobs_total` | Total number of delayed jobs executed | `job_name` |
  | Gauge | `delayed_jobs_enqueued` | Number of enqueued delayed jobs | - |
  | Gauge | `delayed_jobs_pending` | Number of pending delayed jobs | - |
@@ -478,12 +547,15 @@ end
  | Summary | `delayed_job_duration_seconds_summary` | Summary of the time it takes jobs to execute | `status` |
  | Summary | `delayed_job_attempts_summary` | Summary of the amount of attempts it takes delayed jobs to succeed | - |

+ All metrics have labels for `job_name` and `queue_name`.
+ `delayed_job_latency_seconds_total` takes delayed job's [sleep_delay](https://github.com/collectiveidea/delayed_job#:~:text=If%20no%20jobs%20are%20found%2C%20the%20worker%20sleeps%20for%20the%20amount%20of%20time%20specified%20by%20the%20sleep%20delay%20option.%20Set%20Delayed%3A%3AWorker.sleep_delay%20%3D%2060%20for%20a%2060%20second%20sleep%20time.) parameter into account, so be aware of this if you are looking for high latency precision.
+
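For context, `sleep_delay` is the Delayed Job worker's polling interval, so the reported latency can never be finer than that interval. A minimal, illustrative tweak (the file path and value are examples only):

```ruby
# config/initializers/delayed_job.rb (illustrative path)
# A shorter polling interval tightens the resolution of
# delayed_job_latency_seconds_total, at the cost of more frequent polling.
Delayed::Worker.sleep_delay = 5 # seconds to sleep when no jobs are found
```
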
  #### Hutch Message Processing Tracer

  Capture [Hutch](https://github.com/gocardless/hutch) metrics (how many jobs ran? how many failed? how long did they take?)

  ```ruby
- unless Rails.env == "test"
+ unless Rails.env.test?
    require 'prometheus_exporter/instrumentation'
    Hutch::Config.set(:tracer, PrometheusExporter::Instrumentation::Hutch)
  end
@@ -505,7 +577,7 @@ Request Queueing is defined as the time it takes for a request to reach your app

  As this metric starts before `prometheus_exporter` can handle the request, you must add a specific HTTP header as early in your infrastructure as possible (we recommend your load balancer or reverse proxy).

- Configure your HTTP server / load balancer to add a header `X-Request-Start: t=<MSEC>` when passing the request upstream. For more information, please consult your software manual.
+ The Amazon Application Load Balancer [request tracing header](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-request-tracing.html) is natively supported. If you are using another upstream entrypoint, you may configure your HTTP server / load balancer to add a header `X-Request-Start: t=<MSEC>` when passing the request upstream. Please keep in mind that the request start time is reported as epoch time (in seconds) and lacks precision, which may introduce additional latency in reported metrics. For more information, please consult your software manual.

  Hint: we aim to be API-compatible with the big APM solutions, so if you've got requests queueing time configured for them, it should be expected to also work with `prometheus_exporter`.
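
Conceptually, the middleware derives queue time as the difference between the timestamp stamped by the proxy and the moment the app starts handling the request. A rough sketch of the idea (not the middleware's actual implementation; `parse_request_start` is a hypothetical helper that normalises the header value to epoch seconds):

```ruby
# Rough illustration of how request queue time is derived from the header.
# parse_request_start is hypothetical: it would turn "t=<MSEC>" (or the ALB
# trace header) into epoch seconds before the subtraction below.
proxy_started_at = parse_request_start(env["HTTP_X_REQUEST_START"])
queue_seconds = Time.now.to_f - proxy_started_at
```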
 
@@ -515,27 +587,87 @@ The puma metrics are using the `Puma.stats` method and hence need to be started
  workers have been booted and from a Puma thread otherwise the metrics won't be accessible.
  The easiest way to gather these metrics is to put the following in your `puma.rb` config:

+ For Puma single mode
+ ```ruby
+ # puma.rb config
+ require 'prometheus_exporter/instrumentation'
+ # optional check, avoids spinning up and down threads per worker
+ if !PrometheusExporter::Instrumentation::Puma.started?
+   PrometheusExporter::Instrumentation::Puma.start
+ end
+ ```
+
+ For Puma clustered mode
  ```ruby
  # puma.rb config
  after_worker_boot do
    require 'prometheus_exporter/instrumentation'
-   PrometheusExporter::Instrumentation::Puma.start
+   # optional check, avoids spinning up and down threads per worker
+   if !PrometheusExporter::Instrumentation::Puma.started?
+     PrometheusExporter::Instrumentation::Puma.start
+   end
  end
  ```

  #### Metrics collected by Puma Instrumentation

- | Type | Name | Description |
- | --- | --- | --- |
- | Gauge | `puma_workers_total` | Number of puma workers |
- | Gauge | `puma_booted_workers_total` | Number of puma workers booted |
- | Gauge | `puma_old_workers_total` | Number of old puma workers |
- | Gauge | `puma_running_threads_total` | Number of puma threads currently running |
- | Gauge | `puma_request_backlog_total` | Number of requests waiting to be processed by a puma thread |
- | Gauge | `puma_thread_pool_capacity_total` | Number of puma threads available at current scale |
- | Gauge | `puma_max_threads_total` | Number of puma threads at available at max scale |
+ | Type | Name | Description |
+ | --- | --- | --- |
+ | Gauge | `puma_workers` | Number of puma workers |
+ | Gauge | `puma_booted_workers` | Number of puma workers booted |
+ | Gauge | `puma_old_workers` | Number of old puma workers |
+ | Gauge | `puma_running_threads` | How many threads are spawned. A spawned thread may be busy processing a request or waiting for a new request |
+ | Gauge | `puma_request_backlog` | Number of requests waiting to be processed by a puma thread |
+ | Gauge | `puma_thread_pool_capacity` | Number of puma threads available at current scale |
+ | Gauge | `puma_max_threads` | Number of puma threads available at max scale |
+ | Gauge | `puma_busy_threads` | Running - how many threads are waiting to receive work + how many requests are waiting for a thread to pick them up |
+
+ All metrics may have a `phase` label and all custom labels provided with the `labels` option.

- All metrics may have a `phase` label.
+ ### Resque metrics
+
+ The resque metrics are using the `Resque.info` method, which queries Redis internally. To start monitoring your resque
+ installation, you'll need to start the instrumentation:
+
+ ```ruby
+ # e.g. config/initializers/resque.rb
+ require 'prometheus_exporter/instrumentation'
+ PrometheusExporter::Instrumentation::Resque.start
+ ```
+
+ #### Metrics collected by Resque Instrumentation
+
+ | Type | Name | Description |
+ | --- | --- | --- |
+ | Gauge | `resque_processed_jobs` | Total number of processed Resque jobs |
+ | Gauge | `resque_failed_jobs` | Total number of failed Resque jobs |
+ | Gauge | `resque_pending_jobs` | Total number of pending Resque jobs |
+ | Gauge | `resque_queues` | Total number of Resque queues |
+ | Gauge | `resque_workers` | Total number of Resque workers running |
+ | Gauge | `resque_working` | Total number of Resque workers working |
+
+ ### GoodJob metrics
+
+ The metrics are generated from the database using the relevant scopes. To start monitoring your GoodJob
+ installation, you'll need to start the instrumentation:
+
+ ```ruby
+ # e.g. config/initializers/good_job.rb
+ require 'prometheus_exporter/instrumentation'
+ PrometheusExporter::Instrumentation::GoodJob.start
+ ```
+
+ #### Metrics collected by GoodJob Instrumentation
+
+ | Type | Name | Description |
+ | --- | --- | --- |
+ | Gauge | `good_job_scheduled` | Total number of scheduled GoodJob jobs |
+ | Gauge | `good_job_retried` | Total number of retried GoodJob jobs |
+ | Gauge | `good_job_queued` | Total number of queued GoodJob jobs |
+ | Gauge | `good_job_running` | Total number of running GoodJob jobs |
+ | Gauge | `good_job_finished` | Total number of finished GoodJob jobs |
+ | Gauge | `good_job_succeeded` | Total number of succeeded GoodJob jobs |
+ | Gauge | `good_job_discarded` | Total number of discarded GoodJob jobs |

  ### Unicorn process metrics

@@ -554,11 +686,11 @@ Note: You must install the `raindrops` gem in your `Gemfile` or locally.

  #### Metrics collected by Unicorn Instrumentation

- | Type | Name | Description |
- | --- | --- | --- |
- | Gauge | `unicorn_workers_total` | Number of unicorn workers |
- | Gauge | `unicorn_active_workers_total` | Number of active unicorn workers |
- | Gauge | `unicorn_request_backlog_total` | Number of requests waiting to be processed by a unicorn worker |
+ | Type | Name | Description |
+ | --- | --- | --- |
+ | Gauge | `unicorn_workers` | Number of unicorn workers |
+ | Gauge | `unicorn_active_workers` | Number of active unicorn workers |
+ | Gauge | `unicorn_request_backlog` | Number of requests waiting to be processed by a unicorn worker |

  ### Custom type collectors

@@ -743,6 +875,7 @@ Usage: prometheus_exporter [options]
  -c, --collector FILE (optional) Custom collector to run
  -a, --type-collector FILE (optional) Custom type collectors to run in main collector
  -v, --verbose
+ -g, --histogram Use histogram instead of summary for aggregations
  --auth FILE (optional) enable basic authentication using a htpasswd FILE
  --realm REALM (optional) Use REALM for basic authentication (default: "Prometheus Exporter")
  --unicorn-listen-address ADDRESS
@@ -767,6 +900,9 @@ prometheus_exporter -p 8080 \
  --prefix 'foo_'
  ```

+ You can use the `-b` option to bind the `prometheus_exporter` web server to any IPv4 interface with `-b 0.0.0.0`,
+ any IPv6 interface with `-b ::`, or `-b ANY` to any IPv4/IPv6 interfaces available on your host system.
+
  #### Enabling Basic Authentication

  If you desire authentication on your `/metrics` route, you can enable basic authentication with the `--auth` option.
@@ -813,6 +949,38 @@ http_requests_total{service="app-server-01",app_name="app-01"} 1

  By default, `PrometheusExporter::Client.default` connects to `localhost:9394`. If your setup requires this (e.g. when using `docker-compose`), you can change the default host and port by setting the environment variables `PROMETHEUS_EXPORTER_HOST` and `PROMETHEUS_EXPORTER_PORT`.
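
As an alternative to the environment variables, the client can also be pointed at a host explicitly when you construct it yourself (a minimal sketch; the host name is illustrative and the keyword arguments are assumed from the client API rather than shown in this diff):

```ruby
# Point the default client at an exporter running elsewhere.
PrometheusExporter::Client.default =
  PrometheusExporter::Client.new(host: "prometheus-exporter", port: 9394)
```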
 
+ ### Histogram mode
+
+ By default, the built-in collectors will report aggregations as summaries. If you need to aggregate metrics across labels, you can switch from summaries to histograms:
+
+ ```
+ $ prometheus_exporter --histogram
+ ```
+
+ In histogram mode, the same metrics will be collected but will be reported as histograms rather than summaries. This sacrifices some precision but allows aggregating metrics across actions and nodes using [`histogram_quantile`].
+
+ [`histogram_quantile`]: https://prometheus.io/docs/prometheus/latest/querying/functions/#histogram_quantile
+
+ ### Histogram - custom buckets
+
+ By default these buckets will be used:
+ ```
+ [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5.0, 10.0].freeze
+ ```
+ If this is not enough, you can specify `default_buckets` like this:
+ ```
+ Histogram.default_buckets = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2, 2.5, 3, 4, 5.0, 10.0, 12, 14, 15, 20, 25].freeze
+ ```
+
+ Specified buckets on the instance take precedence over the default:
+
+ ```
+ Histogram.default_buckets = [0.005, 0.01, 0.5].freeze
+ buckets = [0.1, 0.2, 0.3]
+ histogram = Histogram.new('test_buckets', 'I have specified buckets', buckets: buckets)
+ histogram.buckets => [0.1, 0.2, 0.3]
+ ```
+
  ## Transport concerns

  Prometheus Exporter handles transport using a simple HTTP protocol. In multi process mode we avoid needing a large number of HTTP requests by using chunked encoding to send metrics. This means that a single HTTP channel can deliver 100s or even 1000s of metrics over a single HTTP session to the `/send-metrics` endpoint. All calls to `send` and `send_json` on the `PrometheusExporter::Client` class are **non-blocking** and batched.
@@ -825,6 +993,61 @@ The `PrometheusExporter::Client` class has the method `#send-json`. This method,

  When `PrometheusExporter::Server::Collector` parses your JSON, by default it will use the faster Oj deserializer if available. This happens because it only expects a simple Hash out of the box. You can opt in for the default JSON deserializer with `json_serializer: :json`.

+ ## Logging
+
+ `PrometheusExporter::Client.default` will export to `STDERR`. To change this, you can pass your own logger:
+ ```ruby
+ PrometheusExporter::Client.new(logger: Rails.logger)
+ PrometheusExporter::Client.new(logger: Logger.new(STDOUT))
+ ```
+
+ You can also pass a log level (default is [`Logger::WARN`](https://ruby-doc.org/stdlib-3.0.1/libdoc/logger/rdoc/Logger.html)):
+ ```ruby
+ PrometheusExporter::Client.new(log_level: Logger::DEBUG)
+ ```
+
+ ## Docker Usage
+
+ You can run the `prometheus_exporter` project using the official Docker image:
+
+ ```bash
+ docker pull discourse/prometheus_exporter:latest
+ # or use a specific version
+ docker pull discourse/prometheus_exporter:x.x.x
+ ```
+
+ Then start the container:
+
+ ```bash
+ docker run -p 9394:9394 discourse/prometheus_exporter
+ ```
+
+ Additional flags can be included:
+
+ ```
+ docker run -p 9394:9394 discourse/prometheus_exporter --verbose --prefix=myapp
+ ```
+
+ ## Docker/Kubernetes Healthcheck
+
+ A `/ping` endpoint, which only returns `PONG`, is available so you can run container healthchecks:
+
+ Example:
+
+ ```yml
+ services:
+   rails-exporter:
+     command:
+       - bin/prometheus_exporter
+       - -b
+       - 0.0.0.0
+     healthcheck:
+       test: ["CMD", "curl", "--silent", "--show-error", "--fail", "--max-time", "3", "http://0.0.0.0:9394/ping"]
+       timeout: 3s
+       interval: 10s
+       retries: 5
+ ```
+
  ## Contributing

  Bug reports and pull requests are welcome on GitHub at https://github.com/discourse/prometheus_exporter. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
data/{bin → exe}/prometheus_exporter CHANGED
@@ -3,12 +3,15 @@

  require 'optparse'
  require 'json'
+ require 'logger'

  require_relative "./../lib/prometheus_exporter"
  require_relative "./../lib/prometheus_exporter/server"

  def run
- options = {}
+ options = {
+   logger_path: STDERR
+ }
  custom_collector_filename = nil
  custom_type_collectors_filenames = []

@@ -47,6 +50,9 @@ def run
  opt.on('-v', '--verbose') do |o|
  options[:verbose] = true
  end
+ opt.on('-g', '--histogram', "Use histogram instead of summary for aggregations") do |o|
+ options[:histogram] = true
+ end
  opt.on('--auth FILE', String, "(optional) enable basic authentication using a htpasswd FILE") do |o|
  options[:auth] = o
  end
@@ -61,21 +67,28 @@ def run
  opt.on('--unicorn-master PID_FILE', String, '(optional) PID file of unicorn master process to monitor unicorn') do |o|
  options[:unicorn_pid_file] = o
  end
+
+ opt.on('--logger-path PATH', String, '(optional) Path to file for logger output. Defaults to STDERR') do |o|
+ options[:logger_path] = o
+ end
  end.parse!

+ logger = Logger.new(options[:logger_path])
+ logger.level = Logger::INFO
+
  if options.has_key?(:realm) && !options.has_key?(:auth)
- STDERR.puts "[Warn] Providing REALM without AUTH has no effect"
+ logger.warn "Providing REALM without AUTH has no effect"
  end

  if options.has_key?(:auth)
  unless File.exist?(options[:auth]) && File.readable?(options[:auth])
- STDERR.puts "[Error] The AUTH file either doesn't exist or we don't have access to it"
+ logger.fatal "The AUTH file either doesn't exist or we don't have access to it"
  exit 1
  end
  end

  if custom_collector_filename
- eval File.read(custom_collector_filename), nil, File.expand_path(custom_collector_filename)
+ require File.expand_path(custom_collector_filename)
  found = false

  base_klass = PrometheusExporter::Server::CollectorBase
@@ -88,14 +101,14 @@ def run
  end

  if !found
- STDERR.puts "[Error] Can not find a class inheriting off PrometheusExporter::Server::CollectorBase"
+ logger.fatal "Can not find a class inheriting off PrometheusExporter::Server::CollectorBase"
  exit 1
  end
  end

  if custom_type_collectors_filenames.length > 0
  custom_type_collectors_filenames.each do |t|
- eval File.read(t), nil, File.expand_path(t)
+ require File.expand_path(t)
  end

  ObjectSpace.each_object(Class) do |klass|
@@ -108,7 +121,7 @@ def run

  runner = PrometheusExporter::Server::Runner.new(options)

- puts "#{Time.now} Starting prometheus exporter on #{runner.bind}:#{runner.port}"
+ logger.info "Starting prometheus exporter on #{runner.bind}:#{runner.port}"
  runner.start
  sleep
  end
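
With custom collector files now loaded via `require` rather than `eval`, a type collector passed with `-a FILE` is just a plain Ruby file defining a class the server can discover. A minimal sketch of such a file, based on the custom type collector interface described in the README (class, type, and metric names here are illustrative):

```ruby
# my_type_collector.rb -- load with: prometheus_exporter -a my_type_collector.rb
require 'prometheus_exporter/server'

class MyTypeCollector < PrometheusExporter::Server::TypeCollector
  def initialize
    @observations = PrometheusExporter::Metric::Gauge.new('my_observations', 'Example gauge')
  end

  # messages sent with send_json(type: "my", ...) are routed here
  def type
    'my'
  end

  def collect(obj)
    @observations.observe(obj['value'], obj['labels'] || {})
  end

  def metrics
    [@observations]
  end
end
```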