prometheus_exporter 0.5.1 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +42 -0
  3. data/.gitignore +2 -0
  4. data/.rubocop.yml +7 -1
  5. data/Appraisals +10 -0
  6. data/CHANGELOG +36 -3
  7. data/README.md +278 -5
  8. data/bin/prometheus_exporter +21 -0
  9. data/gemfiles/.bundle/config +2 -0
  10. data/gemfiles/ar_60.gemfile +5 -0
  11. data/gemfiles/ar_61.gemfile +7 -0
  12. data/lib/prometheus_exporter.rb +2 -0
  13. data/lib/prometheus_exporter/client.rb +27 -3
  14. data/lib/prometheus_exporter/instrumentation.rb +2 -0
  15. data/lib/prometheus_exporter/instrumentation/active_record.rb +14 -7
  16. data/lib/prometheus_exporter/instrumentation/delayed_job.rb +3 -2
  17. data/lib/prometheus_exporter/instrumentation/method_profiler.rb +2 -1
  18. data/lib/prometheus_exporter/instrumentation/process.rb +2 -0
  19. data/lib/prometheus_exporter/instrumentation/puma.rb +16 -4
  20. data/lib/prometheus_exporter/instrumentation/resque.rb +40 -0
  21. data/lib/prometheus_exporter/instrumentation/sidekiq.rb +44 -3
  22. data/lib/prometheus_exporter/instrumentation/sidekiq_queue.rb +50 -0
  23. data/lib/prometheus_exporter/metric/base.rb +4 -0
  24. data/lib/prometheus_exporter/metric/counter.rb +4 -0
  25. data/lib/prometheus_exporter/metric/gauge.rb +4 -0
  26. data/lib/prometheus_exporter/metric/histogram.rb +6 -0
  27. data/lib/prometheus_exporter/metric/summary.rb +7 -0
  28. data/lib/prometheus_exporter/middleware.rb +40 -17
  29. data/lib/prometheus_exporter/server.rb +2 -0
  30. data/lib/prometheus_exporter/server/active_record_collector.rb +3 -1
  31. data/lib/prometheus_exporter/server/collector.rb +2 -0
  32. data/lib/prometheus_exporter/server/delayed_job_collector.rb +20 -8
  33. data/lib/prometheus_exporter/server/hutch_collector.rb +6 -0
  34. data/lib/prometheus_exporter/server/puma_collector.rb +9 -1
  35. data/lib/prometheus_exporter/server/resque_collector.rb +54 -0
  36. data/lib/prometheus_exporter/server/runner.rb +24 -2
  37. data/lib/prometheus_exporter/server/shoryuken_collector.rb +8 -0
  38. data/lib/prometheus_exporter/server/sidekiq_collector.rb +11 -2
  39. data/lib/prometheus_exporter/server/sidekiq_queue_collector.rb +46 -0
  40. data/lib/prometheus_exporter/server/web_collector.rb +7 -5
  41. data/lib/prometheus_exporter/server/web_server.rb +29 -17
  42. data/lib/prometheus_exporter/version.rb +1 -1
  43. data/prometheus_exporter.gemspec +9 -5
  44. metadata +67 -17
  45. data/.travis.yml +0 -12
@@ -2,6 +2,10 @@
2
2
 
3
3
  module PrometheusExporter::Metric
4
4
  class Base
5
+
6
+ @default_prefix = nil if !defined?(@default_prefix)
7
+ @default_labels = nil if !defined?(@default_labels)
8
+
5
9
  # prefix applied to all metrics
6
10
  def self.default_prefix=(name)
7
11
  @default_prefix = name
@@ -27,6 +27,10 @@ module PrometheusExporter::Metric
27
27
  @data.dup
28
28
  end
29
29
 
30
+ def remove(labels)
31
+ @data.delete(labels)
32
+ end
33
+
30
34
  def observe(increment = 1, labels = {})
31
35
  @data[labels] ||= 0
32
36
  @data[labels] += increment
@@ -27,6 +27,10 @@ module PrometheusExporter::Metric
27
27
  @data.dup
28
28
  end
29
29
 
30
+ def remove(labels)
31
+ @data.delete(labels)
32
+ end
33
+
30
34
  def observe(value, labels = {})
31
35
  if value.nil?
32
36
  data.delete(labels)
@@ -27,6 +27,12 @@ module PrometheusExporter::Metric
27
27
  data
28
28
  end
29
29
 
30
+ def remove(labels)
31
+ @observations.delete(labels)
32
+ @counts.delete(labels)
33
+ @sums.delete(labels)
34
+ end
35
+
30
36
  def type
31
37
  "histogram"
32
38
  end
@@ -32,6 +32,13 @@ module PrometheusExporter::Metric
32
32
  data
33
33
  end
34
34
 
35
+ def remove(labels)
36
+ @counts.delete(labels)
37
+ @sums.delete(labels)
38
+ @buffers[0].delete(labels)
39
+ @buffers[1].delete(labels)
40
+ end
41
+
35
42
  def type
36
43
  "summary"
37
44
  end
@@ -36,22 +36,40 @@ class PrometheusExporter::Middleware
36
36
 
37
37
  result
38
38
  ensure
39
+
40
+ obj = {
41
+ type: "web",
42
+ timings: info,
43
+ queue_time: queue_time,
44
+ default_labels: default_labels(env, result)
45
+ }
46
+ labels = custom_labels(env)
47
+ if labels
48
+ obj = obj.merge(custom_labels: labels)
49
+ end
50
+
51
+ @client.send_json(obj)
52
+ end
53
+
54
+ def default_labels(env, result)
39
55
  status = (result && result[0]) || -1
40
56
  params = env["action_dispatch.request.parameters"]
41
- action, controller = nil
57
+ action = controller = nil
42
58
  if params
43
59
  action = params["action"]
44
60
  controller = params["controller"]
45
61
  end
46
62
 
47
- @client.send_json(
48
- type: "web",
49
- timings: info,
50
- queue_time: queue_time,
51
- action: action,
52
- controller: controller,
63
+ {
64
+ action: action || "other",
65
+ controller: controller || "other",
53
66
  status: status
54
- )
67
+ }
68
+ end
69
+
70
+ # allows subclasses to add custom labels based on env
71
+ def custom_labels(env)
72
+ nil
55
73
  end
56
74
 
57
75
  private
@@ -72,19 +90,24 @@ class PrometheusExporter::Middleware
72
90
  Process.clock_gettime(Process::CLOCK_REALTIME)
73
91
  end
74
92
 
75
- # get the content of the x-queue-start or x-request-start header
93
+ # determine queue start from well-known trace headers
76
94
  def queue_start(env)
95
+
96
+ # get the content of the x-queue-start or x-request-start header
77
97
  value = env['HTTP_X_REQUEST_START'] || env['HTTP_X_QUEUE_START']
78
98
  unless value.nil? || value == ''
79
- convert_header_to_ms(value.to_s)
99
+ # nginx returns time as milliseconds with 3 decimal places
100
+ # apache returns time as microseconds without decimal places
101
+ # this method takes care to convert both into a proper second + fractions timestamp
102
+ value = value.to_s.gsub(/t=|\./, '')
103
+ return "#{value[0, 10]}.#{value[10, 13]}".to_f
80
104
  end
81
- end
82
105
 
83
- # nginx returns time as milliseconds with 3 decimal places
84
- # apache returns time as microseconds without decimal places
85
- # this method takes care to convert both into a proper second + fractions timestamp
86
- def convert_header_to_ms(str)
87
- str = str.gsub(/t=|\./, '')
88
- "#{str[0, 10]}.#{str[10, 13]}".to_f
106
+ # get the content of the x-amzn-trace-id header
107
+ # see also: https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-request-tracing.html
108
+ value = env['HTTP_X_AMZN_TRACE_ID']
109
+ value&.split('Root=')&.last&.split('-')&.fetch(1)&.to_i(16)
110
+
89
111
  end
112
+
90
113
  end
@@ -5,6 +5,7 @@ require_relative "server/type_collector"
5
5
  require_relative "server/web_collector"
6
6
  require_relative "server/process_collector"
7
7
  require_relative "server/sidekiq_collector"
8
+ require_relative "server/sidekiq_queue_collector"
8
9
  require_relative "server/delayed_job_collector"
9
10
  require_relative "server/collector_base"
10
11
  require_relative "server/collector"
@@ -15,3 +16,4 @@ require_relative "server/hutch_collector"
15
16
  require_relative "server/unicorn_collector"
16
17
  require_relative "server/active_record_collector"
17
18
  require_relative "server/shoryuken_collector"
19
+ require_relative "server/resque_collector"
@@ -27,6 +27,7 @@ module PrometheusExporter::Server
27
27
 
28
28
  @active_record_metrics.map do |m|
29
29
  metric_key = (m["metric_labels"] || {}).merge("pid" => m["pid"])
30
+ metric_key.merge!(m["custom_labels"]) if m["custom_labels"]
30
31
 
31
32
  ACTIVE_RECORD_GAUGES.map do |k, help|
32
33
  k = k.to_s
@@ -46,7 +47,8 @@ module PrometheusExporter::Server
46
47
  obj["created_at"] = now
47
48
 
48
49
  @active_record_metrics.delete_if do |current|
49
- (obj["pid"] == current["pid"] && obj["hostname"] == current["hostname"]) ||
50
+ (obj["pid"] == current["pid"] && obj["hostname"] == current["hostname"] &&
51
+ obj["metric_labels"]["pool_name"] == current["metric_labels"]["pool_name"]) ||
50
52
  (current["created_at"] + MAX_ACTIVERECORD_METRIC_AGE < now)
51
53
  end
52
54
 
@@ -13,12 +13,14 @@ module PrometheusExporter::Server
13
13
  register_collector(WebCollector.new)
14
14
  register_collector(ProcessCollector.new)
15
15
  register_collector(SidekiqCollector.new)
16
+ register_collector(SidekiqQueueCollector.new)
16
17
  register_collector(DelayedJobCollector.new)
17
18
  register_collector(PumaCollector.new)
18
19
  register_collector(HutchCollector.new)
19
20
  register_collector(UnicornCollector.new)
20
21
  register_collector(ActiveRecordCollector.new)
21
22
  register_collector(ShoryukenCollector.new)
23
+ register_collector(ResqueCollector.new)
22
24
  end
23
25
 
24
26
  def register_collector(collector)
@@ -2,27 +2,39 @@
2
2
 
3
3
  module PrometheusExporter::Server
4
4
  class DelayedJobCollector < TypeCollector
5
+ def initialize
6
+ @delayed_jobs_total = nil
7
+ @delayed_job_duration_seconds = nil
8
+ @delayed_jobs_total = nil
9
+ @delayed_failed_jobs_total = nil
10
+ @delayed_jobs_max_attempts_reached_total = nil
11
+ @delayed_job_duration_seconds_summary = nil
12
+ @delayed_job_attempts_summary = nil
13
+ @delayed_jobs_enqueued = nil
14
+ @delayed_jobs_pending = nil
15
+ end
5
16
 
6
17
  def type
7
18
  "delayed_job"
8
19
  end
9
20
 
10
21
  def collect(obj)
11
- default_labels = { job_name: obj['name'] }
22
+ default_labels = { job_name: obj['name'], queue_name: obj['queue_name'] }
12
23
  custom_labels = obj['custom_labels']
24
+
13
25
  labels = custom_labels.nil? ? default_labels : default_labels.merge(custom_labels)
14
26
 
15
27
  ensure_delayed_job_metrics
16
28
  @delayed_job_duration_seconds.observe(obj["duration"], labels)
17
29
  @delayed_jobs_total.observe(1, labels)
18
30
  @delayed_failed_jobs_total.observe(1, labels) if !obj["success"]
19
- @delayed_jobs_max_attempts_reached_total.observe(1) if obj["attempts"] >= obj["max_attempts"]
20
- @delayed_job_duration_seconds_summary.observe(obj["duration"])
21
- @delayed_job_duration_seconds_summary.observe(obj["duration"], status: "success") if obj["success"]
22
- @delayed_job_duration_seconds_summary.observe(obj["duration"], status: "failed") if !obj["success"]
23
- @delayed_job_attempts_summary.observe(obj["attempts"]) if obj["success"]
24
- @delayed_jobs_enqueued.observe(obj["enqueued"])
25
- @delayed_jobs_pending.observe(obj["pending"])
31
+ @delayed_jobs_max_attempts_reached_total.observe(1, labels) if obj["attempts"] >= obj["max_attempts"]
32
+ @delayed_job_duration_seconds_summary.observe(obj["duration"], labels)
33
+ @delayed_job_duration_seconds_summary.observe(obj["duration"], labels.merge(status: "success")) if obj["success"]
34
+ @delayed_job_duration_seconds_summary.observe(obj["duration"], labels.merge(status: "failed")) if !obj["success"]
35
+ @delayed_job_attempts_summary.observe(obj["attempts"], labels) if obj["success"]
36
+ @delayed_jobs_enqueued.observe(obj["enqueued"], labels)
37
+ @delayed_jobs_pending.observe(obj["pending"], labels)
26
38
  end
27
39
 
28
40
  def metrics
@@ -2,6 +2,12 @@
2
2
 
3
3
  module PrometheusExporter::Server
4
4
  class HutchCollector < TypeCollector
5
+ def initialize
6
+ @hutch_jobs_total = nil
7
+ @hutch_job_duration_seconds = nil
8
+ @hutch_jobs_total = nil
9
+ @hutch_failed_jobs_total = nil
10
+ end
5
11
 
6
12
  def type
7
13
  "hutch"
@@ -34,6 +34,9 @@ module PrometheusExporter::Server
34
34
  if m["custom_labels"]
35
35
  labels.merge!(m["custom_labels"])
36
36
  end
37
+ if m["metric_labels"]
38
+ labels.merge!(m["metric_labels"])
39
+ end
37
40
 
38
41
  PUMA_GAUGES.map do |k, help|
39
42
  k = k.to_s
@@ -51,7 +54,12 @@ module PrometheusExporter::Server
51
54
  now = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
52
55
 
53
56
  obj["created_at"] = now
54
- @puma_metrics.delete_if { |m| m["created_at"] + MAX_PUMA_METRIC_AGE < now }
57
+
58
+ @puma_metrics.delete_if do |current|
59
+ (obj["pid"] == current["pid"] && obj["hostname"] == current["hostname"]) ||
60
+ (current["created_at"] + MAX_PUMA_METRIC_AGE < now)
61
+ end
62
+
55
63
  @puma_metrics << obj
56
64
  end
57
65
  end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module PrometheusExporter::Server
4
+ class ResqueCollector < TypeCollector
5
+ MAX_RESQUE_METRIC_AGE = 30
6
+ RESQUE_GAUGES = {
7
+ processed_jobs_total: "Total number of processed Resque jobs.",
8
+ failed_jobs_total: "Total number of failed Resque jobs.",
9
+ pending_jobs_total: "Total number of pending Resque jobs.",
10
+ queues_total: "Total number of Resque queues.",
11
+ workers_total: "Total number of Resque workers running.",
12
+ working_total: "Total number of Resque workers working."
13
+ }
14
+
15
+ def initialize
16
+ @resque_metrics = []
17
+ @gauges = {}
18
+ end
19
+
20
+ def type
21
+ "resque"
22
+ end
23
+
24
+ def metrics
25
+ return [] if resque_metrics.length == 0
26
+
27
+ resque_metrics.map do |metric|
28
+ labels = metric.fetch("custom_labels", {})
29
+
30
+ RESQUE_GAUGES.map do |name, help|
31
+ name = name.to_s
32
+ if value = metric[name]
33
+ gauge = gauges[name] ||= PrometheusExporter::Metric::Gauge.new("resque_#{name}", help)
34
+ gauge.observe(value, labels)
35
+ end
36
+ end
37
+ end
38
+
39
+ gauges.values
40
+ end
41
+
42
+ def collect(object)
43
+ now = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
44
+
45
+ object["created_at"] = now
46
+ resque_metrics.delete_if { |metric| metric["created_at"] + MAX_RESQUE_METRIC_AGE < now }
47
+ resque_metrics << object
48
+ end
49
+
50
+ private
51
+
52
+ attr_reader :resque_metrics, :gauges
53
+ end
54
+ end
@@ -9,6 +9,15 @@ module PrometheusExporter::Server
9
9
 
10
10
  class Runner
11
11
  def initialize(options = {})
12
+ @timeout = nil
13
+ @port = nil
14
+ @bind = nil
15
+ @collector_class = nil
16
+ @type_collectors = nil
17
+ @prefix = nil
18
+ @auth = nil
19
+ @realm = nil
20
+
12
21
  options.each do |k, v|
13
22
  send("#{k}=", v) if self.class.method_defined?("#{k}=")
14
23
  end
@@ -16,6 +25,7 @@ module PrometheusExporter::Server
16
25
 
17
26
  def start
18
27
  PrometheusExporter::Metric::Base.default_prefix = prefix
28
+ PrometheusExporter::Metric::Base.default_labels = label
19
29
 
20
30
  register_type_collectors
21
31
 
@@ -32,12 +42,20 @@ module PrometheusExporter::Server
32
42
  )
33
43
  end
34
44
 
35
- server = server_class.new port: port, bind: bind, collector: collector, timeout: timeout, verbose: verbose
45
+ server = server_class.new(port: port, bind: bind, collector: collector, timeout: timeout, verbose: verbose, auth: auth, realm: realm)
36
46
  server.start
37
47
  end
38
48
 
39
49
  attr_accessor :unicorn_listen_address, :unicorn_pid_file
40
- attr_writer :prefix, :port, :bind, :collector_class, :type_collectors, :timeout, :verbose, :server_class
50
+ attr_writer :prefix, :port, :bind, :collector_class, :type_collectors, :timeout, :verbose, :server_class, :label, :auth, :realm
51
+
52
+ def auth
53
+ @auth || nil
54
+ end
55
+
56
+ def realm
57
+ @realm || PrometheusExporter::DEFAULT_REALM
58
+ end
41
59
 
42
60
  def prefix
43
61
  @prefix || PrometheusExporter::DEFAULT_PREFIX
@@ -76,6 +94,10 @@ module PrometheusExporter::Server
76
94
  @_collector ||= collector_class.new
77
95
  end
78
96
 
97
+ def label
98
+ @label ||= PrometheusExporter::DEFAULT_LABEL
99
+ end
100
+
79
101
  private
80
102
 
81
103
  def register_type_collectors
@@ -3,6 +3,14 @@
3
3
  module PrometheusExporter::Server
4
4
  class ShoryukenCollector < TypeCollector
5
5
 
6
+ def initialize
7
+ @shoryuken_jobs_total = nil
8
+ @shoryuken_job_duration_seconds = nil
9
+ @shoryuken_jobs_total = nil
10
+ @shoryuken_restarted_jobs_total = nil
11
+ @shoryuken_failed_jobs_total = nil
12
+ end
13
+
6
14
  def type
7
15
  "shoryuken"
8
16
  end
@@ -3,12 +3,21 @@
3
3
  module PrometheusExporter::Server
4
4
  class SidekiqCollector < TypeCollector
5
5
 
6
+ def initialize
7
+ @sidekiq_jobs_total = nil
8
+ @sidekiq_job_duration_seconds = nil
9
+ @sidekiq_jobs_total = nil
10
+ @sidekiq_restarted_jobs_total = nil
11
+ @sidekiq_failed_jobs_total = nil
12
+ @sidekiq_dead_jobs_total = nil
13
+ end
14
+
6
15
  def type
7
16
  "sidekiq"
8
17
  end
9
18
 
10
19
  def collect(obj)
11
- default_labels = { job_name: obj['name'] }
20
+ default_labels = { job_name: obj['name'], queue: obj['queue'] }
12
21
  custom_labels = obj['custom_labels']
13
22
  labels = custom_labels.nil? ? default_labels : default_labels.merge(custom_labels)
14
23
 
@@ -43,7 +52,7 @@ module PrometheusExporter::Server
43
52
  if !@sidekiq_jobs_total
44
53
 
45
54
  @sidekiq_job_duration_seconds =
46
- PrometheusExporter::Metric::Counter.new(
55
+ PrometheusExporter::Metric::Summary.new(
47
56
  "sidekiq_job_duration_seconds", "Total time spent in sidekiq jobs.")
48
57
 
49
58
  @sidekiq_jobs_total =
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+ module PrometheusExporter::Server
3
+ class SidekiqQueueCollector < TypeCollector
4
+ MAX_SIDEKIQ_METRIC_AGE = 60
5
+
6
+ SIDEKIQ_QUEUE_GAUGES = {
7
+ 'backlog_total' => 'Size of the sidekiq queue.',
8
+ 'latency_seconds' => 'Latency of the sidekiq queue.',
9
+ }.freeze
10
+
11
+ attr_reader :sidekiq_metrics, :gauges
12
+
13
+ def initialize
14
+ @sidekiq_metrics = []
15
+ @gauges = {}
16
+ end
17
+
18
+ def type
19
+ 'sidekiq_queue'
20
+ end
21
+
22
+ def metrics
23
+ sidekiq_metrics.map do |metric|
24
+ labels = metric.fetch("labels", {})
25
+ SIDEKIQ_QUEUE_GAUGES.map do |name, help|
26
+ if (value = metric[name])
27
+ gauge = gauges[name] ||= PrometheusExporter::Metric::Gauge.new("sidekiq_queue_#{name}", help)
28
+ gauge.observe(value, labels)
29
+ end
30
+ end
31
+ end
32
+
33
+ gauges.values
34
+ end
35
+
36
+ def collect(object)
37
+ now = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
38
+ object['queues'].each do |queue|
39
+ queue["created_at"] = now
40
+ queue["labels"].merge!(object['custom_labels']) if object['custom_labels']
41
+ sidekiq_metrics.delete_if { |metric| metric['created_at'] + MAX_SIDEKIQ_METRIC_AGE < now }
42
+ sidekiq_metrics << queue
43
+ end
44
+ end
45
+ end
46
+ end