gitlab-exporter 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +2 -0
  3. data/.gitlab-ci.yml +18 -0
  4. data/.rubocop.yml +34 -0
  5. data/CONTRIBUTING.md +651 -0
  6. data/Gemfile +8 -0
  7. data/Gemfile.lock +75 -0
  8. data/LICENSE +25 -0
  9. data/README.md +126 -0
  10. data/bin/gitlab-exporter +17 -0
  11. data/config/gitlab-exporter.yml.example +111 -0
  12. data/gitlab-exporter.gemspec +33 -0
  13. data/lib/gitlab_exporter/cli.rb +342 -0
  14. data/lib/gitlab_exporter/database/base.rb +44 -0
  15. data/lib/gitlab_exporter/database/bloat.rb +74 -0
  16. data/lib/gitlab_exporter/database/bloat_btree.sql +84 -0
  17. data/lib/gitlab_exporter/database/bloat_table.sql +63 -0
  18. data/lib/gitlab_exporter/database/ci_builds.rb +527 -0
  19. data/lib/gitlab_exporter/database/remote_mirrors.rb +74 -0
  20. data/lib/gitlab_exporter/database/row_count.rb +164 -0
  21. data/lib/gitlab_exporter/database/tuple_stats.rb +53 -0
  22. data/lib/gitlab_exporter/database.rb +13 -0
  23. data/lib/gitlab_exporter/git.rb +144 -0
  24. data/lib/gitlab_exporter/memstats/mapping.rb +91 -0
  25. data/lib/gitlab_exporter/memstats.rb +98 -0
  26. data/lib/gitlab_exporter/prober.rb +40 -0
  27. data/lib/gitlab_exporter/process.rb +122 -0
  28. data/lib/gitlab_exporter/prometheus.rb +64 -0
  29. data/lib/gitlab_exporter/sidekiq.rb +171 -0
  30. data/lib/gitlab_exporter/sidekiq_queue_job_stats.lua +42 -0
  31. data/lib/gitlab_exporter/util.rb +83 -0
  32. data/lib/gitlab_exporter/version.rb +5 -0
  33. data/lib/gitlab_exporter/web_exporter.rb +77 -0
  34. data/lib/gitlab_exporter.rb +18 -0
  35. data/spec/cli_spec.rb +31 -0
  36. data/spec/database/bloat_spec.rb +99 -0
  37. data/spec/database/ci_builds_spec.rb +421 -0
  38. data/spec/database/row_count_spec.rb +37 -0
  39. data/spec/fixtures/smaps/sample.txt +10108 -0
  40. data/spec/git_process_proper_spec.rb +27 -0
  41. data/spec/git_spec.rb +52 -0
  42. data/spec/memstats_spec.rb +28 -0
  43. data/spec/prometheus_metrics_spec.rb +17 -0
  44. data/spec/spec_helper.rb +63 -0
  45. data/spec/util_spec.rb +15 -0
  46. metadata +224 -0
@@ -0,0 +1,91 @@
1
+ # frozen_string_literal: true
2
+
3
+ module GitLab
4
+ module Exporter
5
+ module MemStats
6
+ # Parses one entry in /proc/[pid]/smaps. For example:
7
+ #
8
+ # 00400000-00401000 r-xp 00000000 08:01 541055 /opt/gitlab/embedded/bin/ruby
9
+ # Size: 4 kB
10
+ # Rss: 4 kB
11
+ # Pss: 0 kB
12
+ # Shared_Clean: 4 kB
13
+ # Shared_Dirty: 0 kB
14
+ # Private_Clean: 0 kB
15
+ # Private_Dirty: 0 kB
16
+ # Referenced: 4 kB
17
+ # Anonymous: 0 kB
18
+ # AnonHugePages: 0 kB
19
+ # Shared_Hugetlb: 0 kB
20
+ # Private_Hugetlb: 0 kB
21
+ # Swap: 0 kB
22
+ # SwapPss: 0 kB
23
+ # KernelPageSize: 4 kB
24
+ # MMUPageSize: 4 kB
25
+ # Locked: 0 kB
26
+ # VmFlags: rd ex mr mw me dw sd
27
class Mapping
  # Names of the numeric smaps fields tracked for every mapping. The example
  # entry shows these values being reported in kB.
  FIELDS = %w(size rss shared_clean shared_dirty private_clean private_dirty swap pss).freeze

  # Components of the mapping header line, kept as the raw strings.
  attr_reader :address_start, :address_end, :perms, :offset,
              :device_major, :device_minor, :inode, :region

  # One reader/writer pair per entry in FIELDS; each value starts at 0.
  attr_accessor(*FIELDS)

  # Builds a mapping from one smaps entry.
  #
  # @param lines [Array<String>] the header line followed by its
  #   "Name: value" lines; the array and its strings are left untouched
  def initialize(lines)
    FIELDS.each { |field| send("#{field}=", 0) }

    header, *field_lines = lines
    parse_first_line(header)
    field_lines.each { |field_line| parse_field_line(field_line) }
  end

  # Parses the header line, e.g.
  #   00400000-00401000 r-xp 00000000 08:01 541055 /opt/gitlab/embedded/bin/ruby
  #
  # @param line [String] the header line of the entry
  def parse_first_line(line)
    # Limit the split to six parts so a pathname containing spaces is kept whole.
    parts = line.strip.split(" ", 6)

    @address_start, @address_end = parts[0].split("-")
    @perms = parts[1]
    @offset = parts[2]
    @device_major, @device_minor = parts[3].split(":")
    @inode = parts[4]
    # Entries without a pathname are reported as "anonymous".
    @region = parts[5] || "anonymous"
  end

  # Parses one "Name: value kB" line and stores the value when the name is one
  # of FIELDS. Blank lines and unknown names are ignored.
  #
  # @param line [String] a single field line
  # @raise [ArgumentError, TypeError] when a tracked field has no numeric value
  def parse_field_line(line)
    name, raw_value = line.strip.split
    # `split` yields no elements for a blank line, so there is nothing to parse.
    return unless name

    field = name.downcase.sub(":", "")
    return unless FIELDS.include?(field)

    send("#{field}=", Float(raw_value).to_i)
  end
end
89
+ end
90
+ end
91
+ end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "memstats/mapping"
4
+
5
+ # Ported from https://github.com/discourse/discourse/blob/master/script/memstats.rb
6
+ #
7
+ # Aggregates useful information from /proc/[pid]/smaps
8
+ #
9
+ # pss - Roughly the amount of memory that is "really" being used by the pid
10
+ # swap - Amount of swap this process is currently using
11
+ #
12
+ # Reference:
13
+ # http://www.mjmwired.net/kernel/Documentation/filesystems/proc.txt#361
14
+ #
15
+ # Example:
16
+ # # ./memstats.rb 4386
17
+ # Process: 4386
18
+ # Command Line: /usr/bin/mongod -f /etc/mongo/mongod.conf
19
+ # Memory Summary:
20
+ # private_clean 107,132 kB
21
+ # private_dirty 2,020,676 kB
22
+ # pss 2,127,860 kB
23
+ # rss 2,128,536 kB
24
+ # shared_clean 728 kB
25
+ # shared_dirty 0 kB
26
+ # size 149,281,668 kB
27
+ # swap 1,719,792 kB
28
+ module GitLab
29
+ module Exporter
30
+ module MemStats
31
+ # Aggregates all metrics for a single PID in /proc/<pid>/smaps
32
class Aggregator
  attr_accessor :pid, :totals

  # Reads and aggregates the smaps file as soon as the object is built.
  #
  # @param pid [Integer, String] process whose memory mappings are summed
  # @param smaps_path [String] file to parse; defaults to the smaps file of +pid+
  def initialize(pid, smaps_path: "/proc/#{pid}/smaps")
    @pid = pid
    @smaps_path = smaps_path
    @totals = Hash.new(0)
    @mappings = []
    @valid = true

    populate_info
  end

  # @return [Boolean] false when the smaps file could not be read or parsed
  def valid?
    @valid
  end

  private

  attr_accessor :mappings

  # Parses one entry and adds each of its fields to +totals+.
  def consume_mapping(map_lines, totals)
    mapping = Mapping.new(map_lines)

    Mapping::FIELDS.each do |field|
      totals[field] += mapping.send(field)
    end

    mapping
  end

  # Turns every total into NaN so callers can tell "unknown" from zero.
  def create_memstats_not_available(totals)
    Mapping::FIELDS.each do |field|
      totals[field] += Float::NAN
    end
  end

  # Walks the smaps file line by line, grouping each header line with the
  # field lines that follow it. Any error marks the aggregator as invalid.
  def populate_info # rubocop:disable Metrics/MethodLength
    File.open(@smaps_path) do |smaps|
      map_lines = []

      smaps.each_line do |raw_line|
        line = raw_line.strip

        case line
        when /\w+:\s+/
          # "Name: value" line belonging to the current entry.
          map_lines << line
        when /[0-9a-f]+:[0-9a-f]+\s+/
          # Header line (matched on its major:minor device column): the previous
          # entry is complete, so account for it before starting a new one.
          flush_mapping(map_lines)
          map_lines << line
        else
          break
        end
      end

      # The last entry has no following header line to trigger its flush.
      flush_mapping(map_lines)
    end
  rescue => e
    puts "Error: #{e}"
    @valid = false
    create_memstats_not_available(totals)
  end

  # Consumes the buffered entry (if any) and empties the buffer.
  def flush_mapping(map_lines)
    return if map_lines.empty?

    mappings << consume_mapping(map_lines, totals)
    map_lines.clear
  end
end
96
+ end
97
+ end
98
+ end
@@ -0,0 +1,40 @@
1
+ module GitLab
2
+ module Exporter
3
+ # A class to combine multiple probers into one
4
class Prober
  # Stores the prober configuration and resolves each entry's class up front.
  #
  # prober_opts maps a probe name to a params hash; this class reads the
  # :class_name, :opts and :methods keys and writes the resolved :class key.
  def initialize(prober_opts, metrics: PrometheusMetrics.new)
    @prober_opts = prober_opts
    @metrics = metrics

    resolve_prober_classes
  end

  # Instantiates every configured prober once per options entry and invokes
  # each of its configured probe methods, all sharing the same metrics object.
  def probe_all
    @prober_opts.each do |_name, config|
      Utils.wrap_in_array(config[:opts]).each do |probe_options|
        instance = config[:class].new(probe_options, metrics: @metrics)
        config[:methods].each { |method_name| instance.send(method_name) }
      end
    end
  end

  # Writes the string form of the collected metrics to the given target.
  def write_to(target)
    target.write(@metrics.to_s)
  end

  private

  # Looks up each prober class under GitLab::Exporter. The class name comes
  # from :class_name or, failing that, from the camel-cased "<probe>_prober".
  def resolve_prober_classes
    @prober_opts.each do |probe_name, config|
      constant_path = config[:class_name] || Utils.camel_case_string("#{probe_name}_prober")

      config[:class] = constant_path.split("::").inject(GitLab::Exporter) do |scope, constant|
        scope.const_get(constant)
      end
    end
  end
end
39
+ end
40
+ end
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "memstats"
4
+
5
+ module GitLab
6
+ module Exporter
7
+ # A helper class to extract memory info from /proc/<pid>/status
8
+ #
9
+
10
+ # A helper class to extract stats from /proc/<pid>/stat
11
+ #
12
+ # See: man 5 proc
13
+ #
14
+ # It takes a pid
15
class ProcessStats
  # Page size used to convert the resident page count into bytes.
  # NOTE(review): hard-coded; systems with a different page size would
  # mis-report rss — confirm whether that matters for supported platforms.
  PAGE_SIZE_BYTES = 4096

  # @param pid [Integer, String] process to inspect
  # @param stat_path [String] file to parse; defaults to the stat file of +pid+
  def initialize(pid, stat_path: "/proc/#{pid}/stat")
    @pid = pid
    @stat_path = stat_path
    @user_hertz = retrieve_user_hertz
    @stats = populate_info
  end

  # @return [Boolean] false when the stat file could not be read
  def valid?
    !@stats.nil?
  end

  # Sum of fields 14 and 15 (utime and stime in proc(5)), converted from clock
  # ticks to seconds.
  def cpu_time
    (@stats[14].to_i + @stats[15].to_i) / @user_hertz
  end

  # Field 22 (starttime in proc(5)), converted from clock ticks to seconds.
  def start_time
    @stats[22].to_i / @user_hertz
  end

  def vsize
    # Virtual memory size in bytes.
    @stats[23].to_i
  end

  def rss
    # Resident Set Size: number of pages the process has in real memory.
    @stats[24].to_i * PAGE_SIZE_BYTES
  end

  private

  # Reads the stat file; returns nil when the process no longer exists.
  def populate_info
    parse_stat(File.read(@stat_path))
  rescue Errno::ENOENT, Errno::ESRCH
    nil
  end

  # Splits the stat line into fields, padded by one leading element so the
  # indexes match the field numbers in the man page.
  #
  # Field 2 (the command name) is wrapped in parentheses and may itself contain
  # spaces or parentheses, so it is delimited by the first "(" and the last ")"
  # instead of being split on whitespace.
  def parse_stat(content)
    comm_start = content.index("(")
    comm_end = content.rindex(")")

    # Unexpected layout: fall back to plain whitespace splitting.
    return [""].concat(content.split(" ")) unless comm_start && comm_end && comm_start < comm_end

    pid_field = content[0...comm_start].strip
    comm_field = content[comm_start..comm_end]
    remaining = content[(comm_end + 1)..-1].split(" ")

    ["", pid_field, comm_field].concat(remaining)
  end

  # Clock ticks per second, falling back to 100 when the clock is unsupported.
  def retrieve_user_hertz
    Process.clock_getres(:TIMES_BASED_CLOCK_PROCESS_CPUTIME_ID, :hertz)
  rescue Errno::EINVAL
    100.0
  end
end
59
+
60
+ # Probes a process for info then writes metrics to a target
61
class ProcessProber
  # @param options [Hash] reads :name, :pid_or_pattern and the optional
  #   :quantiles flag (default false)
  # @param metrics [PrometheusMetrics] container the probes write into
  def initialize(options, metrics: PrometheusMetrics.new)
    @metrics = metrics
    @name = options[:name]
    @pids = resolve_pids(options[:pid_or_pattern])
    @use_quantiles = options.fetch(:quantiles, false)
  end

  # Adds CPU, memory and start-time metrics for every pid whose stat file
  # could be read.
  def probe_stat
    @pids.each do |pid|
      stats = ProcessStats.new(pid)
      next unless stats.valid?

      labels = labels_for(pid)

      @metrics.add("process_cpu_seconds_total", stats.cpu_time, @use_quantiles, **labels)
      @metrics.add("process_resident_memory_bytes", stats.rss, @use_quantiles, **labels)
      @metrics.add("process_virtual_memory_bytes", stats.vsize, @use_quantiles, **labels)
      @metrics.add("process_start_time_seconds", stats.start_time, @use_quantiles, **labels)
    end

    self
  end

  # Adds the number of matching processes.
  def probe_count
    @metrics.add("process_count", @pids.count, name: @name.downcase)

    self
  end

  # Adds one metric per smaps field for every pid whose smaps file could be
  # aggregated. Totals are multiplied by 1024 before being reported as bytes.
  def probe_smaps
    @pids.each do |pid|
      stats = ::GitLab::Exporter::MemStats::Aggregator.new(pid)

      next unless stats.valid?

      labels = labels_for(pid)

      ::GitLab::Exporter::MemStats::Mapping::FIELDS.each do |field|
        value = stats.totals[field]

        # NaN and negative totals are skipped (NaN fails every comparison).
        next unless value >= 0

        @metrics.add("process_smaps_#{field}_bytes", value * 1024, @use_quantiles, **labels)
      end
    end

    self
  end

  # Writes the string form of the collected metrics to the given target.
  def write_to(target)
    target.write(@metrics.to_s)
  end

  private

  # Treats an all-digit value (String or Integer) as a literal pid; anything
  # else is handed to Utils.pgrep as a pattern. Anchoring with \A and \z keeps
  # multi-line strings from being mistaken for a pid.
  def resolve_pids(pid_or_pattern)
    return [pid_or_pattern] if pid_or_pattern.to_s =~ /\A\d+\z/

    Utils.pgrep(pid_or_pattern)
  end

  # Labels shared by the per-process metrics. The pid label is left out when
  # quantiles are used, so all processes fall under one label set.
  def labels_for(pid)
    labels = { name: @name.downcase }
    labels[:pid] = pid unless @use_quantiles
    labels
  end
end
121
+ end
122
+ end
@@ -0,0 +1,64 @@
1
+ require "quantile"
2
+
3
+ module GitLab
4
+ module Exporter
5
+ # Prometheus metrics container
6
+ #
7
+ # Provides a simple API to `add` metrics and then turn them `to_s` which will just
8
+ # dump all the metrics in prometheus format
9
+ #
10
+ # The add method also can take any arbitrary amount of labels in a `key: value` format.
11
class PrometheusMetrics
  # @param include_timestamp [Boolean] whether each rendered line ends with
  #   the millisecond timestamp of the measurement
  def initialize(include_timestamp: true)
    @metrics = Hash.new { |h, k| h[k] = [] }
    @quantiles = Hash.new { |h, k| h[k] = [] }
    @include_timestamp = include_timestamp
  end

  # Records one measurement.
  #
  # @param name [String] metric name
  # @param value [Numeric] measured value
  # @param quantile [Boolean] when true the value is kept as an observation and
  #   only its estimated quantiles are rendered by +to_s+
  # @param labels [Hash] arbitrary label/value pairs
  # @return [PrometheusMetrics] self, to allow chaining
  def add(name, value, quantile = false, **labels)
    if quantile
      @quantiles[{ name: name, labels: labels }] << value
    else
      @metrics[name] << build_measurement(value, labels)
    end

    self
  end

  # Renders every measurement, one line each. Quantile estimates are computed
  # on the fly and never stored, so repeated calls do not duplicate lines.
  def to_s
    rendered = all_measurements.flat_map do |name, measurements|
      measurements.map { |measurement| format_line(name, measurement) }
    end

    rendered.join
  end

  private

  def build_measurement(value, labels)
    { value: value, labels: labels, timestamp: (Time.now.to_f * 1000).to_i }
  end

  # Plain measurements followed by the quantile estimates, grouped by name.
  def all_measurements
    combined = {}
    @metrics.each { |name, measurements| combined[name] = measurements.dup }
    quantile_measurements.each { |name, measurement| (combined[name] ||= []) << measurement }
    combined
  end

  # Builds [name, measurement] pairs for each invariant of each observed
  # quantile series. The stored labels are merged, not mutated, because they
  # are part of the @quantiles hash key.
  def quantile_measurements
    @quantiles.flat_map do |data, observations|
      estimator = Quantile::Estimator.new
      observations.each { |value| estimator.observe(value) }

      estimator.invariants.map do |invariant|
        labels = data[:labels].merge(quantile: "#{(invariant.quantile * 100).to_i}th")
        [data[:name], build_measurement(estimator.query(invariant.quantile), labels)]
      end
    end
  end

  def format_line(name, measurement)
    labels = (measurement[:labels] || {}).map do |label, value|
      "#{label}=\"#{escape_label_value(value)}\""
    end.join(",")

    line = name.to_s
    line += "{#{labels}}" unless labels.empty?
    line += " #{measurement[:value]}"
    line += " #{measurement[:timestamp]}" if @include_timestamp
    "#{line}\n"
  end

  # Backslash, double quote and newline must be escaped inside label values in
  # the Prometheus text exposition format.
  def escape_label_value(value)
    value.to_s.gsub(/[\\"\n]/, "\\" => "\\\\", "\"" => "\\\"", "\n" => "\\n")
  end
end
+ end
63
+ end
64
+ end
@@ -0,0 +1,171 @@
1
+ require "sidekiq/api"
2
+ require "digest"
3
+
4
+ module GitLab
5
+ module Exporter
6
+ # A prober for Sidekiq queues
7
+ #
8
+ # It takes the Redis URL Sidekiq is connected to
9
class SidekiqProber
  # Lua script (stored next to this file) used by probe_jobs, plus the SHA1
  # used to invoke it through EVALSHA.
  QUEUE_JOB_STATS_SCRIPT = File.read(File.expand_path("#{__FILE__}/../sidekiq_queue_job_stats.lua")).freeze
  QUEUE_JOB_STATS_SHA = Digest::SHA1.hexdigest(QUEUE_JOB_STATS_SCRIPT).freeze

  # Configures the Sidekiq client for the given Redis and makes sure the job
  # stats script is loaded there.
  #
  # @param opts [Hash] reads :redis_url and the optional :redis_enable_client
  # @param metrics [PrometheusMetrics] container the probes write into
  def initialize(opts, metrics: PrometheusMetrics.new)
    @opts = opts
    @metrics = metrics

    Sidekiq.configure_client do |config|
      config.redis = redis_options
    end

    ensure_queue_job_stats_script_loaded
  end

  # Adds the global counters exposed by Sidekiq::Stats.
  def probe_stats
    return self unless connected?

    stats = Sidekiq::Stats.new

    @metrics.add("sidekiq_jobs_processed_total", stats.processed)
    @metrics.add("sidekiq_jobs_failed_total", stats.failed)
    @metrics.add("sidekiq_jobs_enqueued_size", stats.enqueued)
    @metrics.add("sidekiq_jobs_scheduled_size", stats.scheduled_size)
    @metrics.add("sidekiq_jobs_retry_size", stats.retry_size)
    @metrics.add("sidekiq_jobs_dead_size", stats.dead_size)

    @metrics.add("sidekiq_default_queue_latency_seconds", stats.default_queue_latency)
    @metrics.add("sidekiq_processes_size", stats.processes_size)
    @metrics.add("sidekiq_workers_size", stats.workers_size)

    self
  end

  # Adds size, latency and paused state for every queue.
  def probe_queues
    return self unless connected?

    Sidekiq::Queue.all.each do |queue|
      @metrics.add("sidekiq_queue_size", queue.size, name: queue.name)
      @metrics.add("sidekiq_queue_latency_seconds", queue.latency, name: queue.name)
      @metrics.add("sidekiq_queue_paused", queue.paused? ? 1 : 0, name: queue.name)
    end

    self
  end

  # Adds the number of enqueued jobs per job class, as counted by the Lua
  # script for each queue. Nothing is added if the script fails on any queue.
  def probe_jobs # rubocop:disable Metrics/MethodLength
    return self unless connected?

    job_stats = {}

    Sidekiq::Queue.all.each do |queue|
      begin
        Sidekiq.redis do |conn|
          stats = conn.evalsha(QUEUE_JOB_STATS_SHA, ["queue:#{queue.name}"])
          # A class may be enqueued in several queues: add the counts up
          # instead of letting the last queue overwrite the earlier ones.
          job_stats.merge!(stats.to_h) { |_class_name, total, count| total + count }
        end
      rescue Redis::CommandError # Could happen if the script exceeded the maximum run time (5 seconds by default)
        # FIXME: Should we call SCRIPT KILL?
        return self
      end
    end

    job_stats.each do |class_name, count|
      @metrics.add("sidekiq_enqueued_jobs", count, name: class_name)
    end

    self
  end

  # Adds the number of currently running jobs per job class.
  def probe_workers
    return self unless connected?

    worker_stats = Hash.new(0)

    Sidekiq::Workers.new.each do |_pid, _tid, work|
      worker_stats[work["payload"]["class"]] += 1
    end

    worker_stats.each do |class_name, count|
      @metrics.add("sidekiq_running_jobs", count, name: class_name)
    end

    self
  end

  # Adds the number of jobs waiting in the retry set per job class.
  def probe_retries
    return self unless connected?

    retry_stats = Hash.new(0)

    Sidekiq::RetrySet.new.each do |job|
      retry_stats[job.klass] += 1
    end

    retry_stats.each do |class_name, count|
      @metrics.add("sidekiq_to_be_retried_jobs", count, name: class_name)
    end

    self
  end

  # Deprecated: adds the size of the dead set; probe_stats reports it too.
  def probe_dead
    puts "[DEPRECATED] probe_dead is now considered obsolete and will be removed in future major versions,"\
         " please use probe_stats instead"

    return self unless connected?

    @metrics.add("sidekiq_dead_jobs", Sidekiq::Stats.new.dead_size)

    self
  end

  # Writes the string form of the collected metrics to the given target.
  def write_to(target)
    target.write(@metrics.to_s)
  end

  private

  # Connection options handed to Sidekiq. The client id is cleared unless
  # :redis_enable_client is left unset or truthy.
  def redis_options
    options = {
      url: @opts[:redis_url],
      namespace: "resque:gitlab",
      connect_timeout: 1,
      reconnect_attempts: 0
    }

    options[:id] = nil unless redis_enable_client?
    options
  end

  def redis_enable_client?
    return true if @opts[:redis_enable_client].nil?

    @opts[:redis_enable_client]
  end

  # Issues a cheap GET to find out whether Redis is reachable. Only success is
  # memoized, so a failed check is retried on the next call.
  #
  # @return [Boolean]
  def connected?
    @connected ||= begin
      Sidekiq.redis do |conn|
        conn.get("foo")
      end
      true
    end
  rescue Redis::CannotConnectError, Redis::TimeoutError
    # Maybe we're trying connecting to a slave
    false
  end

  # Loads the job stats script into Redis unless it is already cached there.
  def ensure_queue_job_stats_script_loaded
    return unless connected?

    Sidekiq.redis do |conn|
      # Using administrative commands on conn directly (which is a Redis::Namespace)
      # will be removed in redis-namespace 2.0.
      next if conn.redis.script(:exists, QUEUE_JOB_STATS_SHA)

      conn.redis.script(:load, QUEUE_JOB_STATS_SCRIPT)
    end
  end
end
170
+ end
171
+ end