gitlab-exporter 10.4.0 → 11.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: ace0ad673d8d36d1a6dbbacefb7f6aefde7d795e76eda8d3788c4cb4f5eb5247
-   data.tar.gz: ac42d2e6deaaa3fee45ed74ec1cec81f402abba0c4f14798878b2b006c72400b
+   metadata.gz: b3eef01acf595b16deb86b2578776e3487b513865fca93ede7684ba982932706
+   data.tar.gz: e5194e0230ac9672b9c30fbd1bf1be3b11333d894a21505dd97e39891f72b960
  SHA512:
-   metadata.gz: d3ac36c2b2ec584cb8ac60355649eb79d5a3d96e85a266f14794627fc3a9fa61a5cfb231f6c33e044cbb2e71d7965c50463b6673f80c69de4ebca58d658280e6
-   data.tar.gz: 43323ec5663db9dd865e84b3c7907ba43f805a958904cfef57d7566b6259a31cc1a7aa96c494ffb976b4b04148c2fbfe63bd688844672ad1938c41bf05181a4b
+   metadata.gz: 60c79e4a4e550adf557129aa278986726370e21f77ca5f67864f23fbb181f5c904d02f5015d51d8317c748b9a72b74cfd59a5f14b4a0af0ffce6b03883ceef4a
+   data.tar.gz: 5bd99ea95151fd95a5f72597474ec6c81e3fd9e1f0284f5d85b7595257f24cddde5b0a315a6705a6258b8b7d0ebcbe5178f6816978ebc3b2c47a435578d58ee0
data/.gitlab-ci.yml CHANGED
@@ -6,6 +6,10 @@ include:
    - template: Security/SAST.gitlab-ci.yml # https://gitlab.com/gitlab-org/gitlab-foss/blob/master/lib/gitlab/ci/templates/Security/SAST.gitlab-ci.yml
    - template: Security/Secret-Detection.gitlab-ci.yml # https://gitlab.com/gitlab-org/gitlab-foss/-/blob/master/lib/gitlab/ci/templates/Security/Secret-Detection.gitlab-ci.yml

+ stages:
+   - test
+   - dast
+
  default:
    image: ruby:2.7
    cache:
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
    remote: .
    specs:
-     gitlab-exporter (10.4.0)
+     gitlab-exporter (11.1.0)
        connection_pool (= 2.2.5)
        pg (= 1.2.3)
        puma (= 5.3.2)
data/README.md CHANGED
@@ -34,7 +34,7 @@ metrics.
     `git_pull_time_milliseconds`, `git_push_time_milliseconds`
     * git processes stats (see Process below)
  1. [Sidekiq](lib/gitlab_exporter/sidekiq.rb)
-    * Stats
+    * Stats (probe_stats)
       * `sidekiq_jobs_processed_total`
       * `sidekiq_jobs_failed_total`
       * `sidekiq_jobs_enqueued_size`
@@ -44,14 +44,21 @@ metrics.
       * `sidekiq_default_queue_latency_seconds`
       * `sidekiq_processes_size`
       * `sidekiq_workers_size`
-    * Queues
+    * Queues (probe_queues)
       * `sidekiq_queue_size`
       * `sidekiq_queue_paused`
       * `sidekiq_queue_latency_seconds`
-    * Jobs
+    * Jobs (probe_jobs_limit)
       * `sidekiq_enqueued_jobs`
+    * Workers (probe_workers)
       * `sidekiq_running_jobs`
+    * Retries (probe_retries)
       * `sidekiq_to_be_retried_jobs`
+    * Future Sets (probe_future_sets)
+      * `sidekiq_schedule_set_processing_delay_seconds`
+      * `sidekiq_schedule_set_backlog_count`
+      * `sidekiq_retry_set_processing_delay_seconds`
+      * `sidekiq_retry_set_backlog_count`

  ### Setup with GitLab Development Kit

@@ -78,8 +78,6 @@ probes:
    opts:
      - pid_or_pattern: "sidekiq .* \\[.*?\\]"
        name: sidekiq
-     - pid_or_pattern: "unicorn.* worker\\[.*?\\]"
-       name: unicorn
      - pid_or_pattern: "git-upload-pack --stateless-rpc"
        name: git_upload_pack
        quantiles: true
@@ -217,7 +217,7 @@ module GitLab
        opts.on("--pid=123", "Process ID") do |val|
          @pid = val
        end
-       opts.on("--pattern=unicorn", "Process command pattern") do |val|
+       opts.on("--pattern=worker", "Process command pattern") do |val|
          @pattern = val
        end
        opts.on("--name=NAME", "Process name to be used in metrics") do |val|
@@ -275,7 +275,7 @@ module GitLab
        ::GitLab::Exporter::SidekiqProber.new(redis_url: @redis_url)
                                         .probe_stats
                                         .probe_queues
-                                        .probe_jobs
+                                        .probe_jobs_limit
                                         .probe_workers
                                         .probe_retries
                                         .write_to(@target)
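Note: for orientation, below is a minimal sketch of driving this prober chain outside the CLI, with probe_jobs_limit in place of the removed probe_jobs. The require path, the local Redis URL, and $stdout as the write target are illustrative assumptions, not taken from this diff; the chained probe_* calls and write_to come from the hunk above.

  # Hypothetical standalone usage; mirrors the CLI chain above.
  require "gitlab_exporter"  # assumed entry point for the gem

  prober = GitLab::Exporter::SidekiqProber.new(redis_url: "redis://localhost:6379")
  prober.probe_stats
        .probe_queues
        .probe_jobs_limit    # replaces the removed probe_jobs
        .probe_workers
        .probe_retries
        .write_to($stdout)   # @target in the CLI; any IO-like sink should do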
@@ -15,6 +15,21 @@ module GitLab
        @include_timestamp = include_timestamp
      end

+     class << self
+       def describe(name, description)
+         @metric_descriptions ||= {}
+         @metric_descriptions[name] = description
+       end
+
+       def description(name)
+         @metric_descriptions && @metric_descriptions[name]
+       end
+
+       def clear_descriptions
+         @metric_descriptions = {}
+       end
+     end
+
      def add(name, value, quantile = false, **labels)
        fail "value '#{value}' must be a number" unless value.is_a?(Numeric)

@@ -32,6 +47,8 @@ module GitLab

      buffer = ""
      @metrics.each do |name, measurements|
+       buffer << "# HELP #{name} #{self.class.description(name)}\n" if self.class.description(name)
+
        measurements.each do |measurement|
          buffer << name.to_s
          labels = (measurement[:labels] || {}).map { |label, value| "#{label}=\"#{value}\"" }.join(",")
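Note: the describe/description registry added above is what feeds the new "# HELP" line when metrics are rendered. A rough usage sketch follows; the describe call and the add signature come from this diff and the spec further down, while the require path, the include_timestamp: keyword, and the name: label key are assumptions for illustration only.

  require "gitlab_exporter/prometheus"  # assumed require path for this file

  GitLab::Exporter::PrometheusMetrics.describe("sidekiq_enqueued_jobs",
                                               "Total number of jobs enqueued by class name.")

  metrics = GitLab::Exporter::PrometheusMetrics.new(include_timestamp: false)  # assumed constructor keyword
  metrics.add("sidekiq_enqueued_jobs", 42, name: "PostReceive")                # label key is illustrative

  puts metrics.to_s
  # Expected shape of the output (no timestamp because of the assumption above):
  #   # HELP sidekiq_enqueued_jobs Total number of jobs enqueued by class name.
  #   sidekiq_enqueued_jobs{name="PostReceive"} 42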
@@ -1,4 +1,5 @@
  require "sidekiq/api"
+ require "sidekiq/scheduled"
  require "digest"

  module GitLab
@@ -7,8 +8,12 @@ module GitLab
    #
    # It takes the Redis URL Sidekiq is connected to
    class SidekiqProber
-     QUEUE_JOB_STATS_SCRIPT = File.read(File.expand_path("#{__FILE__}/../sidekiq_queue_job_stats.lua")).freeze
-     QUEUE_JOB_STATS_SHA = Digest::SHA1.hexdigest(QUEUE_JOB_STATS_SCRIPT).freeze
+     # The maximum depth (from the head) of each queue to probe. Probing the
+     # entirety of a very large queue will take longer and run the risk of
+     # timing out. But when we have a very large queue, we are most in need of
+     # reliable metrics. This trades off completeness for predictability by
+     # only taking a limited amount of items from the head of the queue.
+     PROBE_JOBS_LIMIT = 1_000

      POOL_SIZE = 3

@@ -17,6 +22,9 @@ module GitLab
      # needed to be re-initialized
      POOL_TIMEOUT = 90

+     PrometheusMetrics.describe("sidekiq_enqueued_jobs",
+                                "Total number of jobs enqueued by class name. Only inspects the first #{PROBE_JOBS_LIMIT} jobs per queue.") # rubocop:disable Layout/LineLength
+
      def self.connection_pool
        @@connection_pool ||= Hash.new do |h, connection_hash| # rubocop:disable Style/ClassVars
          config = connection_hash.merge(pool_timeout: POOL_TIMEOUT, size: POOL_SIZE)
@@ -63,17 +71,51 @@ module GitLab
      end

      def probe_jobs
+       puts "[REMOVED] probe_jobs is now considered obsolete and does not emit any metrics,"\
+         " please use probe_jobs_limit instead"
+
+       self
+     end
+
+     def probe_future_sets
+       now = Time.now.to_f
+       with_sidekiq do
+         Sidekiq.redis do |conn|
+           Sidekiq::Scheduled::SETS.each do |set|
+             # Default to 0; if all jobs are due in the future, there is no "negative" delay.
+             delay = 0
+
+             _job, timestamp = conn.zrangebyscore(set, "-inf", now.to_s, limit: [0, 1], withscores: true).first
+             delay = now - timestamp if timestamp
+
+             @metrics.add("sidekiq_#{set}_set_processing_delay_seconds", delay)
+
+             # zcount is O(log(N)) (prob. binary search), so is still quick even with large sets
+             @metrics.add("sidekiq_#{set}_set_backlog_count",
+                          conn.zcount(set, "-inf", now.to_s))
+           end
+         end
+       end
+     end
+
+     # Count worker classes present in Sidekiq queues. This only looks at the
+     # first PROBE_JOBS_LIMIT jobs in each queue. This means that we run a
+     # single LRANGE command for each queue, which does not block other
+     # commands. For queues over PROBE_JOBS_LIMIT in size, this means that we
+     # will not have completely accurate statistics, but the probe performance
+     # will also not degrade as the queue gets larger.
+     def probe_jobs_limit
        with_sidekiq do
-         job_stats = {}
+         job_stats = Hash.new(0)

          Sidekiq::Queue.all.each do |queue|
            Sidekiq.redis do |conn|
-             stats = conn.evalsha(QUEUE_JOB_STATS_SHA, ["queue:#{queue.name}"])
-             job_stats.merge!(stats.to_h)
+             conn.lrange("queue:#{queue.name}", 0, PROBE_JOBS_LIMIT).each do |job|
+               job_class = Sidekiq.load_json(job)["class"]
+
+               job_stats[job_class] += 1
+             end
            end
-         rescue Redis::CommandError # Could happen if the script exceeded the maximum run time (5 seconds by default)
-           # FIXME: Should we call SCRIPT KILL?
-           return self
          end

          job_stats.each do |class_name, count|
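Note: the bounded-LRANGE approach above can be reproduced outside the prober with plain redis-rb and JSON, which makes the trade-off in the method comment easy to see: only the first PROBE_JOBS_LIMIT entries of each queue are inspected, so cost stays flat while very long queues are undercounted. The queue name, payload shape, and Redis URL below are illustrative assumptions.

  require "json"
  require "redis"

  redis = Redis.new(url: "redis://localhost:6379")
  limit = 1_000  # mirrors PROBE_JOBS_LIMIT

  # One bounded LRANGE per queue; anything past the limit is simply not counted.
  counts = Hash.new(0)
  redis.lrange("queue:default", 0, limit).each do |raw|
    counts[JSON.parse(raw)["class"]] += 1
  end

  counts.each { |klass, n| puts "#{klass}: #{n}" }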
@@ -169,14 +211,9 @@ module GitLab
      def connected?
        return @connected unless @connected.nil?

-       # This is also a good "connected check"
        Sidekiq.redis do |conn|
-         # Using administrative commands on conn directly (which is a Redis::Namespace)
-         # will be removed in redis-namespace 2.0.
-         conn.redis.script(:load, QUEUE_JOB_STATS_SCRIPT) unless conn.redis.script(:exists, QUEUE_JOB_STATS_SHA)
+         @connected = (conn.ping == "PONG")
        end
-
-       @connected = true
      rescue Redis::BaseConnectionError => e
        @logger&.error "Error connecting to the Redis: #{e}"
        @connected = false
@@ -1,5 +1,5 @@
  module GitLab
    module Exporter
-     VERSION = "10.4.0".freeze
+     VERSION = "11.1.0".freeze
    end
  end
@@ -23,4 +23,21 @@ describe GitLab::Exporter::PrometheusMetrics do
        subject.add("mymetric", "invalid", mylabel: "x", myotherlabel: "y").to_s
      }.to raise_error(RuntimeError)
    end
+
+   it "supports described metrics" do
+     time = Time.now
+
+     allow(Time).to receive(:now).and_return(time)
+
+     described_class.describe("mymetric", "description")
+     described_class.describe("missingmetric", "otherdescription")
+     subject.add("mymetric", 1.3, mylabel: "x", myotherlabel: "y")
+
+     expect(subject.to_s).to eq(<<~METRICS)
+       # HELP mymetric description
+       mymetric{mylabel="x",myotherlabel="y"} 1.3 #{(time.to_f * 1000).to_i}
+     METRICS
+
+     described_class.clear_descriptions
+   end
  end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: gitlab-exporter
  version: !ruby/object:Gem::Version
-   version: 10.4.0
+   version: 11.1.0
  platform: ruby
  authors:
  - Pablo Carranza
@@ -189,7 +189,6 @@ files:
  - lib/gitlab_exporter/prometheus.rb
  - lib/gitlab_exporter/ruby.rb
  - lib/gitlab_exporter/sidekiq.rb
- - lib/gitlab_exporter/sidekiq_queue_job_stats.lua
  - lib/gitlab_exporter/util.rb
  - lib/gitlab_exporter/version.rb
  - lib/gitlab_exporter/web_exporter.rb
@@ -1,42 +0,0 @@
- --
- -- Adapted from https://github.com/mperham/sidekiq/blob/2f9258e4fe77991c526f7a65c92bcf792eef8338/lib/sidekiq/api.rb#L231
- --
- local queue_name = KEYS[1]
- local initial_size = redis.call('llen', queue_name)
- local deleted_size = 0
- local page = 0
- local page_size = 2000
- local temp_job_stats = {}
- local final_job_stats = {}
-
- while true do
-   local range_start = page * page_size - deleted_size
-   local range_end = range_start + page_size - 1
-   local entries = redis.call('lrange', queue_name, range_start, range_end)
-
-   if #entries == 0 then
-     break
-   end
-
-   page = page + 1
-
-   for index, entry in next, entries do
-     local class = cjson.decode(entry)['class']
-     if class ~= nil then
-       if temp_job_stats[class] ~= nil then
-         temp_job_stats[class] = temp_job_stats[class] + 1
-       else
-         temp_job_stats[class] = 1
-       end
-     end
-   end
-
-   deleted_size = initial_size - redis.call('llen', queue_name)
- end
-
- for class, count in next, temp_job_stats do
-   local stat_entry = {class, count}
-   table.insert(final_job_stats, stat_entry)
- end
-
- return final_job_stats