gitlab-exporter 10.4.0 → 11.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ace0ad673d8d36d1a6dbbacefb7f6aefde7d795e76eda8d3788c4cb4f5eb5247
4
- data.tar.gz: ac42d2e6deaaa3fee45ed74ec1cec81f402abba0c4f14798878b2b006c72400b
3
+ metadata.gz: b3eef01acf595b16deb86b2578776e3487b513865fca93ede7684ba982932706
4
+ data.tar.gz: e5194e0230ac9672b9c30fbd1bf1be3b11333d894a21505dd97e39891f72b960
5
5
  SHA512:
6
- metadata.gz: d3ac36c2b2ec584cb8ac60355649eb79d5a3d96e85a266f14794627fc3a9fa61a5cfb231f6c33e044cbb2e71d7965c50463b6673f80c69de4ebca58d658280e6
7
- data.tar.gz: 43323ec5663db9dd865e84b3c7907ba43f805a958904cfef57d7566b6259a31cc1a7aa96c494ffb976b4b04148c2fbfe63bd688844672ad1938c41bf05181a4b
6
+ metadata.gz: 60c79e4a4e550adf557129aa278986726370e21f77ca5f67864f23fbb181f5c904d02f5015d51d8317c748b9a72b74cfd59a5f14b4a0af0ffce6b03883ceef4a
7
+ data.tar.gz: 5bd99ea95151fd95a5f72597474ec6c81e3fd9e1f0284f5d85b7595257f24cddde5b0a315a6705a6258b8b7d0ebcbe5178f6816978ebc3b2c47a435578d58ee0
data/.gitlab-ci.yml CHANGED
@@ -6,6 +6,10 @@ include:
6
6
  - template: Security/SAST.gitlab-ci.yml # https://gitlab.com/gitlab-org/gitlab-foss/blob/master/lib/gitlab/ci/templates/Security/SAST.gitlab-ci.yml
7
7
  - template: Security/Secret-Detection.gitlab-ci.yml # https://gitlab.com/gitlab-org/gitlab-foss/-/blob/master/lib/gitlab/ci/templates/Security/Secret-Detection.gitlab-ci.yml
8
8
 
9
+ stages:
10
+ - test
11
+ - dast
12
+
9
13
  default:
10
14
  image: ruby:2.7
11
15
  cache:
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- gitlab-exporter (10.4.0)
4
+ gitlab-exporter (11.1.0)
5
5
  connection_pool (= 2.2.5)
6
6
  pg (= 1.2.3)
7
7
  puma (= 5.3.2)
data/README.md CHANGED
@@ -34,7 +34,7 @@ metrics.
34
34
  `git_pull_time_milliseconds`, `git_push_time_milliseconds`
35
35
  * git processes stats (see Process below)
36
36
  1. [Sidekiq](lib/gitlab_exporter/sidekiq.rb)
37
- * Stats
37
+ * Stats (probe_stats)
38
38
  * `sidekiq_jobs_processed_total`
39
39
  * `sidekiq_jobs_failed_total`
40
40
  * `sidekiq_jobs_enqueued_size`
@@ -44,14 +44,21 @@ metrics.
44
44
  * `sidekiq_default_queue_latency_seconds`
45
45
  * `sidekiq_processes_size`
46
46
  * `sidekiq_workers_size`
47
- * Queues
47
+ * Queues (probe_queues)
48
48
  * `sidekiq_queue_size`
49
49
  * `sidekiq_queue_paused`
50
50
  * `sidekiq_queue_latency_seconds`
51
- * Jobs
51
+ * Jobs (probe_jobs_limit)
52
52
  * `sidekiq_enqueued_jobs`
53
+ * Workers (probe_workers)
53
54
  * `sidekiq_running_jobs`
55
+ * Retries (probe_retries)
54
56
  * `sidekiq_to_be_retried_jobs`
57
+ * Future Sets (probe_future_sets)
58
+ * `sidekiq_schedule_set_processing_delay_seconds`
59
+ * `sidekiq_schedule_set_backlog_count`
60
+ * `sidekiq_retry_set_processing_delay_seconds`
61
+ * `sidekiq_retry_set_backlog_count`
55
62
 
56
63
  ### Setup with GitLab Development Kit
57
64
 
@@ -78,8 +78,6 @@ probes:
78
78
  opts:
79
79
  - pid_or_pattern: "sidekiq .* \\[.*?\\]"
80
80
  name: sidekiq
81
- - pid_or_pattern: "unicorn.* worker\\[.*?\\]"
82
- name: unicorn
83
81
  - pid_or_pattern: "git-upload-pack --stateless-rpc"
84
82
  name: git_upload_pack
85
83
  quantiles: true
@@ -217,7 +217,7 @@ module GitLab
217
217
  opts.on("--pid=123", "Process ID") do |val|
218
218
  @pid = val
219
219
  end
220
- opts.on("--pattern=unicorn", "Process command pattern") do |val|
220
+ opts.on("--pattern=worker", "Process command pattern") do |val|
221
221
  @pattern = val
222
222
  end
223
223
  opts.on("--name=NAME", "Process name to be used in metrics") do |val|
@@ -275,7 +275,7 @@ module GitLab
275
275
  ::GitLab::Exporter::SidekiqProber.new(redis_url: @redis_url)
276
276
  .probe_stats
277
277
  .probe_queues
278
- .probe_jobs
278
+ .probe_jobs_limit
279
279
  .probe_workers
280
280
  .probe_retries
281
281
  .write_to(@target)
@@ -15,6 +15,21 @@ module GitLab
15
15
  @include_timestamp = include_timestamp
16
16
  end
17
17
 
18
class << self
  # Registers a human-readable description for a metric name.
  #
  # The description is stored at class level, so it is shared by every
  # instance. When rendering, a "# HELP" line is emitted for any recorded
  # metric whose name has a description.
  #
  # Names are normalised with #to_s, so describing a metric with a Symbol
  # and looking it up with a String (or the reverse) resolves to the same
  # entry. The renderer already prints names via #to_s.
  #
  # @param name [String, Symbol] metric name
  # @param description [String] text placed after "# HELP <name>"
  # @return [String] the description that was stored
  def describe(name, description)
    @metric_descriptions ||= {}
    @metric_descriptions[name.to_s] = description
  end

  # Looks up the description registered for a metric name.
  #
  # @param name [String, Symbol] metric name
  # @return [String, nil] the description, or nil when none was registered
  def description(name)
    @metric_descriptions && @metric_descriptions[name.to_s]
  end

  # Drops every registered description.
  #
  # @return [Hash] the new, empty description table
  def clear_descriptions
    @metric_descriptions = {}
  end
end
32
+
18
33
  def add(name, value, quantile = false, **labels)
19
34
  fail "value '#{value}' must be a number" unless value.is_a?(Numeric)
20
35
 
@@ -32,6 +47,8 @@ module GitLab
32
47
 
33
48
  buffer = ""
34
49
  @metrics.each do |name, measurements|
50
+ buffer << "# HELP #{name} #{self.class.description(name)}\n" if self.class.description(name)
51
+
35
52
  measurements.each do |measurement|
36
53
  buffer << name.to_s
37
54
  labels = (measurement[:labels] || {}).map { |label, value| "#{label}=\"#{value}\"" }.join(",")
@@ -1,4 +1,5 @@
1
1
  require "sidekiq/api"
2
+ require "sidekiq/scheduled"
2
3
  require "digest"
3
4
 
4
5
  module GitLab
@@ -7,8 +8,12 @@ module GitLab
7
8
  #
8
9
  # It takes the Redis URL Sidekiq is connected to
9
10
  class SidekiqProber
10
- QUEUE_JOB_STATS_SCRIPT = File.read(File.expand_path("#{__FILE__}/../sidekiq_queue_job_stats.lua")).freeze
11
- QUEUE_JOB_STATS_SHA = Digest::SHA1.hexdigest(QUEUE_JOB_STATS_SCRIPT).freeze
11
+ # The maximum depth (from the head) of each queue to probe. Probing the
12
+ # entirety of a very large queue will take longer and run the risk of
13
+ # timing out. But when we have a very large queue, we are most in need of
14
+ # reliable metrics. This trades off completeness for predictability by
15
+ # only taking a limited number of items from the head of the queue.
16
+ PROBE_JOBS_LIMIT = 1_000
12
17
 
13
18
  POOL_SIZE = 3
14
19
 
@@ -17,6 +22,9 @@ module GitLab
17
22
  # needed to be re-initialized
18
23
  POOL_TIMEOUT = 90
19
24
 
25
+ PrometheusMetrics.describe("sidekiq_enqueued_jobs",
26
+ "Total number of jobs enqueued by class name. Only inspects the first #{PROBE_JOBS_LIMIT} jobs per queue.") # rubocop:disable Layout/LineLength
27
+
20
28
  def self.connection_pool
21
29
  @@connection_pool ||= Hash.new do |h, connection_hash| # rubocop:disable Style/ClassVars
22
30
  config = connection_hash.merge(pool_timeout: POOL_TIMEOUT, size: POOL_SIZE)
@@ -63,17 +71,51 @@ module GitLab
63
71
  end
64
72
 
65
73
  def probe_jobs
74
+ puts "[REMOVED] probe_jobs is now considered obsolete and does not emit any metrics,"\
75
+ " please use probe_jobs_limit instead"
76
+
77
+ self
78
+ end
79
+
80
# Records two metrics for every set named in Sidekiq::Scheduled::SETS:
#
# * sidekiq_<set>_set_processing_delay_seconds - how far in the past the
#   lowest-scored entry with a score at or before "now" is; 0 when no entry
#   is due yet.
# * sidekiq_<set>_set_backlog_count - how many entries have a score at or
#   before "now".
#
# "now" is sampled once, before talking to Redis, so both metrics of every
# set are measured against the same instant.
#
# @return [self] so the probe can be chained like the other probes
def probe_future_sets
  now = Time.now.to_f

  with_sidekiq do
    Sidekiq.redis do |conn|
      Sidekiq::Scheduled::SETS.each do |set|
        # Default to 0; if all jobs are due in the future, there is no "negative" delay.
        delay = 0

        # Fetch at most one [job, score] pair: the entry with the lowest score <= now.
        _job, timestamp = conn.zrangebyscore(set, "-inf", now.to_s, limit: [0, 1], withscores: true).first
        delay = now - timestamp if timestamp

        @metrics.add("sidekiq_#{set}_set_processing_delay_seconds", delay)

        # zcount is O(log(N)), so it is still quick even with large sets
        @metrics.add("sidekiq_#{set}_set_backlog_count",
                     conn.zcount(set, "-inf", now.to_s))
      end
    end
  end

  # Return the prober itself rather than whatever with_sidekiq evaluates to.
  self
end
100
+
101
+ # Count worker classes present in Sidekiq queues. This only looks at the
102
+ # first PROBE_JOBS_LIMIT jobs in each queue. This means that we run a
103
+ # single LRANGE command for each queue, which does not block other
104
+ # commands. For queues over PROBE_JOBS_LIMIT in size, this means that we
105
+ # will not have completely accurate statistics, but the probe performance
106
+ # will also not degrade as the queue gets larger.
107
+ def probe_jobs_limit
66
108
  with_sidekiq do
67
- job_stats = {}
109
+ job_stats = Hash.new(0)
68
110
 
69
111
  Sidekiq::Queue.all.each do |queue|
70
112
  Sidekiq.redis do |conn|
71
- stats = conn.evalsha(QUEUE_JOB_STATS_SHA, ["queue:#{queue.name}"])
72
- job_stats.merge!(stats.to_h)
113
# LRANGE treats both offsets as inclusive, so the stop index must be
# PROBE_JOBS_LIMIT - 1 to read at most PROBE_JOBS_LIMIT jobs from the
# head of the queue (0..PROBE_JOBS_LIMIT would read one job too many).
conn.lrange("queue:#{queue.name}", 0, PROBE_JOBS_LIMIT - 1).each do |job|
  job_class = Sidekiq.load_json(job)["class"]

  job_stats[job_class] += 1
end
73
118
  end
74
- rescue Redis::CommandError # Could happen if the script exceeded the maximum run time (5 seconds by default)
75
- # FIXME: Should we call SCRIPT KILL?
76
- return self
77
119
  end
78
120
 
79
121
  job_stats.each do |class_name, count|
@@ -169,14 +211,9 @@ module GitLab
169
211
  def connected?
170
212
  return @connected unless @connected.nil?
171
213
 
172
- # This is also a good "connected check"
173
214
  Sidekiq.redis do |conn|
174
- # Using administrative commands on conn directly (which is a Redis::Namespace)
175
- # will be removed in redis-namespace 2.0.
176
- conn.redis.script(:load, QUEUE_JOB_STATS_SCRIPT) unless conn.redis.script(:exists, QUEUE_JOB_STATS_SHA)
215
+ @connected = (conn.ping == "PONG")
177
216
  end
178
-
179
- @connected = true
180
217
  rescue Redis::BaseConnectionError => e
181
218
  @logger&.error "Error connecting to the Redis: #{e}"
182
219
  @connected = false
@@ -1,5 +1,5 @@
1
1
  module GitLab
2
2
  module Exporter
3
- VERSION = "10.4.0".freeze
3
+ VERSION = "11.1.0".freeze
4
4
  end
5
5
  end
@@ -23,4 +23,21 @@ describe GitLab::Exporter::PrometheusMetrics do
23
23
  subject.add("mymetric", "invalid", mylabel: "x", myotherlabel: "y").to_s
24
24
  }.to raise_error(RuntimeError)
25
25
  end
26
+
27
it "supports described metrics" do
  time = Time.now

  # Freeze the clock so the timestamp in the rendered output is predictable.
  allow(Time).to receive(:now).and_return(time)

  described_class.describe("mymetric", "description")
  # Described but never recorded: must not appear in the output.
  described_class.describe("missingmetric", "otherdescription")
  subject.add("mymetric", 1.3, mylabel: "x", myotherlabel: "y")

  expect(subject.to_s).to eq(<<~METRICS)
    # HELP mymetric description
    mymetric{mylabel="x",myotherlabel="y"} 1.3 #{(time.to_f * 1000).to_i}
  METRICS
ensure
  # Descriptions live at class level; reset them even when the expectation
  # fails so they cannot leak into other examples.
  described_class.clear_descriptions
end
26
43
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gitlab-exporter
3
3
  version: !ruby/object:Gem::Version
4
- version: 10.4.0
4
+ version: 11.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Pablo Carranza
@@ -189,7 +189,6 @@ files:
189
189
  - lib/gitlab_exporter/prometheus.rb
190
190
  - lib/gitlab_exporter/ruby.rb
191
191
  - lib/gitlab_exporter/sidekiq.rb
192
- - lib/gitlab_exporter/sidekiq_queue_job_stats.lua
193
192
  - lib/gitlab_exporter/util.rb
194
193
  - lib/gitlab_exporter/version.rb
195
194
  - lib/gitlab_exporter/web_exporter.rb
@@ -1,42 +0,0 @@
1
- --
2
- -- Adapted from https://github.com/mperham/sidekiq/blob/2f9258e4fe77991c526f7a65c92bcf792eef8338/lib/sidekiq/api.rb#L231
3
- --
4
- local queue_name = KEYS[1]
5
- local initial_size = redis.call('llen', queue_name)
6
- local deleted_size = 0
7
- local page = 0
8
- local page_size = 2000
9
- local temp_job_stats = {}
10
- local final_job_stats = {}
11
-
12
- while true do
13
- local range_start = page * page_size - deleted_size
14
- local range_end = range_start + page_size - 1
15
- local entries = redis.call('lrange', queue_name, range_start, range_end)
16
-
17
- if #entries == 0 then
18
- break
19
- end
20
-
21
- page = page + 1
22
-
23
- for index, entry in next, entries do
24
- local class = cjson.decode(entry)['class']
25
- if class ~= nil then
26
- if temp_job_stats[class] ~= nil then
27
- temp_job_stats[class] = temp_job_stats[class] + 1
28
- else
29
- temp_job_stats[class] = 1
30
- end
31
- end
32
- end
33
-
34
- deleted_size = initial_size - redis.call('llen', queue_name)
35
- end
36
-
37
- for class, count in next, temp_job_stats do
38
- local stat_entry = {class, count}
39
- table.insert(final_job_stats, stat_entry)
40
- end
41
-
42
- return final_job_stats