gitlab-exporter 10.4.0 → 11.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitlab-ci.yml +4 -0
- data/Gemfile.lock +1 -1
- data/README.md +10 -3
- data/config/gitlab-exporter.yml.example +0 -2
- data/lib/gitlab_exporter/cli.rb +2 -2
- data/lib/gitlab_exporter/prometheus.rb +17 -0
- data/lib/gitlab_exporter/sidekiq.rb +51 -14
- data/lib/gitlab_exporter/version.rb +1 -1
- data/spec/prometheus_metrics_spec.rb +17 -0
- metadata +1 -2
- data/lib/gitlab_exporter/sidekiq_queue_job_stats.lua +0 -42
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b3eef01acf595b16deb86b2578776e3487b513865fca93ede7684ba982932706
|
4
|
+
data.tar.gz: e5194e0230ac9672b9c30fbd1bf1be3b11333d894a21505dd97e39891f72b960
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 60c79e4a4e550adf557129aa278986726370e21f77ca5f67864f23fbb181f5c904d02f5015d51d8317c748b9a72b74cfd59a5f14b4a0af0ffce6b03883ceef4a
|
7
|
+
data.tar.gz: 5bd99ea95151fd95a5f72597474ec6c81e3fd9e1f0284f5d85b7595257f24cddde5b0a315a6705a6258b8b7d0ebcbe5178f6816978ebc3b2c47a435578d58ee0
|
data/.gitlab-ci.yml
CHANGED
@@ -6,6 +6,10 @@ include:
|
|
6
6
|
- template: Security/SAST.gitlab-ci.yml # https://gitlab.com/gitlab-org/gitlab-foss/blob/master/lib/gitlab/ci/templates/Security/SAST.gitlab-ci.yml
|
7
7
|
- template: Security/Secret-Detection.gitlab-ci.yml # https://gitlab.com/gitlab-org/gitlab-foss/-/blob/master/lib/gitlab/ci/templates/Security/Secret-Detection.gitlab-ci.yml
|
8
8
|
|
9
|
+
stages:
|
10
|
+
- test
|
11
|
+
- dast
|
12
|
+
|
9
13
|
default:
|
10
14
|
image: ruby:2.7
|
11
15
|
cache:
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -34,7 +34,7 @@ metrics.
|
|
34
34
|
`git_pull_time_milliseconds`, `git_push_time_milliseconds`
|
35
35
|
* git processes stats (see Process below)
|
36
36
|
1. [Sidekiq](lib/gitlab_exporter/sidekiq.rb)
|
37
|
-
* Stats
|
37
|
+
* Stats (probe_stats)
|
38
38
|
* `sidekiq_jobs_processed_total`
|
39
39
|
* `sidekiq_jobs_failed_total`
|
40
40
|
* `sidekiq_jobs_enqueued_size`
|
@@ -44,14 +44,21 @@ metrics.
|
|
44
44
|
* `sidekiq_default_queue_latency_seconds`
|
45
45
|
* `sidekiq_processes_size`
|
46
46
|
* `sidekiq_workers_size`
|
47
|
-
* Queues
|
47
|
+
* Queues (probe_queues)
|
48
48
|
* `sidekiq_queue_size`
|
49
49
|
* `sidekiq_queue_paused`
|
50
50
|
* `sidekiq_queue_latency_seconds`
|
51
|
-
* Jobs
|
51
|
+
* Jobs (probe_jobs_limit)
|
52
52
|
* `sidekiq_enqueued_jobs`
|
53
|
+
* Workers (probe_workers)
|
53
54
|
* `sidekiq_running_jobs`
|
55
|
+
* Retries (probe_retries)
|
54
56
|
* `sidekiq_to_be_retried_jobs`
|
57
|
+
* Future Sets (probe_future_sets)
|
58
|
+
* `sidekiq_schedule_set_processing_delay_seconds`
|
59
|
+
* `sidekiq_schedule_set_backlog_count`
|
60
|
+
* `sidekiq_retry_set_processing_delay_seconds`
|
61
|
+
* `sidekiq_retry_set_backlog_count`
|
55
62
|
|
56
63
|
### Setup with GitLab Development Kit
|
57
64
|
|
data/lib/gitlab_exporter/cli.rb
CHANGED
@@ -217,7 +217,7 @@ module GitLab
|
|
217
217
|
opts.on("--pid=123", "Process ID") do |val|
|
218
218
|
@pid = val
|
219
219
|
end
|
220
|
-
opts.on("--pattern=
|
220
|
+
opts.on("--pattern=worker", "Process command pattern") do |val|
|
221
221
|
@pattern = val
|
222
222
|
end
|
223
223
|
opts.on("--name=NAME", "Process name to be used in metrics") do |val|
|
@@ -275,7 +275,7 @@ module GitLab
|
|
275
275
|
::GitLab::Exporter::SidekiqProber.new(redis_url: @redis_url)
|
276
276
|
.probe_stats
|
277
277
|
.probe_queues
|
278
|
-
.probe_jobs
|
278
|
+
.probe_jobs_limit
|
279
279
|
.probe_workers
|
280
280
|
.probe_retries
|
281
281
|
.write_to(@target)
|
data/lib/gitlab_exporter/prometheus.rb
CHANGED
@@ -15,6 +15,21 @@ module GitLab
|
|
15
15
|
@include_timestamp = include_timestamp
|
16
16
|
end
|
17
17
|
|
18
|
+
class << self
|
19
|
+
def describe(name, description)
|
20
|
+
@metric_descriptions ||= {}
|
21
|
+
@metric_descriptions[name] = description
|
22
|
+
end
|
23
|
+
|
24
|
+
def description(name)
|
25
|
+
@metric_descriptions && @metric_descriptions[name]
|
26
|
+
end
|
27
|
+
|
28
|
+
def clear_descriptions
|
29
|
+
@metric_descriptions = {}
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
18
33
|
def add(name, value, quantile = false, **labels)
|
19
34
|
fail "value '#{value}' must be a number" unless value.is_a?(Numeric)
|
20
35
|
|
@@ -32,6 +47,8 @@ module GitLab
|
|
32
47
|
|
33
48
|
buffer = ""
|
34
49
|
@metrics.each do |name, measurements|
|
50
|
+
buffer << "# HELP #{name} #{self.class.description(name)}\n" if self.class.description(name)
|
51
|
+
|
35
52
|
measurements.each do |measurement|
|
36
53
|
buffer << name.to_s
|
37
54
|
labels = (measurement[:labels] || {}).map { |label, value| "#{label}=\"#{value}\"" }.join(",")
|
data/lib/gitlab_exporter/sidekiq.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require "sidekiq/api"
|
2
|
+
require "sidekiq/scheduled"
|
2
3
|
require "digest"
|
3
4
|
|
4
5
|
module GitLab
|
@@ -7,8 +8,12 @@ module GitLab
|
|
7
8
|
#
|
8
9
|
# It takes the Redis URL Sidekiq is connected to
|
9
10
|
class SidekiqProber
|
10
|
-
|
11
|
-
|
11
|
+
# The maximum depth (from the head) of each queue to probe. Probing the
|
12
|
+
# entirety of a very large queue will take longer and run the risk of
|
13
|
+
# timing out. But when we have a very large queue, we are most in need of
|
14
|
+
# reliable metrics. This trades off completeness for predictability by
|
15
|
+
# only taking a limited amount of items from the head of the queue.
|
16
|
+
PROBE_JOBS_LIMIT = 1_000
|
12
17
|
|
13
18
|
POOL_SIZE = 3
|
14
19
|
|
@@ -17,6 +22,9 @@ module GitLab
|
|
17
22
|
# needed to be re-initialized
|
18
23
|
POOL_TIMEOUT = 90
|
19
24
|
|
25
|
+
PrometheusMetrics.describe("sidekiq_enqueued_jobs",
|
26
|
+
"Total number of jobs enqueued by class name. Only inspects the first #{PROBE_JOBS_LIMIT} jobs per queue.") # rubocop:disable Layout/LineLength
|
27
|
+
|
20
28
|
def self.connection_pool
|
21
29
|
@@connection_pool ||= Hash.new do |h, connection_hash| # rubocop:disable Style/ClassVars
|
22
30
|
config = connection_hash.merge(pool_timeout: POOL_TIMEOUT, size: POOL_SIZE)
|
@@ -63,17 +71,51 @@ module GitLab
|
|
63
71
|
end
|
64
72
|
|
65
73
|
def probe_jobs
|
74
|
+
puts "[REMOVED] probe_jobs is now considered obsolete and does not emit any metrics,"\
|
75
|
+
" please use probe_jobs_limit instead"
|
76
|
+
|
77
|
+
self
|
78
|
+
end
|
79
|
+
|
80
|
+
def probe_future_sets
|
81
|
+
now = Time.now.to_f
|
82
|
+
with_sidekiq do
|
83
|
+
Sidekiq.redis do |conn|
|
84
|
+
Sidekiq::Scheduled::SETS.each do |set|
|
85
|
+
# Default to 0; if all jobs are due in the future, there is no "negative" delay.
|
86
|
+
delay = 0
|
87
|
+
|
88
|
+
_job, timestamp = conn.zrangebyscore(set, "-inf", now.to_s, limit: [0, 1], withscores: true).first
|
89
|
+
delay = now - timestamp if timestamp
|
90
|
+
|
91
|
+
@metrics.add("sidekiq_#{set}_set_processing_delay_seconds", delay)
|
92
|
+
|
93
|
+
# zcount is O(log(N)) (prob. binary search), so is still quick even with large sets
|
94
|
+
@metrics.add("sidekiq_#{set}_set_backlog_count",
|
95
|
+
conn.zcount(set, "-inf", now.to_s))
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
# Count worker classes present in Sidekiq queues. This only looks at the
|
102
|
+
# first PROBE_JOBS_LIMIT jobs in each queue. This means that we run a
|
103
|
+
# single LRANGE command for each queue, which does not block other
|
104
|
+
# commands. For queues over PROBE_JOBS_LIMIT in size, this means that we
|
105
|
+
# will not have completely accurate statistics, but the probe performance
|
106
|
+
# will also not degrade as the queue gets larger.
|
107
|
+
def probe_jobs_limit
|
66
108
|
with_sidekiq do
|
67
|
-
job_stats =
|
109
|
+
job_stats = Hash.new(0)
|
68
110
|
|
69
111
|
Sidekiq::Queue.all.each do |queue|
|
70
112
|
Sidekiq.redis do |conn|
|
71
|
-
|
72
|
-
|
113
|
+
conn.lrange("queue:#{queue.name}", 0, PROBE_JOBS_LIMIT).each do |job|
|
114
|
+
job_class = Sidekiq.load_json(job)["class"]
|
115
|
+
|
116
|
+
job_stats[job_class] += 1
|
117
|
+
end
|
73
118
|
end
|
74
|
-
rescue Redis::CommandError # Could happen if the script exceeded the maximum run time (5 seconds by default)
|
75
|
-
# FIXME: Should we call SCRIPT KILL?
|
76
|
-
return self
|
77
119
|
end
|
78
120
|
|
79
121
|
job_stats.each do |class_name, count|
|
@@ -169,14 +211,9 @@ module GitLab
|
|
169
211
|
def connected?
|
170
212
|
return @connected unless @connected.nil?
|
171
213
|
|
172
|
-
# This is also a good "connected check"
|
173
214
|
Sidekiq.redis do |conn|
|
174
|
-
|
175
|
-
# will be removed in redis-namespace 2.0.
|
176
|
-
conn.redis.script(:load, QUEUE_JOB_STATS_SCRIPT) unless conn.redis.script(:exists, QUEUE_JOB_STATS_SHA)
|
215
|
+
@connected = (conn.ping == "PONG")
|
177
216
|
end
|
178
|
-
|
179
|
-
@connected = true
|
180
217
|
rescue Redis::BaseConnectionError => e
|
181
218
|
@logger&.error "Error connecting to the Redis: #{e}"
|
182
219
|
@connected = false
|
data/spec/prometheus_metrics_spec.rb
CHANGED
@@ -23,4 +23,21 @@ describe GitLab::Exporter::PrometheusMetrics do
|
|
23
23
|
subject.add("mymetric", "invalid", mylabel: "x", myotherlabel: "y").to_s
|
24
24
|
}.to raise_error(RuntimeError)
|
25
25
|
end
|
26
|
+
|
27
|
+
it "supports described metrics" do
|
28
|
+
time = Time.now
|
29
|
+
|
30
|
+
allow(Time).to receive(:now).and_return(time)
|
31
|
+
|
32
|
+
described_class.describe("mymetric", "description")
|
33
|
+
described_class.describe("missingmetric", "otherdescription")
|
34
|
+
subject.add("mymetric", 1.3, mylabel: "x", myotherlabel: "y")
|
35
|
+
|
36
|
+
expect(subject.to_s).to eq(<<~METRICS)
|
37
|
+
# HELP mymetric description
|
38
|
+
mymetric{mylabel="x",myotherlabel="y"} 1.3 #{(time.to_f * 1000).to_i}
|
39
|
+
METRICS
|
40
|
+
|
41
|
+
described_class.clear_descriptions
|
42
|
+
end
|
26
43
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gitlab-exporter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 10.4.0
|
4
|
+
version: 11.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Pablo Carranza
|
@@ -189,7 +189,6 @@ files:
|
|
189
189
|
- lib/gitlab_exporter/prometheus.rb
|
190
190
|
- lib/gitlab_exporter/ruby.rb
|
191
191
|
- lib/gitlab_exporter/sidekiq.rb
|
192
|
-
- lib/gitlab_exporter/sidekiq_queue_job_stats.lua
|
193
192
|
- lib/gitlab_exporter/util.rb
|
194
193
|
- lib/gitlab_exporter/version.rb
|
195
194
|
- lib/gitlab_exporter/web_exporter.rb
|
data/lib/gitlab_exporter/sidekiq_queue_job_stats.lua
DELETED
@@ -1,42 +0,0 @@
|
|
1
|
-
--
|
2
|
-
-- Adapted from https://github.com/mperham/sidekiq/blob/2f9258e4fe77991c526f7a65c92bcf792eef8338/lib/sidekiq/api.rb#L231
|
3
|
-
--
|
4
|
-
local queue_name = KEYS[1]
|
5
|
-
local initial_size = redis.call('llen', queue_name)
|
6
|
-
local deleted_size = 0
|
7
|
-
local page = 0
|
8
|
-
local page_size = 2000
|
9
|
-
local temp_job_stats = {}
|
10
|
-
local final_job_stats = {}
|
11
|
-
|
12
|
-
while true do
|
13
|
-
local range_start = page * page_size - deleted_size
|
14
|
-
local range_end = range_start + page_size - 1
|
15
|
-
local entries = redis.call('lrange', queue_name, range_start, range_end)
|
16
|
-
|
17
|
-
if #entries == 0 then
|
18
|
-
break
|
19
|
-
end
|
20
|
-
|
21
|
-
page = page + 1
|
22
|
-
|
23
|
-
for index, entry in next, entries do
|
24
|
-
local class = cjson.decode(entry)['class']
|
25
|
-
if class ~= nil then
|
26
|
-
if temp_job_stats[class] ~= nil then
|
27
|
-
temp_job_stats[class] = temp_job_stats[class] + 1
|
28
|
-
else
|
29
|
-
temp_job_stats[class] = 1
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
deleted_size = initial_size - redis.call('llen', queue_name)
|
35
|
-
end
|
36
|
-
|
37
|
-
for class, count in next, temp_job_stats do
|
38
|
-
local stat_entry = {class, count}
|
39
|
-
table.insert(final_job_stats, stat_entry)
|
40
|
-
end
|
41
|
-
|
42
|
-
return final_job_stats
|