gitlab-monitor 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +2 -0
  3. data/.gitlab-ci.yml +18 -0
  4. data/.rubocop.yml +34 -0
  5. data/CONTRIBUTING.md +651 -0
  6. data/Gemfile +8 -0
  7. data/Gemfile.lock +75 -0
  8. data/LICENSE +25 -0
  9. data/README.md +110 -0
  10. data/bin/gitlab-mon +17 -0
  11. data/config/gitlab-monitor.yml.example +112 -0
  12. data/gitlab-monitor.gemspec +33 -0
  13. data/lib/gitlab_monitor.rb +18 -0
  14. data/lib/gitlab_monitor/cli.rb +341 -0
  15. data/lib/gitlab_monitor/database.rb +13 -0
  16. data/lib/gitlab_monitor/database/base.rb +44 -0
  17. data/lib/gitlab_monitor/database/bloat.rb +74 -0
  18. data/lib/gitlab_monitor/database/bloat_btree.sql +84 -0
  19. data/lib/gitlab_monitor/database/bloat_table.sql +63 -0
  20. data/lib/gitlab_monitor/database/ci_builds.rb +527 -0
  21. data/lib/gitlab_monitor/database/remote_mirrors.rb +74 -0
  22. data/lib/gitlab_monitor/database/row_count.rb +164 -0
  23. data/lib/gitlab_monitor/database/tuple_stats.rb +53 -0
  24. data/lib/gitlab_monitor/git.rb +144 -0
  25. data/lib/gitlab_monitor/memstats.rb +98 -0
  26. data/lib/gitlab_monitor/memstats/mapping.rb +91 -0
  27. data/lib/gitlab_monitor/prober.rb +40 -0
  28. data/lib/gitlab_monitor/process.rb +122 -0
  29. data/lib/gitlab_monitor/prometheus.rb +64 -0
  30. data/lib/gitlab_monitor/sidekiq.rb +149 -0
  31. data/lib/gitlab_monitor/sidekiq_queue_job_stats.lua +42 -0
  32. data/lib/gitlab_monitor/util.rb +83 -0
  33. data/lib/gitlab_monitor/version.rb +5 -0
  34. data/lib/gitlab_monitor/web_exporter.rb +77 -0
  35. data/spec/cli_spec.rb +31 -0
  36. data/spec/database/bloat_spec.rb +99 -0
  37. data/spec/database/ci_builds_spec.rb +421 -0
  38. data/spec/database/row_count_spec.rb +37 -0
  39. data/spec/fixtures/smaps/sample.txt +10108 -0
  40. data/spec/git_process_proper_spec.rb +27 -0
  41. data/spec/git_spec.rb +52 -0
  42. data/spec/memstats_spec.rb +28 -0
  43. data/spec/prometheus_metrics_spec.rb +17 -0
  44. data/spec/spec_helper.rb +63 -0
  45. data/spec/util_spec.rb +15 -0
  46. metadata +225 -0
@@ -0,0 +1,74 @@
1
+ module GitLab
2
+ module Monitor
3
+ module Database
4
# Collector that fetches update timestamps of enabled remote mirrors.
#
# Expects args[:project_ids] to be a list of project ids; the remaining
# arguments are handed to Base untouched.
class RemoteMirrorsCollector < Base
  # The single %s placeholder receives the comma-separated project ids.
  QUERY = <<~SQL.freeze
    SELECT project_id, url,
    EXTRACT(EPOCH FROM last_successful_update_at) AS last_successful_update_at,
    EXTRACT(EPOCH FROM last_update_at) AS last_update_at
    FROM remote_mirrors WHERE project_id IN (%s) AND enabled = 't'
  SQL

  def initialize(args)
    super(args)

    @project_ids = args[:project_ids]
  end

  # Runs the query for the configured projects.
  #
  # Returns nil when no project ids were configured, otherwise whatever
  # #execute returns (nil when the table or a column does not exist).
  def run
    ids = @project_ids
    return if ids.nil? || ids.empty?

    execute(format(QUERY, ids.join(",")))
  end

  private

  # Executes the SQL on a pooled connection. A missing table or column is
  # not treated as an error: nil is returned instead.
  def execute(query)
    with_connection_pool { |conn| conn.exec(query) }
  rescue PG::UndefinedTable, PG::UndefinedColumn
    nil
  end
end
35
+
36
# Prober invoked when gathering metrics: converts every row returned by
# RemoteMirrorsCollector into two timestamp metrics labelled with the
# project id and the mirror url.
class RemoteMirrorsProber
  def initialize(opts, metrics: PrometheusMetrics.new)
    @metrics = metrics
    @collector = RemoteMirrorsCollector.new(
      connection_string: opts[:connection_string],
      project_ids: opts[:project_ids]
    )
  end

  # Collects the rows and records the metrics. Always returns self, also
  # when the database connection is bad (nothing is recorded then).
  def probe_db
    # [metric name, result column] pairs, in the order they are emitted.
    timestamp_metrics = [
      %w[project_remote_mirror_last_successful_update_time_seconds last_successful_update_at],
      %w[project_remote_mirror_last_update_time_seconds last_update_at]
    ]

    # to_a turns a nil result (nothing to collect) into an empty list.
    @collector.run.to_a.each do |mirror|
      labels = { project_id: mirror["project_id"], url: mirror["url"] }

      timestamp_metrics.each do |metric_name, column|
        @metrics.add(metric_name, mirror[column].to_i, **labels)
      end
    end

    self
  rescue PG::ConnectionBad
    self
  end

  # Writes the textual form of the collected metrics to target.
  def write_to(target)
    target.write(@metrics.to_s)
  end
end
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,164 @@
1
+ require "set"
2
+
3
+ module GitLab
4
+ module Monitor
5
+ module Database
6
# A helper class that runs the COUNT(*) queries described in QUERIES and
# returns their counts keyed by query name.
#
# Every entry of QUERIES is a hash understood by #construct_query:
#   select: table to count rows from (required)
#   joins:  SQL join clauses (optional)
#   where:  SQL condition without the WHERE keyword (optional)
#   check:  SQL probe; the entry is skipped unless it yields a row (optional)
#
# args[:selected_queries], when given, restricts the run to those names.
class RowCountCollector < Base
  WHERE_MIRROR_ENABLED = <<~SQL.freeze
    projects.mirror = true
    AND project_mirror_data.retry_count <= 14
    AND (projects.visibility_level = 20 OR plans.name IN ('early_adopter', 'bronze', 'silver', 'gold'))
  SQL

  MIRROR_QUERY = {
    select: :projects,
    joins: <<~SQL,
      INNER JOIN project_mirror_data ON project_mirror_data.project_id = projects.id
      INNER JOIN namespaces ON projects.namespace_id = namespaces.id
      LEFT JOIN plans ON namespaces.plan_id = plans.id
    SQL
    check: "SELECT 1 FROM information_schema.tables WHERE table_name='plans'"
  }.freeze

  QUERIES = {
    mirrors_ready_to_sync: MIRROR_QUERY.merge( # EE only
      where: <<~SQL
        #{WHERE_MIRROR_ENABLED}
        AND project_mirror_data.status NOT IN ('scheduled', 'started')
        AND project_mirror_data.next_execution_timestamp <= NOW()
      SQL
    ),
    mirrors_not_updated_recently: MIRROR_QUERY.merge( # EE only
      where: <<~SQL
        #{WHERE_MIRROR_ENABLED}
        AND project_mirror_data.status NOT IN ('scheduled', 'started')
        AND (project_mirror_data.next_execution_timestamp - project_mirror_data.last_update_at) <= '30 minutes'::interval
        AND project_mirror_data.last_update_at < NOW() - '30 minutes'::interval
      SQL
    ),
    mirrors_updated_very_recently: MIRROR_QUERY.merge( # EE only
      where: <<~SQL
        #{WHERE_MIRROR_ENABLED}
        AND project_mirror_data.status NOT IN ('scheduled', 'started')
        AND project_mirror_data.last_update_at >= NOW() - '30 seconds'::interval
      SQL
    ),
    mirrors_behind_schedule: MIRROR_QUERY.merge( # EE only
      where: <<~SQL
        #{WHERE_MIRROR_ENABLED}
        AND project_mirror_data.status NOT IN ('scheduled', 'started')
        AND project_mirror_data.next_execution_timestamp <= NOW() - '10 seconds'::interval
      SQL
    ),
    mirrors_scheduled_or_started: MIRROR_QUERY.merge( # EE only
      where: <<~SQL
        #{WHERE_MIRROR_ENABLED}
        AND project_mirror_data.status IN ('scheduled', 'started')
      SQL
    ),
    mirrors_scheduled: MIRROR_QUERY.merge( # EE only
      where: <<~SQL
        #{WHERE_MIRROR_ENABLED}
        AND project_mirror_data.status = 'scheduled'
      SQL
    ),
    mirrors_started: MIRROR_QUERY.merge( # EE only
      where: <<~SQL
        #{WHERE_MIRROR_ENABLED}
        AND project_mirror_data.status = 'started'
      SQL
    ),
    soft_deleted_projects: { select: :projects, where: "pending_delete=true" },
    orphaned_projects: {
      select: :projects,
      joins: "LEFT JOIN namespaces ON projects.namespace_id = namespaces.id",
      where: "namespaces.id IS NULL"
    },
    uploads: { select: :uploads }
  }.freeze

  def initialize(args)
    super(args)

    @selected_queries = Set.new(args[:selected_queries].map(&:to_sym)) unless args[:selected_queries].nil?
  end

  # Runs every selected query whose check (if any) succeeds.
  #
  # Returns a Hash (default value 0) mapping the query name to the value of
  # the "count" column of the result, or to 0 when the query could not run.
  def run
    results = Hash.new(0)

    QUERIES.each do |key, query_hash|
      # Filter on the selection first: it is free, whereas the check below
      # costs a database round trip that is pointless for deselected queries.
      next unless selected?(key)
      next if query_hash[:check] && !successful_check?(query_hash[:check])

      results[key] = count_from_query_hash(query_hash)
    end

    results
  end

  private

  # True when no selection was configured or when key is part of it.
  def selected?(key)
    @selected_queries.nil? || @selected_queries.include?(key)
  end

  # Returns the "count" column of the first result row, or 0 when #execute
  # returned nil.
  def count_from_query_hash(query_hash)
    result = execute(construct_query(query_hash))
    return 0 unless result

    result[0]["count"]
  end

  # Wraps the probe in SELECT EXISTS(...). Returns true when it reports "t",
  # false otherwise and nil when the probe itself could not be executed.
  def successful_check?(query)
    result = execute("SELECT EXISTS (#{query})")
    return unless result

    result[0]["exists"] == "t"
  end

  # Executes the SQL on a pooled connection; a missing table or column
  # yields nil instead of an exception.
  def execute(query)
    with_connection_pool do |conn|
      conn.exec(query)
    end
  rescue PG::UndefinedTable, PG::UndefinedColumn
    nil
  end

  # Builds "SELECT COUNT(*) FROM <select> [joins] [WHERE <where>];" from a
  # query hash. NOTE: this method is private (it is defined below the
  # `private` marker), so callers outside the class need #send to reach it.
  def construct_query(query)
    query_string = "SELECT COUNT(*) FROM #{query[:select]} "
    query_string << "#{query[:joins]} " if query[:joins]
    query_string << "WHERE #{query[:where]}" if query[:where]
    query_string << ";"
  end
end
136
+
137
# Prober invoked when gathering metrics: publishes every count produced by
# RowCountCollector as a "gitlab_database_rows" metric labelled with the
# name of the query.
class RowCountProber
  def initialize(opts, metrics: PrometheusMetrics.new)
    @metrics = metrics
    @collector = RowCountCollector.new(
      connection_string: opts[:connection_string],
      selected_queries: opts[:selected_queries]
    )
  end

  # Runs the collector and records one metric per query. Always returns
  # self, also when the database connection is bad.
  def probe_db
    @collector.run.each do |query_name, row_count|
      @metrics.add("gitlab_database_rows", row_count.to_i, query_name: query_name.to_s)
    end

    self
  rescue PG::ConnectionBad
    self
  end

  # Writes the textual form of the collected metrics to target.
  def write_to(target)
    target.write(@metrics.to_s)
  end
end
162
+ end
163
+ end
164
+ end
@@ -0,0 +1,53 @@
1
+ module GitLab
2
+ module Monitor
3
+ module Database
4
# Collects per-table tuple statistics from pg_stat_user_tables.
#
# It takes a connection string (e.g. "dbname=test port=5432")
class TupleStatsCollector < Base
  COLUMNS =
    %w(relname seq_tup_read idx_tup_fetch n_tup_ins n_tup_upd n_tup_del n_tup_hot_upd n_dead_tup seq_scan).join(",")
  QUERY = <<-SQL.freeze
    SELECT #{COLUMNS}
    FROM pg_stat_user_tables
    WHERE relname IN (SELECT tablename FROM pg_tables WHERE tableowner = 'gitlab')
    GROUP BY #{COLUMNS}
  SQL

  # Returns a Hash mapping each relname to the remaining columns of its row
  # (the "relname" entry is removed from the row itself).
  def run
    with_connection_pool do |conn|
      conn.exec(QUERY).each_with_object({}) do |row, stats_by_table|
        table_name = row.delete("relname")
        stats_by_table[table_name] = row
      end
    end
  end
end
25
+
26
# Probes the DB given by opts[:connection_string] for tuple stats and turns
# each statistic into a "gitlab_database_stat_table_<column>" metric
# labelled with the table name.
class TuplesProber
  def initialize(opts, metrics: PrometheusMetrics.new)
    @metrics = metrics
    @collector = TupleStatsCollector.new(connection_string: opts[:connection_string])
  end

  # Runs the collector and records one metric per table and column. Always
  # returns self, also when the database connection is bad.
  def probe_db
    @collector.run.each_pair do |table_name, tuple_stats|
      tuple_stats.each_pair do |column_name, value|
        metric_name = "gitlab_database_stat_table_#{column_name}"
        @metrics.add(metric_name, value.to_s, table_name: table_name)
      end
    end

    self
  rescue PG::ConnectionBad
    self
  end

  # Writes the textual form of the collected metrics to target.
  def write_to(target)
    target.write(@metrics.to_s)
  end
end
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,144 @@
1
+ require "open3"
2
+
3
+ module GitLab
4
+ module Monitor
5
# Git monitoring helper class
#
# Takes a repository path for construction and provides 2 main methods:
# - pull
# - push
#
# Both methods run the git command inside the repository and return whatever
# TimeTracker#track returns for the block (presumably the tracker carrying
# the elapsed time - TimeTracker is defined elsewhere, verify there).
class Git
  # Raises RuntimeError when repo is not an existing directory.
  def initialize(repo)
    fail "Repository #{repo} does not exists" unless Dir.exist? repo
    @repo = repo
    @tracker = TimeTracker.new
  end

  def pull
    @tracker.track { execute "git pull -q" }
  end

  # Creates an empty commit first so there is always something to push.
  def push
    empty_commit
    @tracker.track { execute "git push -q" }
  end

  # Creates an empty commit with the given message.
  #
  # The command is passed as an argument list, so the message never goes
  # through a shell: quotes or other shell metacharacters in it are taken
  # literally instead of breaking (or injecting into) the command line.
  def empty_commit(message = "Beep")
    @tracker.track { execute("git", "commit", "--allow-empty", "-m", message.to_s) }
  end

  private

  # Runs a command with the repository as working directory.
  #
  # command is either a single command-line string (interpreted the way
  # Open3.capture2e interprets a lone string, i.e. possibly through a shell)
  # or a program name followed by its arguments (no shell involved).
  #
  # Returns the CommandResult; raises RuntimeError when the command failed.
  def execute(*command)
    result = CommandResult.new(*Open3.capture2e(*command, chdir: @repo))
    fail "Command #{command.join(' ')} failed with status #{result.status}\n#{result.stdout}" if result.failed?
    result
  end
end
41
+
42
# Result of a command
#
# Wraps the captured output (stdout) and the process status object of an
# executed command. Provides helpers to check whether the execution failed
# and a to_s that returns the command output.
CommandResult = Struct.new(:stdout, :status) do
  # True unless the command exited with status 0.
  #
  # A process that did not exit normally (e.g. it was killed by a signal)
  # has no exit status at all; that counts as a failure too instead of
  # raising NoMethodError on nil.
  def failed?
    exit_code = status
    exit_code.nil? || !exit_code.zero?
  end

  # Exit status reported by the wrapped status object (nil when it has none).
  def status
    self[:status].exitstatus
  end

  def to_s
    stdout
  end
end
59
+
60
# Builds a Git object for opts[:source], times pull and push on it and
# records the durations (in milliseconds) as metrics.
#
# The metrics object defaults to a PrometheusMetrics; pass another one to
# change the metrics writer. opts[:labels], when present, is attached to
# every recorded metric.
class GitProber
  def initialize(opts, metrics: PrometheusMetrics.new)
    @metrics = metrics
    @labels = opts[:labels] || {}
    @git = Git.new(opts[:source])
  end

  # Records "git_pull_time_milliseconds" and returns self.
  def probe_pull
    elapsed_ms = (@git.pull.time * 1000).to_i
    @metrics.add("git_pull_time_milliseconds", elapsed_ms, **@labels)
    self
  end

  # Records "git_push_time_milliseconds" and returns self.
  def probe_push
    elapsed_ms = (@git.push.time * 1000).to_i
    @metrics.add("git_push_time_milliseconds", elapsed_ms, **@labels)
    self
  end

  # Writes the textual form of the collected metrics to target.
  def write_to(target)
    target.write(@metrics.to_s)
  end
end
85
+
86
# A special prober for git processes: probes every running git process
# individually and counts the processes per git subcommand.
class GitProcessProber
  def initialize(opts, metrics: PrometheusMetrics.new)
    @opts = opts
    @metrics = metrics
  end

  # Probes each process found by Utils.pgrep("^git ") through ProcessProber
  # (named "git <subcommand>") and afterwards records a "process_count"
  # metric per name. Returns self.
  def probe_git # rubocop:disable Metrics/MethodLength
    tally = Hash.new(0)

    Utils.pgrep("^git ").each do |pid|
      subcommand = self.class.extract_subcommand(read_cmdline(pid))
      next unless subcommand # Unlikely, but just to be safe

      process_name = "git #{subcommand}"
      tally[process_name] += 1

      probe_options = { name: process_name, pid_or_pattern: pid, quantiles: @opts[:quantiles] }
      ProcessProber.new(probe_options, metrics: @metrics).probe_stat
    end

    tally.each_pair do |process_name, total|
      @metrics.add("process_count", total, name: process_name)
    end

    self
  end

  # Writes the textual form of the collected metrics to target.
  def write_to(target)
    target.write(@metrics.to_s)
  end

  # Extracts the git subcommand from the content of a /proc/<pid>/cmdline
  # file, whose arguments are NUL-separated rather than space-separated.
  #
  # The first argument (the "git" executable itself) is ignored; the result
  # is the first remaining argument that is not empty and matches
  # /^[^-][a-z\-]*$/. Returns nil for an empty cmdline or when no argument
  # qualifies.
  def self.extract_subcommand(cmd)
    return if cmd.empty?

    arguments = cmd.split("\u0000").drop(1) # drop(1): skip "git"
    arguments.find { |argument| !argument.empty? && argument =~ /^[^-][a-z\-]*$/ }
  end

  private

  # Returns the command line of the process, or "" when the process file is
  # gone in the meantime (race condition) or cannot be read.
  def read_cmdline(pid)
    File.read("/proc/#{pid}/cmdline")
  rescue
    ""
  end
end
143
+ end
144
+ end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "memstats/mapping"
4
+
5
+ # Ported from https://github.com/discourse/discourse/blob/master/script/memstats.rb
6
+ #
7
+ # Aggregates and prints useful information from /proc/[pid]/smaps
8
+ #
9
+ # pss - Roughly the amount of memory that is "really" being used by the pid
10
+ # swap - Amount of swap this process is currently using
11
+ #
12
+ # Reference:
13
+ # http://www.mjmwired.net/kernel/Documentation/filesystems/proc.txt#361
14
+ #
15
+ # Example:
16
+ # # ./memstats.rb 4386
17
+ # Process: 4386
18
+ # Command Line: /usr/bin/mongod -f /etc/mongo/mongod.conf
19
+ # Memory Summary:
20
+ # private_clean 107,132 kB
21
+ # private_dirty 2,020,676 kB
22
+ # pss 2,127,860 kB
23
+ # rss 2,128,536 kB
24
+ # shared_clean 728 kB
25
+ # shared_dirty 0 kB
26
+ # size 149,281,668 kB
27
+ # swap 1,719,792 kB
28
+ module GitLab
29
+ module Monitor
30
+ module MemStats
31
# Aggregates all metrics for a single PID in /proc/<pid>/smaps
#
# The file is read on construction. Afterwards #totals holds, for every
# field in Mapping::FIELDS, the sum over all mappings of the process, and
# #valid? tells whether the file could be processed. When it could not,
# NaN is added to every field instead.
class Aggregator
  attr_accessor :pid, :totals

  def initialize(pid)
    @pid = pid
    @totals = Hash.new(0)
    @mappings = []
    @valid = true

    populate_info
  end

  def valid?
    @valid
  end

  private

  attr_accessor :mappings

  # Builds a Mapping from the lines of one smaps entry and adds each of its
  # fields to totals. Returns the Mapping.
  def consume_mapping(map_lines, totals)
    m = Mapping.new(map_lines)

    Mapping::FIELDS.each do |field|
      totals[field] += m.send(field)
    end

    m
  end

  # Marks every field as not available by adding NaN to it.
  def create_memstats_not_available(totals)
    Mapping::FIELDS.each do |field|
      totals[field] += Float::NAN
    end
  end

  # Consumes the lines collected so far (if any) as one mapping and empties
  # the buffer for the next entry.
  def flush_mapping(map_lines)
    return if map_lines.empty?

    mappings << consume_mapping(map_lines, totals)
    map_lines.clear
  end

  # Reads /proc/<pid>/smaps entry by entry.
  #
  # "Key:   value" lines belong to the current entry; a line containing a
  # "hex:hex" device pair starts a new entry; any other line stops the
  # reading. Any error is printed, invalidates the aggregator and turns the
  # totals into NaN.
  def populate_info # rubocop:disable Metrics/MethodLength
    File.open("/proc/#{@pid}/smaps") do |smaps|
      map_lines = []

      smaps.each_line do |raw_line|
        line = raw_line.strip

        case line
        when /\w+:\s+/
          map_lines << line
        when /[0-9a-f]+:[0-9a-f]+\s+/
          flush_mapping(map_lines)
          map_lines << line
        else
          break
        end
      end

      # The final entry has no following header line to trigger its
      # consumption, so it must be flushed here or it would be missing
      # from the totals.
      flush_mapping(map_lines)
    end
  rescue => e
    puts "Error: #{e}"
    @valid = false
    create_memstats_not_available(totals)
  end
end
96
+ end
97
+ end
98
+ end