gitlab-monitor 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/.gitlab-ci.yml +18 -0
- data/.rubocop.yml +34 -0
- data/CONTRIBUTING.md +651 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +75 -0
- data/LICENSE +25 -0
- data/README.md +110 -0
- data/bin/gitlab-mon +17 -0
- data/config/gitlab-monitor.yml.example +112 -0
- data/gitlab-monitor.gemspec +33 -0
- data/lib/gitlab_monitor.rb +18 -0
- data/lib/gitlab_monitor/cli.rb +341 -0
- data/lib/gitlab_monitor/database.rb +13 -0
- data/lib/gitlab_monitor/database/base.rb +44 -0
- data/lib/gitlab_monitor/database/bloat.rb +74 -0
- data/lib/gitlab_monitor/database/bloat_btree.sql +84 -0
- data/lib/gitlab_monitor/database/bloat_table.sql +63 -0
- data/lib/gitlab_monitor/database/ci_builds.rb +527 -0
- data/lib/gitlab_monitor/database/remote_mirrors.rb +74 -0
- data/lib/gitlab_monitor/database/row_count.rb +164 -0
- data/lib/gitlab_monitor/database/tuple_stats.rb +53 -0
- data/lib/gitlab_monitor/git.rb +144 -0
- data/lib/gitlab_monitor/memstats.rb +98 -0
- data/lib/gitlab_monitor/memstats/mapping.rb +91 -0
- data/lib/gitlab_monitor/prober.rb +40 -0
- data/lib/gitlab_monitor/process.rb +122 -0
- data/lib/gitlab_monitor/prometheus.rb +64 -0
- data/lib/gitlab_monitor/sidekiq.rb +149 -0
- data/lib/gitlab_monitor/sidekiq_queue_job_stats.lua +42 -0
- data/lib/gitlab_monitor/util.rb +83 -0
- data/lib/gitlab_monitor/version.rb +5 -0
- data/lib/gitlab_monitor/web_exporter.rb +77 -0
- data/spec/cli_spec.rb +31 -0
- data/spec/database/bloat_spec.rb +99 -0
- data/spec/database/ci_builds_spec.rb +421 -0
- data/spec/database/row_count_spec.rb +37 -0
- data/spec/fixtures/smaps/sample.txt +10108 -0
- data/spec/git_process_proper_spec.rb +27 -0
- data/spec/git_spec.rb +52 -0
- data/spec/memstats_spec.rb +28 -0
- data/spec/prometheus_metrics_spec.rb +17 -0
- data/spec/spec_helper.rb +63 -0
- data/spec/util_spec.rb +15 -0
- metadata +225 -0
@@ -0,0 +1,13 @@
|
|
1
|
+
module GitLab
  module Monitor
    # Database-related classes.
    #
    # Each prober constant is registered lazily via Module#autoload, so the
    # backing file is only loaded the first time the constant is referenced.
    module Database
      {
        Base: "gitlab_monitor/database/base",
        CiBuildsProber: "gitlab_monitor/database/ci_builds",
        TuplesProber: "gitlab_monitor/database/tuple_stats",
        RowCountProber: "gitlab_monitor/database/row_count",
        BloatProber: "gitlab_monitor/database/bloat",
        RemoteMirrorsProber: "gitlab_monitor/database/remote_mirrors"
      }.each do |const_name, path|
        autoload const_name, path
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require "pg"
|
2
|
+
require "connection_pool"
|
3
|
+
|
4
|
+
module GitLab
  module Monitor
    module Database
      # An abstract class for interacting with DB
      #
      # It takes a connection string (e.g. "dbname=test port=5432")
      class Base
        # Returns a Hash mapping each connection string to its own
        # ConnectionPool (3 connections, 5 second checkout timeout).
        # Pools are created lazily on first access per connection string,
        # and shared by all instances of Base subclasses.
        def self.connection_pool
          @connection_pool ||= Hash.new do |h, connection_string|
            h[connection_string] = ConnectionPool.new(size: 3, timeout: 5) do
              PG.connect(connection_string)
            end
          end
        end

        # @param args [Hash] expects :connection_string in libpq format
        def initialize(args)
          @connection_string = args[:connection_string]
        end

        # Abstract: subclasses implement their own collection logic.
        #
        # Fixed: the original raised `fail NotImplemented`, which is not a
        # defined constant and therefore surfaced as a NameError instead of
        # the intended NotImplementedError.
        def run
          raise NotImplementedError
        end

        # The pool associated with this instance's connection string.
        def connection_pool
          self.class.connection_pool[@connection_string]
        end

        # Checks a connection out of the pool and yields it. On
        # PG::UnableToSend (broken socket) the connection is reset before
        # re-raising, so the pooled connection is usable on the next checkout.
        def with_connection_pool
          connection_pool.with do |conn|
            begin
              yield conn
            rescue PG::UnableToSend => e
              conn.reset
              raise e
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module GitLab
  module Monitor
    module Database
      # Helper to collect bloat metrics.
      #
      # Executes the bundled SQL estimation query for the requested bloat
      # type and indexes the result rows by their "object_name" column.
      class BloatCollector < Base
        # Runs the bloat query for +type+ (:btree or :table) and returns a
        # Hash of object_name => result row.
        def run(type = :btree)
          by_object = {}
          execute(self.class.query_for(type)).each do |row|
            by_object[row["object_name"]] = row
          end
          by_object
        end

        private

        def execute(query)
          with_connection_pool { |conn| conn.exec(query) }
        end

        class << self
          # Returns (and memoizes) the contents of bloat_<type>.sql living
          # next to this source file.
          def query_for(type)
            @queries ||= {}
            @queries[type] ||= read_query_file(type)
          end

          private

          def read_query_file(type)
            file = File.join(__dir__, "bloat_#{type}.sql")
            raise "Unknown bloat query file: #{file}" unless File.exist?(file)

            File.read(file)
          end
        end
      end

      # Prober class to gather bloat metrics
      class BloatProber
        METRIC_KEYS = %w(bloat_ratio bloat_size extra_size real_size).freeze

        attr_reader :metrics, :collector, :bloat_types

        def initialize(opts,
                       metrics: PrometheusMetrics.new,
                       collector: BloatCollector.new(connection_string: opts[:connection_string]))
          @metrics = metrics
          @collector = collector
          @bloat_types = opts[:bloat_types] || %i(btree table)
        end

        # Probes every configured bloat type in turn.
        def probe_db
          bloat_types.each { |type| probe_for_type(type) }
        end

        def write_to(target)
          target.write(metrics.to_s)
        end

        private

        # Emits one gauge per METRIC_KEYS entry for each object reported by
        # the collector; a dead database connection is swallowed so a probe
        # never crashes the exporter.
        def probe_for_type(type)
          collector.run(type).each do |query_name, data|
            METRIC_KEYS.each do |key|
              metrics.add("gitlab_database_bloat_#{type}_#{key}", data[key], query_name: query_name)
            end
          end

          self
        rescue PG::ConnectionBad
          self
        end
      end
    end
  end
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
-- Originally from: https://github.com/ioguix/pgsql-bloat-estimation/blob/master/btree/btree_bloat.sql
-- WARNING: executed with a non-superuser role, the query inspect only index on tables you are granted to read.
-- WARNING: rows with is_na = 't' are known to have bad statistics ("name" type is not supported).
-- This query is compatible with PostgreSQL 8.2 and after
--
-- Estimates btree index bloat from catalog statistics alone (no pgstattuple
-- extension needed). Output columns read by the collector: object_name (the
-- index name, used as the row key), real_size, extra_size, bloat_size
-- (bytes) and bloat_ratio (percent).
SELECT current_database(), nspname AS schemaname, tblname, idxname AS object_name, bs*(relpages)::bigint AS real_size,
  bs*(relpages-est_pages)::bigint AS extra_size,
  100 * (relpages-est_pages)::float / relpages AS extra_ratio,
  fillfactor,
  -- bloat_size counts only pages beyond what the configured fillfactor
  -- already reserves, so a freshly-built index reports 0.
  CASE WHEN relpages > est_pages_ff
    THEN bs*(relpages-est_pages_ff)
    ELSE 0
  END AS bloat_size,
  100 * (relpages-est_pages_ff)::float / relpages AS bloat_ratio,
  is_na
  -- , 100-(sub.pst).avg_leaf_density, est_pages, index_tuple_hdr_bm, maxalign, pagehdr, nulldatawidth, nulldatahdrwidth, sub.reltuples, sub.relpages -- (DEBUG INFO)
FROM (
  -- Estimate the ideal page count, without (est_pages) and with
  -- (est_pages_ff) the index fillfactor taken into account.
  SELECT coalesce(1 +
       ceil(reltuples/floor((bs-pageopqdata-pagehdr)/(4+nulldatahdrwidth)::float)), 0 -- ItemIdData size + computed avg size of a tuple (nulldatahdrwidth)
    ) AS est_pages,
    coalesce(1 +
       ceil(reltuples/floor((bs-pageopqdata-pagehdr)*fillfactor/(100*(4+nulldatahdrwidth)::float))), 0
    ) AS est_pages_ff,
    bs, nspname, table_oid, tblname, idxname, relpages, fillfactor, is_na
    -- , stattuple.pgstatindex(quote_ident(nspname)||'.'||quote_ident(idxname)) AS pst, index_tuple_hdr_bm, maxalign, pagehdr, nulldatawidth, nulldatahdrwidth, reltuples -- (DEBUG INFO)
  FROM (
    -- Pad the per-tuple header and data widths up to MAXALIGN boundaries.
    SELECT maxalign, bs, nspname, tblname, idxname, reltuples, relpages, relam, table_oid, fillfactor,
      ( index_tuple_hdr_bm +
          maxalign - CASE -- Add padding to the index tuple header to align on MAXALIGN
            WHEN index_tuple_hdr_bm%maxalign = 0 THEN maxalign
            ELSE index_tuple_hdr_bm%maxalign
          END
        + nulldatawidth + maxalign - CASE -- Add padding to the data to align on MAXALIGN
            WHEN nulldatawidth = 0 THEN 0
            WHEN nulldatawidth::integer%maxalign = 0 THEN maxalign
            ELSE nulldatawidth::integer%maxalign
          END
      )::numeric AS nulldatahdrwidth, pagehdr, pageopqdata, is_na
      -- , index_tuple_hdr_bm, nulldatawidth -- (DEBUG INFO)
    FROM (
      SELECT
        i.nspname, i.tblname, i.idxname, i.reltuples, i.relpages, i.relam, a.attrelid AS table_oid,
        current_setting('block_size')::numeric AS bs, fillfactor,
        CASE -- MAXALIGN: 4 on 32bits, 8 on 64bits (and mingw32 ?)
          WHEN version() ~ 'mingw32' OR version() ~ '64-bit|x86_64|ppc64|ia64|amd64' THEN 8
          ELSE 4
        END AS maxalign,
        /* per page header, fixed size: 20 for 7.X, 24 for others */
        24 AS pagehdr,
        /* per page btree opaque data */
        16 AS pageopqdata,
        /* per tuple header: add IndexAttributeBitMapData if some cols are null-able */
        CASE WHEN max(coalesce(s.null_frac,0)) = 0
          THEN 2 -- IndexTupleData size
          ELSE 2 + (( 32 + 8 - 1 ) / 8) -- IndexTupleData size + IndexAttributeBitMapData size ( max num filed per index + 8 - 1 /8)
        END AS index_tuple_hdr_bm,
        /* data len: we remove null values save space using it fractionnal part from stats */
        sum( (1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 1024)) AS nulldatawidth,
        max( CASE WHEN a.atttypid = 'pg_catalog.name'::regtype THEN 1 ELSE 0 END ) > 0 AS is_na
      FROM pg_attribute AS a
      JOIN (
        -- Every valid btree-backed index on a plain table with at least one
        -- page; fillfactor defaults to 90 (the btree default) when unset.
        SELECT nspname, tbl.relname AS tblname, idx.relname AS idxname, idx.reltuples, idx.relpages, idx.relam,
          indrelid, indexrelid, indkey::smallint[] AS attnum,
          coalesce(substring(
            array_to_string(idx.reloptions, ' ')
            from 'fillfactor=([0-9]+)')::smallint, 90) AS fillfactor
        FROM pg_index
        JOIN pg_class idx ON idx.oid=pg_index.indexrelid
        JOIN pg_class tbl ON tbl.oid=pg_index.indrelid
        JOIN pg_namespace ON pg_namespace.oid = idx.relnamespace
        WHERE pg_index.indisvalid AND tbl.relkind = 'r' AND idx.relpages > 0
      ) AS i ON a.attrelid = i.indexrelid
      JOIN pg_stats AS s ON s.schemaname = i.nspname
        AND ((s.tablename = i.tblname AND s.attname = pg_catalog.pg_get_indexdef(a.attrelid, a.attnum, TRUE)) -- stats from tbl
          OR (s.tablename = i.idxname AND s.attname = a.attname))-- stats from functionnal cols
      JOIN pg_type AS t ON a.atttypid = t.oid
      WHERE a.attnum > 0
      GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9
    ) AS s1
  ) AS s2
  JOIN pg_am am ON s2.relam = am.oid WHERE am.amname = 'btree'
) AS sub
-- Keep only rows with trustworthy statistics, restricted to the
-- application schema.
WHERE NOT is_na
  AND nspname = 'public'
ORDER BY 2,3,4;
|
@@ -0,0 +1,63 @@
|
|
1
|
+
-- Originally from: https://github.com/ioguix/pgsql-bloat-estimation/blob/master/table/table_bloat.sql
/* WARNING: executed with a non-superuser role, the query inspect only tables you are granted to read.
 * This query is compatible with PostgreSQL 9.0 and more
 */
-- Estimates heap (table) bloat from catalog statistics alone. Output
-- columns read by the collector: object_name (the table name, used as the
-- row key), real_size, extra_size, bloat_size (bytes) and bloat_ratio
-- (percent).
SELECT current_database(), schemaname, tblname AS object_name, bs*tblpages AS real_size,
  (tblpages-est_tblpages)*bs AS extra_size,
  CASE WHEN tblpages - est_tblpages > 0
    THEN 100 * (tblpages - est_tblpages)/tblpages::float
    ELSE 0
  END AS extra_ratio, fillfactor,
  -- bloat beyond what the table fillfactor alone would explain.
  CASE WHEN tblpages - est_tblpages_ff > 0
    THEN (tblpages-est_tblpages_ff)*bs
    ELSE 0
  END AS bloat_size,
  CASE WHEN tblpages - est_tblpages_ff > 0
    THEN 100 * (tblpages - est_tblpages_ff)/tblpages::float
    ELSE 0
  END AS bloat_ratio, is_na
  -- , (pst).free_percent + (pst).dead_tuple_percent AS real_frag
FROM (
  -- Ideal page counts, without (est_tblpages) and with (est_tblpages_ff)
  -- the configured fillfactor; TOAST tuples are assumed 4 per page.
  SELECT ceil( reltuples / ( (bs-page_hdr)/tpl_size ) ) + ceil( toasttuples / 4 ) AS est_tblpages,
    ceil( reltuples / ( (bs-page_hdr)*fillfactor/(tpl_size*100) ) ) + ceil( toasttuples / 4 ) AS est_tblpages_ff,
    tblpages, fillfactor, bs, tblid, schemaname, tblname, heappages, toastpages, is_na
    -- , stattuple.pgstattuple(tblid) AS pst
  FROM (
    -- Average tuple size, padded to the platform MAXALIGN (ma).
    SELECT
      ( 4 + tpl_hdr_size + tpl_data_size + (2*ma)
        - CASE WHEN tpl_hdr_size%ma = 0 THEN ma ELSE tpl_hdr_size%ma END
        - CASE WHEN ceil(tpl_data_size)::int%ma = 0 THEN ma ELSE ceil(tpl_data_size)::int%ma END
      ) AS tpl_size, bs - page_hdr AS size_per_block, (heappages + toastpages) AS tblpages, heappages,
      toastpages, reltuples, toasttuples, bs, page_hdr, tblid, schemaname, tblname, fillfactor, is_na
    FROM (
      SELECT
        tbl.oid AS tblid, ns.nspname AS schemaname, tbl.relname AS tblname, tbl.reltuples,
        tbl.relpages AS heappages, coalesce(toast.relpages, 0) AS toastpages,
        coalesce(toast.reltuples, 0) AS toasttuples,
        coalesce(substring(
          array_to_string(tbl.reloptions, ' ')
          FROM 'fillfactor=([0-9]+)')::smallint, 100) AS fillfactor,
        current_setting('block_size')::numeric AS bs,
        CASE WHEN version()~'mingw32' OR version()~'64-bit|x86_64|ppc64|ia64|amd64' THEN 8 ELSE 4 END AS ma,
        24 AS page_hdr,
        23 + CASE WHEN MAX(coalesce(null_frac,0)) > 0 THEN ( 7 + count(*) ) / 8 ELSE 0::int END
          + CASE WHEN tbl.relhasoids THEN 4 ELSE 0 END AS tpl_hdr_size,
        sum( (1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 1024) ) AS tpl_data_size,
        -- is_na marks rows whose statistics are unreliable: "name"-typed
        -- columns or columns missing from pg_stats entirely.
        bool_or(att.atttypid = 'pg_catalog.name'::regtype)
          OR count(att.attname) <> count(s.attname) AS is_na
      FROM pg_attribute AS att
      JOIN pg_class AS tbl ON att.attrelid = tbl.oid
      JOIN pg_namespace AS ns ON ns.oid = tbl.relnamespace
      LEFT JOIN pg_stats AS s ON s.schemaname=ns.nspname
        AND s.tablename = tbl.relname AND s.inherited=false AND s.attname=att.attname
      LEFT JOIN pg_class AS toast ON tbl.reltoastrelid = toast.oid
      WHERE att.attnum > 0 AND NOT att.attisdropped
        AND tbl.relkind = 'r'
      GROUP BY 1,2,3,4,5,6,7,8,9,10, tbl.relhasoids
      ORDER BY 2,3
    ) AS s
  ) AS s2
) AS s3
-- Keep only rows with trustworthy statistics, restricted to the
-- application schema.
WHERE NOT is_na
  -- AND tblpages*((pst).free_percent + (pst).dead_tuple_percent)::float4/100 >= 1
  AND schemaname= 'public';
|
@@ -0,0 +1,527 @@
|
|
1
|
+
module GitLab
|
2
|
+
module Monitor
|
3
|
+
module Database
|
4
|
+
# A helper class to collect CI builds metrics.
#
# Every public query has an EE and a CE variant: the EE variant joins the
# namespace/minutes tables that only exist on an EE schema; `mirror_column?`
# (presence of projects.mirror) is used to decide which one to run.
class CiBuildsCollector < Base # rubocop:disable Metrics/ClassLength
  # SET LOCAL limits the setting to the wrapping transaction; it is applied
  # before the heavy aggregation queries below.
  SET_RANDOM_PAGE_COST = "SET LOCAL random_page_cost TO 1".freeze

  # Builds per namespace for one status (%s placeholder), EE schema.
  # has_minutes reports whether the namespace still has shared-runner
  # minutes (limit of 0 means unlimited).
  BUILDS_QUERY_EE =
    <<~SQL.freeze
      SELECT
        projects.namespace_id,
        ci_builds.status,
        projects.shared_runners_enabled,
        (COALESCE(namespaces.shared_runners_minutes_limit, application_settings.shared_runners_minutes, 0) = 0 OR
         COALESCE(namespace_statistics.shared_runners_seconds, 0) < COALESCE(namespaces.shared_runners_minutes_limit, application_settings.shared_runners_minutes, 0) * 60) as has_minutes,
        COUNT(*) AS count
      FROM ci_builds
      JOIN projects
        ON projects.id = ci_builds.project_id
      JOIN namespaces
        ON namespaces.id = projects.namespace_id
      LEFT JOIN namespace_statistics
        ON namespace_statistics.namespace_id = namespaces.id
      JOIN application_settings
        ON application_settings.id = 1
      WHERE ci_builds.type = 'Ci::Build'
        AND ci_builds.status = '%s'
        AND projects.pending_delete = 'f'
      GROUP BY
        projects.namespace_id,
        ci_builds.status,
        projects.shared_runners_enabled,
        namespaces.shared_runners_minutes_limit,
        namespace_statistics.shared_runners_seconds,
        application_settings.shared_runners_minutes
    SQL

  # Builds per namespace for one status (%s placeholder), CE schema.
  BUILDS_QUERY_CE =
    <<~SQL.freeze
      SELECT
        projects.namespace_id,
        ci_builds.status,
        projects.shared_runners_enabled,
        COUNT(*) AS count
      FROM ci_builds
      JOIN projects
        ON projects.id = ci_builds.project_id
      WHERE ci_builds.type = 'Ci::Build'
        AND ci_builds.status = '%s'
        AND projects.pending_delete = 'f'
      GROUP BY
        projects.namespace_id,
        ci_builds.status,
        projects.shared_runners_enabled
    SQL

  # Running builds that have not been updated for over an hour.
  STALE_BUILDS_QUERY =
    <<~SQL.freeze
      SELECT
        COUNT(*) AS count
      FROM ci_builds
      JOIN projects
        ON projects.id = ci_builds.project_id
      WHERE ci_builds.type = 'Ci::Build'
        AND ci_builds.status = 'running'
        AND ci_builds.updated_at < NOW() - INTERVAL '1 hour'
        AND projects.pending_delete = 'f'
    SQL

  # Running builds per runner, EE schema (includes mirror flags and minutes).
  PER_RUNNER_QUERY_EE =
    <<~SQL.freeze
      SELECT
        ci_builds.runner_id,
        ci_runners.is_shared,
        projects.namespace_id,
        projects.mirror,
        projects.mirror_trigger_builds,
        ci_pipelines.pipeline_schedule_id,
        ci_builds.trigger_request_id,
        (COALESCE(namespaces.shared_runners_minutes_limit, application_settings.shared_runners_minutes, 0) = 0 OR
         COALESCE(namespace_statistics.shared_runners_seconds, 0) < COALESCE(namespaces.shared_runners_minutes_limit, application_settings.shared_runners_minutes, 0) * 60) as has_minutes,
        COUNT(*) AS count
      FROM ci_builds
      JOIN ci_runners
        ON ci_runners.id = ci_builds.runner_id
      JOIN projects
        ON projects.id = ci_builds.project_id
      JOIN ci_pipelines
        ON ci_pipelines.id = ci_builds.commit_id
      JOIN namespaces
        ON namespaces.id = projects.namespace_id
      LEFT JOIN namespace_statistics
        ON namespace_statistics.namespace_id = namespaces.id
      JOIN application_settings
        ON application_settings.id = 1
      WHERE ci_builds.type = 'Ci::Build'
        AND ci_builds.status = 'running'
        AND projects.pending_delete = 'f'
      GROUP BY
        ci_builds.runner_id,
        ci_runners.is_shared,
        projects.namespace_id,
        projects.mirror,
        projects.mirror_trigger_builds,
        ci_pipelines.pipeline_schedule_id,
        ci_builds.trigger_request_id,
        namespaces.shared_runners_minutes_limit,
        namespace_statistics.shared_runners_seconds,
        application_settings.shared_runners_minutes
    SQL

  # Running builds per runner, CE schema.
  PER_RUNNER_QUERY_CE =
    <<~SQL.freeze
      SELECT
        ci_builds.runner_id,
        ci_runners.is_shared,
        projects.namespace_id,
        ci_pipelines.pipeline_schedule_id,
        ci_builds.trigger_request_id,
        COUNT(*) AS count
      FROM ci_builds
      JOIN ci_runners
        ON ci_runners.id = ci_builds.runner_id
      JOIN projects
        ON projects.id = ci_builds.project_id
      JOIN ci_pipelines
        ON ci_pipelines.id = ci_builds.commit_id
      WHERE ci_builds.type = 'Ci::Build'
        AND ci_builds.status = 'running'
        AND projects.pending_delete = 'f'
      GROUP BY
        ci_builds.runner_id,
        ci_runners.is_shared,
        projects.namespace_id,
        ci_pipelines.pipeline_schedule_id,
        ci_builds.trigger_request_id
    SQL

  # EXISTS check for the EE-only projects.mirror column.
  MIRROR_COLUMN_QUERY =
    <<~SQL.freeze
      SELECT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='projects' AND column_name='mirror')
    SQL

  # Pipelines repeating the same commands more than %d times, EE schema.
  REPEATED_COMMANDS_QUERY_EE =
    <<~SQL.freeze
      SELECT
        subquery.namespace_id,
        subquery.shared_runners_enabled,
        subquery.project_id,
        subquery.status,
        subquery.has_minutes,
        MAX(subquery.count) as count
      FROM (
        SELECT
          projects.namespace_id,
          projects.shared_runners_enabled,
          ci_builds.project_id,
          ci_builds.commit_id,
          ci_builds.status,
          (COALESCE(namespaces.shared_runners_minutes_limit, application_settings.shared_runners_minutes, 0) = 0 OR
           COALESCE(namespace_statistics.shared_runners_seconds, 0) < COALESCE(namespaces.shared_runners_minutes_limit, application_settings.shared_runners_minutes, 0) * 60) as has_minutes,
          COUNT(*) AS count
        FROM ci_builds
        JOIN projects
          ON projects.id = ci_builds.project_id
        JOIN namespaces
          ON namespaces.id = projects.namespace_id
        LEFT JOIN namespace_statistics
          ON namespace_statistics.namespace_id = namespaces.id
        JOIN application_settings
          ON application_settings.id = 1
        WHERE ci_builds.type = 'Ci::Build'
          AND ci_builds.status IN ('running', 'pending')
        GROUP BY
          projects.namespace_id,
          projects.shared_runners_enabled,
          ci_builds.project_id,
          ci_builds.commit_id,
          ci_builds.status,
          ci_builds.commands,
          namespaces.shared_runners_minutes_limit,
          namespace_statistics.shared_runners_seconds,
          application_settings.shared_runners_minutes
        HAVING COUNT(*) > %d
      ) AS subquery
      GROUP BY
        subquery.namespace_id,
        subquery.shared_runners_enabled,
        subquery.project_id,
        subquery.commit_id,
        subquery.status,
        subquery.has_minutes
    SQL

  # Pipelines repeating the same commands more than %d times, CE schema.
  REPEATED_COMMANDS_QUERY_CE =
    <<~SQL.freeze
      SELECT
        subquery.namespace_id,
        subquery.shared_runners_enabled,
        subquery.project_id,
        subquery.status,
        MAX(subquery.count) as count
      FROM (
        SELECT
          projects.namespace_id,
          projects.shared_runners_enabled,
          ci_builds.project_id,
          ci_builds.commit_id,
          ci_builds.status,
          COUNT(*) AS count
        FROM ci_builds
        JOIN projects
          ON projects.id = ci_builds.project_id
        JOIN namespaces
          ON namespaces.id = projects.namespace_id
        WHERE ci_builds.type = 'Ci::Build'
          AND ci_builds.status IN ('running', 'pending')
        GROUP BY
          projects.namespace_id,
          projects.shared_runners_enabled,
          ci_builds.project_id,
          ci_builds.commit_id,
          ci_builds.status,
          ci_builds.commands
        HAVING COUNT(*) > %d
      ) AS subquery
      GROUP BY
        subquery.namespace_id,
        subquery.shared_runners_enabled,
        subquery.project_id,
        subquery.commit_id,
        subquery.status
    SQL

  # Finished builds older than %s whose trace chunks were never archived
  # into a trace artifact (ci_job_artifacts.file_type = 3).
  UNARCHIVED_TRACES_QUERY =
    <<~SQL.freeze
      SELECT
        COUNT(*) as count
      FROM ci_builds
      JOIN ci_build_trace_chunks
        ON ci_build_trace_chunks.build_id = ci_builds.id
      LEFT JOIN ci_job_artifacts
        ON ci_job_artifacts.job_id = ci_builds.id
        AND ci_job_artifacts.file_type = 3
      WHERE ci_builds.type = 'Ci::Build'
        AND ci_builds.status IN ('success', 'failed', 'canceled')
        AND ci_builds.finished_at < '%s'
        AND ci_job_artifacts.job_id IS NULL
    SQL

  STATUS_CREATED = "created".freeze
  STATUS_PENDING = "pending".freeze

  DEFAULT_UNARCHIVED_TRACES_OFFSET_MINUTES = 1440

  # @param opts [Hash] :connection_string, plus optional
  #   :allowed_repeated_commands_count, :created_builds_counting_disabled,
  #   :unarchived_traces_offset_minutes
  def initialize(opts)
    super(opts)

    @allowed_repeated_commands_count = opts[:allowed_repeated_commands_count]
    @created_builds_counting_disabled = opts[:created_builds_counting_disabled]
    @unarchived_traces_offset_minutes = opts[:unarchived_traces_offset_minutes]
  end

  # Runs every collection query and returns a Hash of result sets; the
  # :created_builds key is omitted when counting created builds is disabled.
  def run
    results = {}
    results[:created_builds] = builds(STATUS_CREATED) unless @created_builds_counting_disabled
    results[:pending_builds] = builds(STATUS_PENDING)
    results[:stale_builds] = stale_builds
    results[:per_runner] = per_runner_builds
    results[:repeated_commands] = repeated_commands
    results[:unarchived_traces] = unarchived_traces
    results
  end

  private

  # Counts builds in the given status per namespace. Missing tables/columns
  # (e.g. schema not migrated yet) yield whatever was collected so far.
  def builds(status)
    results = []

    query = mirror_column? ? BUILDS_QUERY_EE : BUILDS_QUERY_CE
    query = query % [status] # rubocop:disable Style/FormatString
    exec_query_with_custom_random_page_cost(query).each do |row|
      results << transform_builds_row_to_values(row)
    end

    results
  rescue PG::UndefinedTable, PG::UndefinedColumn
    results
  end

  def transform_builds_row_to_values(row)
    values = { namespace: row["namespace_id"].to_s,
               shared_runners: row["shared_runners_enabled"] == "t" ? "yes" : "no",
               value: row["count"].to_i }
    include_ee_fields(values, row)
  end

  # Number of running builds untouched for over an hour; 0 when the schema
  # is missing.
  def stale_builds
    with_connection_pool do |conn|
      conn.exec(STALE_BUILDS_QUERY)[0]["count"].to_i
    end
  rescue PG::UndefinedTable, PG::UndefinedColumn
    0
  end

  def per_runner_builds
    results = []

    query = mirror_column? ? PER_RUNNER_QUERY_EE : PER_RUNNER_QUERY_CE
    exec_query_with_custom_random_page_cost(query).each do |row|
      results << transform_per_runners_builds_row_to_values(row)
    end

    results
  rescue PG::UndefinedTable, PG::UndefinedColumn
    []
  end

  def transform_per_runners_builds_row_to_values(row)
    values = { runner: row["runner_id"].to_s,
               shared_runner: row["is_shared"] == "t" ? "yes" : "no",
               namespace: row["namespace_id"].to_s,
               scheduled: row["pipeline_schedule_id"] ? "yes" : "no",
               triggered: row["trigger_request_id"] ? "yes" : "no",
               value: row["count"].to_i }
    include_ee_fields(values, row)
  end

  def repeated_commands
    results = []

    query = mirror_column? ? REPEATED_COMMANDS_QUERY_EE : REPEATED_COMMANDS_QUERY_CE
    query = query % [allowed_repeated_commands_count] # rubocop:disable Style/FormatString
    exec_query_with_custom_random_page_cost(query).each do |row|
      results << transform_repeated_commands_row_to_values(row)
    end

    results
  rescue PG::UndefinedTable, PG::UndefinedColumn
    []
  end

  def allowed_repeated_commands_count
    @allowed_repeated_commands_count ||= 2
  end

  def transform_repeated_commands_row_to_values(row)
    values = { namespace: row["namespace_id"].to_s,
               project: row["project_id"].to_s,
               shared_runners: row["shared_runners_enabled"] == "t" ? "yes" : "no",
               status: row["status"].to_s,
               value: row["count"].to_i }

    include_has_minutes_field(values, row)
  end

  # Builds finished before the configured offset whose traces were never
  # archived; 0 when the schema is missing.
  def unarchived_traces
    time = Time.now - (unarchived_traces_offset_minutes * 60)
    query = UNARCHIVED_TRACES_QUERY % [time.strftime("%F %T")] # rubocop:disable Style/FormatString

    with_connection_pool do |conn|
      conn.exec(query)[0]["count"].to_i
    end
  rescue PG::UndefinedTable, PG::UndefinedColumn
    0
  end

  def unarchived_traces_offset_minutes
    @unarchived_traces_offset_minutes ||= DEFAULT_UNARCHIVED_TRACES_OFFSET_MINUTES
  end

  # Adds the EE-only labels (mirror flags, has_minutes) when the row has them.
  def include_ee_fields(values, row)
    values.merge!(include_bool_if_row_defined(row, :mirror))
    values.merge!(include_bool_if_row_defined(row, :mirror_trigger_builds))
    include_has_minutes_field(values, row)
  end

  def include_has_minutes_field(values, row)
    values.merge!(include_bool_if_row_defined(row, :has_minutes))
    values
  end

  # Maps a PG boolean column ("t"/"f") to a "yes"/"no" label, or {} when the
  # column is absent/NULL in the row.
  def include_bool_if_row_defined(row, field)
    return {} unless row[field.to_s]
    { field => row[field.to_s] == "t" ? "yes" : "no" }
  end

  # Runs the query inside a transaction with a lowered random_page_cost.
  def exec_query_with_custom_random_page_cost(query)
    with_connection_pool do |conn|
      conn.transaction do |trans|
        trans.exec(SET_RANDOM_PAGE_COST)
        trans.exec(query)
      end
    end
  end

  # Whether the projects table has a "mirror" column (EE schema).
  #
  # Fixed: the original used `@mirror_column ||= ...`, which never caches a
  # false result, so on CE databases the EXISTS query was re-executed on
  # every call. A `defined?` guard memoizes both true and false.
  def mirror_column?
    return @mirror_column if defined?(@mirror_column)

    @mirror_column =
      begin
        with_connection_pool do |conn|
          conn.exec(MIRROR_COLUMN_QUERY)[0]["exists"] == "t"
        end
      rescue PG::UndefinedColumn
        false
      end
  end
end
|
408
|
+
|
409
|
+
# The prober which is called when gathering metrics
#
# Wraps a CiBuildsCollector: probe_db runs the collector once, turns the
# result sets into Prometheus gauges on @metrics, and write_to renders them.
class CiBuildsProber
  # @param opts [Hash] forwarded to CiBuildsCollector (:connection_string
  #   plus optional tuning keys)
  # @param metrics [PrometheusMetrics] sink for the collected gauges
  def initialize(opts, metrics: PrometheusMetrics.new)
    @metrics = metrics

    collector_opts = { connection_string: opts[:connection_string],
                       allowed_repeated_commands_count: opts[:allowed_repeated_commands_count],
                       created_builds_counting_disabled: opts[:created_builds_counting_disabled],
                       unarchived_traces_offset_minutes: opts[:unarchived_traces_offset_minutes] }
    @collector = CiBuildsCollector.new(collector_opts)
  end

  # Runs all probes. Always returns self; a dead database connection is
  # swallowed so a scrape never crashes the exporter.
  def probe_db
    @results = @collector.run

    # :created_builds is absent when counting created builds is disabled.
    ci_builds_metrics(@results[:created_builds], "ci_created_builds") if @results[:created_builds]
    ci_builds_metrics(@results[:pending_builds], "ci_pending_builds")
    ci_stale_builds_metrics
    metrics_per_runner
    repeated_commands_metrics
    unarchived_traces_metrics

    self
  rescue PG::ConnectionBad
    self
  end

  def write_to(target)
    target.write(@metrics.to_s)
  end

  private

  # Emits one gauge per namespace; namespaces contributing fewer than 10
  # builds are aggregated into shared "other" buckets (namespace label "")
  # to keep metric cardinality down.
  def ci_builds_metrics(results_list, metric_name)
    other_values = {}

    results_list.each do |metric|
      # If we have a low value, put the value into an "other" bucket.
      if metric[:value] < 10
        key = { shared_runners: metric[:shared_runners] }
        key[:has_minutes] = metric[:has_minutes] if metric[:has_minutes]

        other_values[key] ||= 0
        other_values[key] += metric[:value]
      else
        add_ci_created_pending_builds(metric_name, metric[:value], metric)
      end
    end

    # Add metrics for the "other" bucket.
    other_values.each { |key, value| add_ci_created_pending_builds(metric_name, value, key) }
  end

  def add_ci_created_pending_builds(metric_name, value, labels)
    add_metric_with_namespace_label(metric_name,
                                    [:namespace, :shared_runners, :has_minutes],
                                    value,
                                    labels)
  end

  def ci_stale_builds_metrics
    @metrics.add("ci_stale_builds", @results[:stale_builds])
  end

  # Same low-value bucketing as ci_builds_metrics, but keyed per runner and
  # with the EE labels (mirror, mirror_trigger_builds, has_minutes) kept
  # only when the collector reported them.
  def metrics_per_runner
    other_values = {}

    @results[:per_runner].each do |metric|
      # If we have a low value, put the value into an "other" bucket.
      if metric[:value] < 10
        key = { runner: metric[:runner], shared_runner: metric[:shared_runner],
                scheduled: metric[:scheduled], triggered: metric[:triggered] }
        key[:mirror] = metric[:mirror] if metric[:mirror]
        key[:mirror_trigger_builds] = metric[:mirror_trigger_builds] if metric[:mirror_trigger_builds]
        key[:has_minutes] = metric[:has_minutes] if metric[:has_minutes]

        other_values[key] ||= 0
        other_values[key] += metric[:value]
      else
        add_ci_running_builds(metric[:value], metric)
      end
    end

    # Add metrics for the "other" bucket.
    other_values.each { |key, value| add_ci_running_builds(value, key) }
  end

  def add_ci_running_builds(value, labels)
    add_metric_with_namespace_label(
      "ci_running_builds",
      [:runner, :namespace, :shared_runner, :scheduled,
       :triggered, :mirror, :mirror_trigger_builds, :has_minutes],
      value,
      labels
    )
  end

  # Emits the gauge with only the allowed labels, sorted by label name for a
  # stable output order. NOTE: mutates the caller's hash by defaulting the
  # :namespace label to "".
  def add_metric_with_namespace_label(metric_name, allowed_labels, value, labels)
    labels[:namespace] = "" unless labels[:namespace]

    selected_labels = labels.select { |k, _| allowed_labels.include?(k) }.sort.to_h
    @metrics.add(metric_name, value, selected_labels)
  end

  # One gauge per repeated-commands row; the row hash minus :value becomes
  # the label set (delete mutates the result row).
  def repeated_commands_metrics
    @results[:repeated_commands].each do |metric|
      value = metric.delete(:value)

      @metrics.add("ci_repeated_commands_builds", value, metric)
    end
  end

  def unarchived_traces_metrics
    @metrics.add("ci_unarchived_traces", @results[:unarchived_traces])
  end
end
|
525
|
+
end
|
526
|
+
end
|
527
|
+
end
|