gitlab_quality-test_tooling 3.6.1 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6922c9895f727e5c4ad2c7fd9ecc7ffa2c17f0de9125ae24def5627dc040d2f9
4
- data.tar.gz: 89a32f162c38d2098ffad75338adfb3ef3d90b773819ba06b03d03db111cd252
3
+ metadata.gz: 3cb2e6580b8b15e78116b52ffd4fed91e0ab7db14ce914eb90f34ace0ca84741
4
+ data.tar.gz: 14c55d5e6048c27a891dded80da8d23c3c3b71df15b04c8ff1c98404643bb19e
5
5
  SHA512:
6
- metadata.gz: 1c216c46fe2a705acba58bee70576cc7e4a71773802bc58ac59f60ef3ca413186f4a3bf5255712986b8943ffe7248797ce1e9d35a078176fa76cad458ea95519
7
- data.tar.gz: 69c7c48e2270d86d89b6c12d5d11800079fe20af2c6024f65b8d2d35f89499a63816cb188a3c086520f571a9288cd51330849b115554904c7e802d9e2d2fff42
6
+ metadata.gz: 607a792c6df20d566ab3ec3dba96b80ed8ae8c08c6c6fe30ab9cbc39ccc2201c0934b86641ff137a2b9726d2b3386ef0ed22cb76f160faeacd74aef91c4007a8
7
+ data.tar.gz: 39d357544ea50bbc8968a224b699110f6957774140e7c8074fa9f28a5cd1cf03b1cdb6f0f0e22fd9a2b97ad06b0d54d183d06db043ff52a048a30758e928294f
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- gitlab_quality-test_tooling (3.6.1)
4
+ gitlab_quality-test_tooling (3.7.0)
5
5
  activesupport (>= 7.0)
6
6
  amatch (~> 0.4.1)
7
7
  fog-google (~> 1.24, >= 1.24.1)
data/README.md CHANGED
@@ -292,7 +292,7 @@ Options:
292
292
  --clickhouse-username USERNAME
293
293
  ClickHouse username
294
294
  --clickhouse-shared-database DATABASE
295
- ClickHouse shared database name (default: shared)
295
+ ClickHouse shared database name
296
296
  --responsibility-patterns PATH
297
297
  Path to YAML file with responsibility classification patterns
298
298
 
@@ -312,7 +312,7 @@ Usage: exe/sync-category-owners [options]
312
312
  Options:
313
313
  --clickhouse-url URL ClickHouse server URL
314
314
  --clickhouse-database DATABASE
315
- ClickHouse database name (default: shared)
315
+ ClickHouse database name
316
316
  --clickhouse-username USERNAME
317
317
  ClickHouse username
318
318
 
@@ -24,7 +24,7 @@ options = OptionParser.new do |opts|
24
24
  params[:clickhouse_url] = url
25
25
  end
26
26
 
27
- opts.on('--clickhouse-database DATABASE', 'ClickHouse database name (default: shared)') do |database|
27
+ opts.on('--clickhouse-database DATABASE', 'ClickHouse database name') do |database|
28
28
  params[:clickhouse_database] = database
29
29
  end
30
30
 
@@ -51,9 +51,6 @@ options = OptionParser.new do |opts|
51
51
  opts.parse(ARGV)
52
52
  end
53
53
 
54
- # Default database to 'shared' if not specified
55
- params[:clickhouse_database] ||= 'shared'
56
-
57
54
  if params.any? && (required_params - params.keys).none?
58
55
  clickhouse_password = ENV.fetch('GLCI_CLICKHOUSE_METRICS_PASSWORD', nil)
59
56
  if clickhouse_password.to_s.strip.empty?
@@ -88,10 +85,9 @@ if params.any? && (required_params - params.keys).none?
88
85
  password: clickhouse_password
89
86
  )
90
87
 
91
- category_owners_table.truncate
92
88
  category_owners_table.push(category_owners.as_db_table)
93
89
 
94
- puts "Successfully synced #{category_owners.as_db_table.length} feature categories to ClickHouse"
90
+ puts "Successfully synced feature category ownership data to ClickHouse"
95
91
  else
96
92
  puts "Missing argument(s). Required arguments are: #{required_params.map { |p| "--#{p.to_s.tr('_', '-')}" }.join(', ')}"
97
93
  puts options
data/exe/test-coverage CHANGED
@@ -22,7 +22,7 @@ require_relative '../lib/gitlab_quality/test_tooling/code_coverage/responsibilit
22
22
  require_relative '../lib/gitlab_quality/test_tooling/code_coverage/responsibility_patterns_config'
23
23
 
24
24
  params = {}
25
- required_params = [:test_reports, :coverage_report, :test_map, :clickhouse_url, :clickhouse_database, :clickhouse_username, :responsibility_patterns]
25
+ required_params = [:test_reports, :coverage_report, :test_map, :clickhouse_url, :clickhouse_database, :clickhouse_username, :clickhouse_shared_database, :responsibility_patterns]
26
26
 
27
27
  options = OptionParser.new do |opts|
28
28
  opts.banner = "Usage: #{$PROGRAM_NAME} [options]"
@@ -55,7 +55,7 @@ options = OptionParser.new do |opts|
55
55
  params[:clickhouse_username] = username
56
56
  end
57
57
 
58
- opts.on('--clickhouse-shared-database DATABASE', 'ClickHouse shared database name (default: shared)') do |database|
58
+ opts.on('--clickhouse-shared-database DATABASE', 'ClickHouse shared database name') do |database|
59
59
  params[:clickhouse_shared_database] = database
60
60
  end
61
61
 
@@ -91,7 +91,7 @@ if params.any? && (required_params - params.keys).none?
91
91
  exit 1
92
92
  end
93
93
 
94
- [:clickhouse_url, :clickhouse_database, :clickhouse_username].each do |param|
94
+ [:clickhouse_url, :clickhouse_database, :clickhouse_username, :clickhouse_shared_database].each do |param|
95
95
  if params[param].to_s.strip.empty?
96
96
  puts "Error: --#{param.to_s.tr('_', '-')} cannot be empty"
97
97
  exit 1
@@ -172,7 +172,7 @@ if params.any? && (required_params - params.keys).none?
172
172
 
173
173
  shared_clickhouse_data = {
174
174
  url: params[:clickhouse_url],
175
- database: params[:clickhouse_shared_database] || 'shared',
175
+ database: params[:clickhouse_shared_database],
176
176
  username: params[:clickhouse_username],
177
177
  password: clickhouse_password
178
178
  }
@@ -13,12 +13,33 @@ module GitlabQuality
13
13
 
14
14
  KNOWN_UNOWNED = %w[shared not_owned tooling].freeze
15
15
 
16
- def truncate
17
- logger.debug("#{LOG_PREFIX} Truncating table #{full_table_name} ...")
16
+ # SQL query to get the latest ownership record for each category
17
+ # Uses window function to avoid loading entire table history
18
+ LATEST_RECORDS_QUERY = <<~SQL
19
+ SELECT category, group, stage, section
20
+ FROM (
21
+ SELECT category, group, stage, section,
22
+ ROW_NUMBER() OVER (PARTITION BY category ORDER BY timestamp DESC) as rn
23
+ FROM %{table_name}
24
+ )
25
+ WHERE rn = 1
26
+ SQL
18
27
 
19
- client.query("TRUNCATE TABLE #{full_table_name}")
28
+ # Insert only new category ownership records that don't already exist
29
+ # This avoids needing TRUNCATE permission
30
+ def push(data)
31
+ return logger.warn("#{LOG_PREFIX} No data found, skipping insert!") if data.empty?
20
32
 
21
- logger.info("#{LOG_PREFIX} Successfully truncated table #{full_table_name}")
33
+ sanitized_data = sanitize_and_filter_data(data)
34
+ return if sanitized_data.empty?
35
+
36
+ new_records = filter_new_records(sanitized_data)
37
+ return if new_records.empty?
38
+
39
+ insert_new_records(new_records, sanitized_data.size)
40
+ rescue StandardError => e
41
+ logger.error("#{LOG_PREFIX} Error occurred while pushing data to #{full_table_name}: #{e.message}")
42
+ raise
22
43
  end
23
44
 
24
45
  # Owners of particular feature category as group, stage and section
@@ -41,14 +62,74 @@ module GitlabQuality
41
62
  private
42
63
 
43
64
  def records
44
- @records ||= client
45
- .query("SELECT category, group, stage, section FROM #{table_name}")
46
- .each_with_object({}) { |record, hsh| hsh[record["category"]] = record.slice("group", "stage", "section") }
65
+ @records ||= fetch_latest_records.each_with_object({}) do |record, hsh|
66
+ hsh[record["category"]] = record.slice("group", "stage", "section")
67
+ end
68
+ end
69
+
70
+ def sanitize_and_filter_data(data)
71
+ logger.debug("#{LOG_PREFIX} Starting data export to ClickHouse")
72
+ sanitized_data = sanitize(data)
73
+
74
+ logger.warn("#{LOG_PREFIX} No valid data found after sanitization, skipping ClickHouse export!") if sanitized_data.empty?
75
+
76
+ sanitized_data
77
+ end
78
+
79
+ def filter_new_records(sanitized_data)
80
+ existing_records = fetch_existing_records
81
+ # Deduplicate against latest records per category to prevent inserting duplicate historical records.
82
+ # This ensures we only insert records with new category+ownership combinations, even if an older
83
+ # version of the same category+ownership existed previously.
84
+ new_records = sanitized_data.reject { |record| existing_records.include?(record_key(record)) }
85
+
86
+ logger.info("#{LOG_PREFIX} No new records to insert, all data already exists") if new_records.empty?
87
+
88
+ new_records
89
+ end
90
+
91
+ def insert_new_records(new_records, total_sanitized_count)
92
+ client.insert_json_data(table_name, new_records)
93
+ new_count = new_records.size
94
+ existing_count = total_sanitized_count - new_count
95
+ record_word = new_count == 1 ? 'record' : 'records'
96
+ logger.info("#{LOG_PREFIX} Inserted #{new_count} new #{record_word} (#{existing_count} already existed)")
97
+ end
98
+
99
+ def fetch_existing_records
100
+ fetch_latest_records.to_set { |record| record_key(record) }
101
+ end
102
+
103
+ def fetch_latest_records
104
+ query = format(LATEST_RECORDS_QUERY, table_name: table_name)
105
+ client.query(query)
106
+ end
107
+
108
+ def sanitized_data_record(record)
109
+ {
110
+ timestamp: time,
111
+ category: record[:feature_category],
112
+ group: record[:group],
113
+ stage: record[:stage],
114
+ section: record[:section]
115
+ }
116
+ end
117
+
118
+ def record_key(record)
119
+ # Create a unique key for the combination of category + ownership
120
+ # Normalize to string keys for consistent access
121
+ normalized = record.transform_keys(&:to_s)
122
+ [
123
+ normalized["category"],
124
+ normalized["group"],
125
+ normalized["stage"],
126
+ normalized["section"]
127
+ ]
47
128
  end
48
129
 
49
130
  # @return [Boolean] True if the record is valid, false otherwise
50
131
  def valid_record?(record)
51
- required_fields = %i[category group stage section]
132
+ required_fields = %i[feature_category group stage section]
52
133
 
53
134
  required_fields.each do |field|
54
135
  if record[field].nil?
@@ -671,7 +671,7 @@ module GitlabQuality
671
671
  # @return [String] the reason to ignore the failures, or `nil` if any failures should not be ignored.
672
672
  def ignored_failure_reason(failures, failure_to_ignore)
673
673
  failures_to_ignore = compute_ignored_failures(failures, failure_to_ignore)
674
- return if failures_to_ignore.empty?
674
+ return if failures_to_ignore.empty? || failures_to_ignore.size < failures.size
675
675
 
676
676
  "the errors included: #{failures_to_ignore.map { |e| "`#{e}`" }.join(', ')}"
677
677
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module GitlabQuality
4
4
  module TestTooling
5
- VERSION = "3.6.1"
5
+ VERSION = "3.7.0"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gitlab_quality-test_tooling
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.6.1
4
+ version: 3.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - GitLab Quality
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-02-03 00:00:00.000000000 Z
11
+ date: 2026-02-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: climate_control