RubyGems - gitlab_quality-test_tooling - Versions diffs - 3.0.0 → 3.7.0 - Mend

gitlab_quality-test_tooling 3.0.0 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

data/lib/gitlab_quality/test_tooling/code_coverage/click_house/category_owners_table.rb CHANGED

@@ -11,58 +11,125 @@ module GitlabQuality
           MissingMappingError = Class.new(StandardError)
-          # Creates the ClickHouse table, if it doesn't exist already
-          # @return [nil]
-          def create
-            logger.debug("#{LOG_PREFIX} Creating category_owners table if it doesn't exist ...")
-            client.query(<<~SQL)
-              CREATE TABLE IF NOT EXISTS #{table_name} (
-                timestamp DateTime64(6, 'UTC') DEFAULT now64(),
-                category String,
-                group String,
-                stage String,
-                section String,
-                INDEX idx_group group TYPE set(360) GRANULARITY 1,
-                INDEX idx_stage stage TYPE set(360) GRANULARITY 1,
-                INDEX idx_section section TYPE set(360) GRANULARITY 1
-              ) ENGINE = MergeTree()
-              ORDER BY (category, timestamp)
-              SETTINGS index_granularity = 8192;
-            SQL
-            logger.info("#{LOG_PREFIX} Category owners table created/verified successfully")
-          end
+          KNOWN_UNOWNED = %w[shared not_owned tooling].freeze
+          # SQL query to get the latest ownership record for each category
+          # Uses window function to avoid loading entire table history
+          LATEST_RECORDS_QUERY = <<~SQL
+            SELECT category, group, stage, section
+            FROM (
+              SELECT category, group, stage, section,
+                     ROW_NUMBER() OVER (PARTITION BY category ORDER BY timestamp DESC) as rn
+              FROM %{table_name}
+            )
+            WHERE rn = 1
+          SQL
+          # Insert only new category ownership records that don't already exist
+          # This avoids needing TRUNCATE permission
+          def push(data)
+            return logger.warn("#{LOG_PREFIX} No data found, skipping insert!") if data.empty?
-          def truncate
-            logger.debug("#{LOG_PREFIX} Truncating table #{full_table_name} ...")
+            sanitized_data = sanitize_and_filter_data(data)
+            return if sanitized_data.empty?
-            client.query("TRUNCATE TABLE #{full_table_name}")
+            new_records = filter_new_records(sanitized_data)
+            return if new_records.empty?
-            logger.info("#{LOG_PREFIX} Successfully truncated table #{full_table_name}")
+            insert_new_records(new_records, sanitized_data.size)
+          rescue StandardError => e
+            logger.error("#{LOG_PREFIX} Error occurred while pushing data to #{full_table_name}: #{e.message}")
+            raise
           end
-          # Owners of particular category as group, stage and section
+          # Owners of particular feature category as group, stage and section
           #
-          # @param category_name [String]
+          # @param feature_category_name [String] the feature_category name
           # @return [Hash]
-          def owners(category_name)
-            records.fetch(category_name)
+          def owners(feature_category_name)
+            if KNOWN_UNOWNED.include?(feature_category_name)
+              logger.info(
+                "#{LOG_PREFIX} #{feature_category_name} is a known feature category without owner..."
+              )
+              return {}
+            end
+            records.fetch(feature_category_name)
           rescue KeyError
-            raise(MissingMappingError, "Category '#{category_name}' not found in table '#{table_name}'")
+            raise(MissingMappingError, "Feature category '#{feature_category_name}' not found in table '#{table_name}'")
           end
           private
           def records
-            @records ||= client
-              .query("SELECT category, group, stage, section FROM #{table_name}")
-              .each_with_object({}) { |record, hsh| hsh[record["category"]] = record.slice("group", "stage", "section") }
+            @records ||= fetch_latest_records.each_with_object({}) do |record, hsh|
+              hsh[record["category"]] = record.slice("group", "stage", "section")
+            end
+          end
+          def sanitize_and_filter_data(data)
+            logger.debug("#{LOG_PREFIX} Starting data export to ClickHouse")
+            sanitized_data = sanitize(data)
+            logger.warn("#{LOG_PREFIX} No valid data found after sanitization, skipping ClickHouse export!") if sanitized_data.empty?
+            sanitized_data
+          end
+          def filter_new_records(sanitized_data)
+            existing_records = fetch_existing_records
+            # Deduplicate against latest records per category to prevent inserting duplicate historical records.
+            # This ensures we only insert records with new category+ownership combinations, even if an older
+            # version of the same category+ownership existed previously.
+            new_records = sanitized_data.reject { |record| existing_records.include?(record_key(record)) }
+            logger.info("#{LOG_PREFIX} No new records to insert, all data already exists") if new_records.empty?
+            new_records
+          end
+          def insert_new_records(new_records, total_sanitized_count)
+            client.insert_json_data(table_name, new_records)
+            new_count = new_records.size
+            existing_count = total_sanitized_count - new_count
+            record_word = new_count == 1 ? 'record' : 'records'
+            logger.info("#{LOG_PREFIX} Inserted #{new_count} new #{record_word} (#{existing_count} already existed)")
+          end
+          def fetch_existing_records
+            fetch_latest_records.to_set { |record| record_key(record) }
+          end
+          def fetch_latest_records
+            query = format(LATEST_RECORDS_QUERY, table_name: table_name)
+            client.query(query)
+          end
+          def sanitized_data_record(record)
+            {
+              timestamp: time,
+              category: record[:feature_category],
+              group: record[:group],
+              stage: record[:stage],
+              section: record[:section]
+            }
+          end
+          def record_key(record)
+            # Create a unique key for the combination of category + ownership
+            # Normalize to string keys for consistent access
+            normalized = record.transform_keys(&:to_s)
+            [
+              normalized["category"],
+              normalized["group"],
+              normalized["stage"],
+              normalized["section"]
+            ]
           end
           # @return [Boolean] True if the record is valid, false otherwise
           def valid_record?(record)
-            required_fields = %i[category group stage section]
+            required_fields = %i[feature_category group stage section]
             required_fields.each do |field|
               if record[field].nil?

data/lib/gitlab_quality/test_tooling/code_coverage/click_house/coverage_metrics_table.rb CHANGED

@@ -1,7 +1,7 @@
 # frozen_string_literal: true
-require 'time'
 require_relative 'table'
+require_relative 'category_owners_table'
 module GitlabQuality
   module TestTooling
@@ -10,39 +10,15 @@ module GitlabQuality
         class CoverageMetricsTable < GitlabQuality::TestTooling::CodeCoverage::ClickHouse::Table
           TABLE_NAME = "coverage_metrics"
-          # Creates the ClickHouse table, if it doesn't exist already
-          # @return [nil]
-          def create
-            logger.debug("#{LOG_PREFIX} Creating coverage_metrics table if it doesn't exist ...")
-            client.query(<<~SQL)
-              CREATE TABLE IF NOT EXISTS #{table_name} (
-                timestamp DateTime64(6, 'UTC'),
-                file String,
-                line_coverage Float64,
-                branch_coverage Nullable(Float64),
-                function_coverage Nullable(Float64),
-                source_file_type String,
-                category Nullable(String),
-                ci_project_id Nullable(UInt32),
-                ci_project_path Nullable(String),
-                ci_job_name Nullable(String),
-                ci_job_id Nullable(UInt64),
-                ci_pipeline_id Nullable(UInt64),
-                ci_merge_request_iid Nullable(UInt32),
-                ci_branch Nullable(String),
-                ci_target_branch Nullable(String)
-              ) ENGINE = MergeTree()
-              PARTITION BY toYYYYMM(timestamp)
-              ORDER BY (ci_project_path, timestamp, file, ci_pipeline_id)
-              SETTINGS index_granularity = 8192, allow_nullable_key = 1;
-            SQL
-            logger.info("#{LOG_PREFIX} Coverage metrics table created/verified successfully")
+          def initialize(category_owners_table: nil, **args)
+            super(**args)
+            @category_owners_table = category_owners_table
           end
           private
+          attr_reader :category_owners_table
           # @return [Boolean] True if the record is valid, false otherwise
           def valid_record?(record)
             valid_file?(record) &&
@@ -97,17 +73,48 @@ module GitlabQuality
               branch_coverage: record[:branch_coverage],
               function_coverage: record[:function_coverage],
               source_file_type: record[:source_file_type],
-              category: record[:category],
+              is_responsible: record[:is_responsible],
+              is_dependent: record[:is_dependent],
+              category: record[:feature_category],
+              **coverage_counts(record),
+              **org_data(record[:feature_category]),
               **ci_metadata
             }
           end
-          # @return [Time] Common timestamp for all coverage records
-          def time
-            @time ||= begin
-              ci_created_at = ENV.fetch('CI_PIPELINE_CREATED_AT', nil)
-              ci_created_at ? Time.strptime(ci_created_at, '%Y-%m-%dT%H:%M:%S%z') : Time.now.utc
-            end
+          # @return [Hash] Raw coverage counts from the record
+          def coverage_counts(record)
+            {
+              total_lines: record[:total_lines] || 0,
+              covered_lines: record[:covered_lines] || 0,
+              total_branches: record[:total_branches] || 0,
+              covered_branches: record[:covered_branches] || 0,
+              total_functions: record[:total_functions] || 0,
+              covered_functions: record[:covered_functions] || 0
+            }
+          end
+          # @param category [String, nil] Feature category name
+          # @return [Hash] Organization data (group, stage, section) for the category
+          def org_data(category)
+            return { group: '', stage: '', section: '' } if category.nil? || category_owners_table.nil?
+            @org_data_cache ||= {}
+            @org_data_cache[category] ||= fetch_org_data(category)
+          end
+          # @param category [String] Feature category name
+          # @return [Hash] Organization data fetched from category_owners_table
+          def fetch_org_data(category)
+            owners = category_owners_table.owners(category)
+            {
+              group: owners['group'] || '',
+              stage: owners['stage'] || '',
+              section: owners['section'] || ''
+            }
+          rescue CategoryOwnersTable::MissingMappingError
+            logger.warn("#{LOG_PREFIX} No org data found for category '#{category}', using empty values")
+            { group: '', stage: '', section: '' }
           end
           # @return [Hash] CI-related metadata

data/lib/gitlab_quality/test_tooling/code_coverage/click_house/table.rb CHANGED

@@ -1,5 +1,7 @@
 # frozen_string_literal: true
+require 'time'
 module GitlabQuality
   module TestTooling
     module CodeCoverage
@@ -58,6 +60,21 @@ module GitlabQuality
             raise NotImplementedError, "#{self.class}##{__method__} method must be implemented in a subclass"
           end
+          # @return [Time] Common timestamp for all records, memoized
+          def time
+            @time ||= parse_ci_timestamp
+          end
+          def parse_ci_timestamp
+            ci_created_at = ENV.fetch('CI_PIPELINE_CREATED_AT', nil)
+            return Time.now.utc unless ci_created_at
+            Time.strptime(ci_created_at, '%Y-%m-%dT%H:%M:%S%z')
+          rescue ArgumentError
+            logger.warn("#{LOG_PREFIX} Invalid CI_PIPELINE_CREATED_AT format: #{ci_created_at}, using current time")
+            Time.now.utc
+          end
           # @return [GitlabQuality::TestTooling::ClickHouse::Client]
           def client
             @client ||= GitlabQuality::TestTooling::ClickHouse::Client.new(

data/lib/gitlab_quality/test_tooling/code_coverage/click_house/test_file_mappings_table.rb ADDED

@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+require_relative 'table'
+module GitlabQuality
+  module TestTooling
+    module CodeCoverage
+      module ClickHouse
+        class TestFileMappingsTable < GitlabQuality::TestTooling::CodeCoverage::ClickHouse::Table
+          TABLE_NAME = "test_file_mappings"
+          private
+          # @return [Boolean] True if the record is valid, false otherwise
+          def valid_record?(record)
+            valid_test_file?(record) && valid_source_file?(record)
+          end
+          # @return [Boolean] True if the test_file field is present
+          def valid_test_file?(record)
+            return true unless record[:test_file].blank?
+            logger.warn("#{LOG_PREFIX} Skipping record with nil/empty test_file: #{record}")
+            false
+          end
+          # @return [Boolean] True if the source_file field is present
+          def valid_source_file?(record)
+            return true unless record[:source_file].blank?
+            logger.warn("#{LOG_PREFIX} Skipping record with nil/empty source_file: #{record}")
+            false
+          end
+          # @return [Hash] Transformed mapping data including timestamp and CI metadata
+          def sanitized_data_record(record)
+            {
+              timestamp: time,
+              test_file: record[:test_file],
+              source_file: record[:source_file],
+              ci_project_path: ENV.fetch('CI_PROJECT_PATH', nil),
+              category: record[:category] || '',
+              group: record[:group] || '',
+              stage: record[:stage] || '',
+              section: record[:section] || ''
+            }
+          end
+        end
+      end
+    end
+  end
+end

data/lib/gitlab_quality/test_tooling/code_coverage/coverage_data.rb CHANGED

@@ -4,22 +4,31 @@ module GitlabQuality
   module TestTooling
     module CodeCoverage
       class CoverageData
+        RESPONSIBLE = 'responsible'
+        DEPENDENT = 'dependent'
         # @param [Hash<String, Hash>] code_coverage_by_source_file Source file
         #   mapped to test coverage data
         # @param [Hash<String, Array<String>>] source_file_to_tests Source files
         #   mapped to all test files testing them
-        # @param [Hash<String, Array<String>>] tests_to_categories Test files
+        # @param [Hash<String, Array<String>>] tests_to_feature_categories Test files
         #   mapped to all feature categories they belong to
-        # @param [Hash<String, Hash>] categories_to_teams Mapping of categories
+        # @param [Hash<String, Hash>] feature_categories_to_teams Mapping of feature categories
         #   to teams (i.e., groups, stages, sections)
         # @param [Hash<String, String>] source_file_types Mapping of source files
         #   to their types (frontend, backend, etc.)
-        def initialize(code_coverage_by_source_file, source_file_to_tests, tests_to_categories, categories_to_teams, source_file_types = {})
+        # @param [Hash<String, String>] test_classifications Mapping of test files
+        #   to their responsibility classification (responsible or dependent)
+        def initialize(
+          code_coverage_by_source_file, source_file_to_tests, tests_to_feature_categories,
+          feature_categories_to_teams, source_file_types = {}, test_classifications = {}
+        )
           @code_coverage_by_source_file = code_coverage_by_source_file
           @source_file_to_tests = source_file_to_tests
-          @tests_to_categories = tests_to_categories
-          @categories_to_teams = categories_to_teams
+          @tests_to_feature_categories = tests_to_feature_categories
+          @feature_categories_to_teams = feature_categories_to_teams
           @source_file_types = source_file_types
+          @test_classifications = test_classifications
         end
         # @return [Array<Hash<Symbol, String>>] Mapping of column name to row
@@ -32,7 +41,9 @@ module GitlabQuality
         #       branch_coverage: 95.0
         #       function_coverage: 100.0
         #       source_file_type: "backend"
-        #       category: "team_planning"
+        #       is_responsible: true
+        #       is_dependent: false
+        #       feature_category: "team_planning"
         #       group: "project_management"
         #       stage: "plan"
         #       section: "dev"
@@ -40,55 +51,87 @@ module GitlabQuality
         #     ...
         #   ]
         def as_db_table
-          all_files.flat_map do |file|
-            coverage_data = @code_coverage_by_source_file[file]
-            line_coverage = coverage_data&.dig(:percentage)
-            branch_coverage = coverage_data&.dig(:branch_percentage)
-            function_coverage = coverage_data&.dig(:function_percentage)
-            categories = categories_for(file)
-            base_data = {
-              file: file,
-              line_coverage: line_coverage,
-              branch_coverage: branch_coverage,
-              function_coverage: function_coverage,
-              source_file_type: @source_file_types[file] || 'other'
-            }
-            if categories.empty?
-              base_data.merge(no_owner_info)
-            else
-              categories.map do |category|
-                base_data.merge(owner_info(category))
-              end
+          all_files.flat_map { |file| records_for_file(file) }
+        end
+        private
+        def records_for_file(file)
+          base_data = base_data_for(file)
+          feature_categories_with_flags = feature_categories_with_responsibility_flags_for(file)
+          if feature_categories_with_flags.empty?
+            base_data.merge(no_owner_info).merge(is_responsible: nil, is_dependent: nil)
+          else
+            feature_categories_with_flags.map do |feature_category, flags|
+              base_data.merge(owner_info(feature_category)).merge(
+                is_responsible: flags[:is_responsible],
+                is_dependent: flags[:is_dependent]
+              )
             end
           end
         end
-        private
+        def base_data_for(file)
+          coverage_data = @code_coverage_by_source_file[file]
+          {
+            file: file,
+            line_coverage: coverage_data&.dig(:percentage),
+            branch_coverage: coverage_data&.dig(:branch_percentage),
+            function_coverage: coverage_data&.dig(:function_percentage),
+            source_file_type: @source_file_types[file] || 'other',
+            total_lines: coverage_data&.dig(:total_lines) || 0,
+            covered_lines: coverage_data&.dig(:covered_lines) || 0,
+            total_branches: coverage_data&.dig(:total_branches) || 0,
+            covered_branches: coverage_data&.dig(:covered_branches) || 0,
+            total_functions: coverage_data&.dig(:total_functions) || 0,
+            covered_functions: coverage_data&.dig(:covered_functions) || 0
+          }
+        end
         def no_owner_info
           {
-            category: nil,
+            feature_category: nil,
             group: nil,
             stage: nil,
             section: nil
           }
         end
-        def owner_info(category)
-          owner_info = @categories_to_teams[category]
+        def owner_info(feature_category)
+          owner_info = @feature_categories_to_teams[feature_category]
           {
-            category: category,
+            feature_category: feature_category,
             group: owner_info&.dig(:group),
             stage: owner_info&.dig(:stage),
             section: owner_info&.dig(:section)
           }
         end
-        def categories_for(file)
-          @source_file_to_tests[file]&.flat_map { |test_file| @tests_to_categories[test_file] || [] }&.uniq || []
+        # Returns a hash of feature_category => { is_responsible: bool, is_dependent: bool }
+        # for a given source file. A feature category can have both flags true if it has
+        # both unit tests (responsible) and integration/E2E tests (dependent).
+        def feature_categories_with_responsibility_flags_for(file)
+          test_files = @source_file_to_tests[file] || []
+          return {} if test_files.empty?
+          test_files.each_with_object({}) do |test_file, feature_category_to_flags|
+            feature_categories = @tests_to_feature_categories[test_file] || []
+            classification = @test_classifications[test_file]
+            feature_categories.each do |feature_category|
+              feature_category_to_flags[feature_category] ||= { is_responsible: false, is_dependent: false }
+              case classification
+              when RESPONSIBLE
+                feature_category_to_flags[feature_category][:is_responsible] = true
+              when DEPENDENT
+                feature_category_to_flags[feature_category][:is_dependent] = true
+              end
+            end
+          end
         end
         def all_files

data/lib/gitlab_quality/test_tooling/code_coverage/lcov_file.rb CHANGED

@@ -85,7 +85,7 @@ module GitlabQuality
         end
         def register_source_file(filename)
-          @current_file = filename.gsub(%r{^\./}, '')
+          @current_file = normalize_path(filename)
           @parsed_content[@current_file] = {
             line_coverage: {},
             branch_coverage: {},
@@ -94,6 +94,16 @@ module GitlabQuality
           }
         end
+        def normalize_path(filename)
+          # Remove leading ./ if present
+          path = filename.gsub(%r{^\./}, '')
+          # Handle GDK/CI paths like "../../../home/gdk/gitlab-development-kit/gitlab/app/..."
+          # Extract path starting from known root directories
+          match = path.match(%r{((?:ee/)?(?:app|lib|config|db|spec|scripts|tooling|workhorse|vendor)/.+)$})
+          match ? match[1] : path
+        end
         def register_line_data(line_no, count)
           return unless @current_file

data/lib/gitlab_quality/test_tooling/code_coverage/responsibility_classifier.rb ADDED

@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+module GitlabQuality
+  module TestTooling
+    module CodeCoverage
+      class ResponsibilityClassifier
+        RESPONSIBLE = 'responsible'
+        DEPENDENT = 'dependent'
+        # @param test_to_sources [Hash<String, Array<String>>] Test files mapped to source files they cover
+        # @param responsible_patterns [Array<Regexp>] Patterns for unit tests
+        # @param dependent_patterns [Array<Regexp>] Patterns for integration/E2E tests
+        def initialize(test_to_sources, responsible_patterns:, dependent_patterns:)
+          @test_to_sources = test_to_sources
+          @responsible_patterns = responsible_patterns
+          @dependent_patterns = dependent_patterns
+        end
+        # Classifies each test file as responsible or dependent
+        # @return [Hash<String, String>] Test file path => classification
+        def classify_tests
+          @test_to_sources.keys.each_with_object({}) do |test_file, result|
+            result[test_file] = classify_test(test_file)
+          end
+        end
+        private
+        # Classifies a test file as responsible (unit) or dependent (integration/E2E).
+        #
+        # Dependent patterns are checked first because it's the safer default:
+        # - is_responsible: true claims "this file has unit test coverage"
+        # - is_dependent: true claims "this file has integration test coverage"
+        #
+        # If uncertain (overlapping patterns or no match), we default to dependent
+        # to avoid incorrectly inflating unit test coverage metrics.
+        def classify_test(test_file)
+          return DEPENDENT if @dependent_patterns.any? { |p| test_file.match?(p) }
+          return RESPONSIBLE if @responsible_patterns.any? { |p| test_file.match?(p) }
+          # Default to dependent for unknown test types
+          DEPENDENT
+        end
+      end
+    end
+  end
+end

data/lib/gitlab_quality/test_tooling/code_coverage/responsibility_patterns_config.rb ADDED

@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+require 'yaml'
+module GitlabQuality
+  module TestTooling
+    module CodeCoverage
+      class ResponsibilityPatternsConfig
+        ConfigError = Class.new(StandardError)
+        attr_reader :responsible_patterns, :dependent_patterns
+        # @param file_path [String] Path to YAML config file
+        # @raise [ConfigError] if file cannot be loaded or parsed
+        def initialize(file_path)
+          @file_path = file_path
+          @config = load_config
+          @responsible_patterns = parse_patterns('responsible')
+          @dependent_patterns = parse_patterns('dependent')
+        end
+        private
+        def load_config
+          YAML.load_file(@file_path)
+        rescue Errno::ENOENT
+          raise ConfigError, "Config file not found: #{@file_path}"
+        rescue Psych::SyntaxError => e
+          raise ConfigError, "Invalid YAML syntax in #{@file_path}: #{e.message}"
+        end
+        def parse_patterns(key)
+          patterns = @config[key]
+          raise ConfigError, "Missing or invalid '#{key}' key in #{@file_path}. Expected an array of patterns." unless patterns.is_a?(Array)
+          patterns.map do |pattern|
+            Regexp.new(pattern)
+          rescue RegexpError => e
+            raise ConfigError, "Invalid regex pattern '#{pattern}' in #{@file_path}: #{e.message}"
+          end
+        end
+      end
+    end
+  end
+end