ruby-maat 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +7 -0
  2. data/.commitlintrc.json +44 -0
  3. data/.mailmap +3 -0
  4. data/.overcommit.yml +77 -0
  5. data/.release-please-config.json +33 -0
  6. data/.release-please-manifest.json +3 -0
  7. data/.rspec +3 -0
  8. data/.rubocop.yml +48 -0
  9. data/CHANGELOG.md +46 -0
  10. data/CI_CD_SETUP.md +180 -0
  11. data/CLAUDE.md +130 -0
  12. data/Dockerfile +40 -0
  13. data/README.md +444 -0
  14. data/README_RUBY.md +300 -0
  15. data/RELEASE_PLEASE_SETUP.md +198 -0
  16. data/RUBY_MAAT.md +227 -0
  17. data/Rakefile +12 -0
  18. data/doc/imgs/abs_churn_sample.png +0 -0
  19. data/doc/imgs/code_age_sample.png +0 -0
  20. data/doc/imgs/coupling_sample.png +0 -0
  21. data/doc/imgs/crime_cover.jpg +0 -0
  22. data/doc/imgs/tree_map_sample.png +0 -0
  23. data/doc/intro.md +3 -0
  24. data/exe/ruby-maat +6 -0
  25. data/lib/ruby_maat/analysis/authors.rb +47 -0
  26. data/lib/ruby_maat/analysis/base_analysis.rb +70 -0
  27. data/lib/ruby_maat/analysis/churn.rb +255 -0
  28. data/lib/ruby_maat/analysis/code_age.rb +53 -0
  29. data/lib/ruby_maat/analysis/commit_messages.rb +58 -0
  30. data/lib/ruby_maat/analysis/communication.rb +56 -0
  31. data/lib/ruby_maat/analysis/effort.rb +150 -0
  32. data/lib/ruby_maat/analysis/entities.rb +40 -0
  33. data/lib/ruby_maat/analysis/identity.rb +12 -0
  34. data/lib/ruby_maat/analysis/logical_coupling.rb +134 -0
  35. data/lib/ruby_maat/analysis/sum_of_coupling.rb +43 -0
  36. data/lib/ruby_maat/analysis/summary.rb +43 -0
  37. data/lib/ruby_maat/app.rb +143 -0
  38. data/lib/ruby_maat/change_record.rb +47 -0
  39. data/lib/ruby_maat/cli.rb +187 -0
  40. data/lib/ruby_maat/dataset.rb +205 -0
  41. data/lib/ruby_maat/groupers/layer_grouper.rb +67 -0
  42. data/lib/ruby_maat/groupers/team_mapper.rb +51 -0
  43. data/lib/ruby_maat/groupers/time_grouper.rb +70 -0
  44. data/lib/ruby_maat/output/csv_output.rb +65 -0
  45. data/lib/ruby_maat/parsers/base_parser.rb +63 -0
  46. data/lib/ruby_maat/parsers/git2_parser.rb +72 -0
  47. data/lib/ruby_maat/parsers/git_parser.rb +66 -0
  48. data/lib/ruby_maat/parsers/mercurial_parser.rb +64 -0
  49. data/lib/ruby_maat/parsers/perforce_parser.rb +77 -0
  50. data/lib/ruby_maat/parsers/svn_parser.rb +76 -0
  51. data/lib/ruby_maat/parsers/tfs_parser.rb +103 -0
  52. data/lib/ruby_maat/version.rb +5 -0
  53. data/lib/ruby_maat.rb +44 -0
  54. metadata +143 -0
@@ -0,0 +1,187 @@
# frozen_string_literal: true

require "optparse"
require "date"

module RubyMaat
  # Command Line Interface - Ruby port of code-maat.cmd-line
  #
  # Responsibilities:
  #   * declare and parse every supported command-line switch into @options
  #   * validate that the required options (-l and -c) were supplied
  #   * fill in the documented defaults for the analysis thresholds
  #   * hand the finished options hash to RubyMaat::App
  class CLI
    VERSION_INFO = "Ruby Maat version #{RubyMaat::VERSION} - A Ruby port of Code Maat".freeze

    def initialize
      @options = {}
      @parser = build_option_parser
    end

    # Parses +args+ (destructively), runs the requested analysis, and exits
    # non-zero on failure. User errors (bad switches, missing required
    # options, malformed dates) print the usage text; unexpected errors print
    # a backtrace only when --verbose was given.
    def run(args)
      @parser.parse!(args)

      if @options[:help]
        puts usage
        exit 0
      end

      validate_required_options!

      app = App.new(@options)
      app.run
    rescue OptionParser::ParseError, ArgumentError => e
      # OptionParser::ParseError is the common ancestor of InvalidOption,
      # MissingArgument, AmbiguousOption, etc., so every parser failure is
      # reported uniformly. ArgumentError comes from our own validation.
      warn "Error: #{e.message}"
      warn usage
      exit 1
    rescue => e
      warn "Error: #{e.message}"
      warn e.backtrace.join("\n") if @options[:verbose]
      exit 1
    end

    private

    # Declares every CLI switch and wires it to the corresponding
    # @options key. Returns the configured OptionParser.
    def build_option_parser
      OptionParser.new do |opts|
        opts.banner = usage_banner

        # Required options
        opts.on("-l", "--log LOG", "Log file with input data") do |log|
          @options[:log] = log
        end

        opts.on("-c", "--version-control VCS",
          "Input vcs module type: supports svn, git, git2, hg, p4, or tfs") do |vcs|
          @options[:version_control] = vcs
        end

        # Analysis selection
        opts.on("-a", "--analysis ANALYSIS",
          "The analysis to run (#{App.analysis_names})",
          "(default: authors)") do |analysis|
          @options[:analysis] = analysis
        end

        # Input/Output options
        opts.on("--input-encoding ENCODING",
          "Specify an encoding other than UTF-8 for the log file") do |encoding|
          @options[:input_encoding] = encoding
        end

        opts.on("-r", "--rows ROWS", Integer, "Max rows in output") do |rows|
          @options[:rows] = rows
        end

        opts.on("-o", "--outfile OUTFILE", "Write the result to the given file name") do |outfile|
          @options[:outfile] = outfile
        end

        # Grouping and mapping options
        opts.on("-g", "--group GROUP",
          "A file with a pre-defined set of layers. Data will be aggregated according to the group of layers.") do |group|
          @options[:group] = group
        end

        opts.on("-p", "--team-map-file TEAM_MAP_FILE",
          "A CSV file with author,team that translates individuals into teams.") do |team_map|
          @options[:team_map_file] = team_map
        end

        # Analysis threshold options
        opts.on("-n", "--min-revs MIN_REVS", Integer,
          "Minimum number of revisions to include an entity in the analysis (default: 5)") do |min_revs|
          @options[:min_revs] = min_revs
        end

        opts.on("-m", "--min-shared-revs MIN_SHARED_REVS", Integer,
          "Minimum number of shared revisions to include an entity in the analysis (default: 5)") do |min_shared|
          @options[:min_shared_revs] = min_shared
        end

        opts.on("-i", "--min-coupling MIN_COUPLING", Integer,
          "Minimum degree of coupling (in percentage) to consider (default: 30)") do |min_coupling|
          @options[:min_coupling] = min_coupling
        end

        opts.on("-x", "--max-coupling MAX_COUPLING", Integer,
          "Maximum degree of coupling (in percentage) to consider (default: 100)") do |max_coupling|
          @options[:max_coupling] = max_coupling
        end

        opts.on("-s", "--max-changeset-size MAX_CHANGESET_SIZE", Integer,
          "Maximum number of modules in a change set if it shall be included in a coupling analysis (default: 30)") do |max_size|
          @options[:max_changeset_size] = max_size
        end

        # Analysis-specific options
        opts.on("-e", "--expression-to-match MATCH_EXPRESSION",
          "A regex to match against commit messages. Used with -messages analyses") do |expression|
          @options[:expression_to_match] = expression
        end

        opts.on("-t", "--temporal-period TEMPORAL_PERIOD",
          "Used for coupling analyses. Instructs Ruby Maat to consider all commits during the rolling temporal period as a single, logical commit set") do |period|
          @options[:temporal_period] = period
        end

        opts.on("-d", "--age-time-now AGE_TIME_NOW",
          "Specify a date as YYYY-MM-dd that counts as time zero when doing a code age analysis") do |date_str|
          @options[:age_time_now] = Date.parse(date_str)
        rescue Date::Error
          # Re-raise as ArgumentError so run's user-error handler prints usage.
          raise ArgumentError, "Invalid date format for --age-time-now: #{date_str}. Use YYYY-MM-dd format."
        end

        opts.on("--verbose-results",
          "Includes additional analysis details together with the results. Only implemented for change coupling.") do
          @options[:verbose_results] = true
        end

        # Help and version
        opts.on("-h", "--help", "Show this help message") do
          @options[:help] = true
        end

        opts.on("--version", "Show version information") do
          puts VERSION_INFO
          exit 0
        end

        opts.on("--verbose", "Enable verbose error output") do
          @options[:verbose] = true
        end
      end
    end

    # Banner printed ahead of the generated option help.
    def usage_banner
      <<~BANNER
        #{VERSION_INFO}

        This is Ruby Maat, a Ruby port of Code Maat - a program used to collect statistics from a VCS.

        Usage: ruby-maat -l log-file -c vcs-type [options]

        Options:
      BANNER
    end

    # Full help text (banner + option summaries) as produced by OptionParser.
    def usage
      @parser.help
    end

    # Raises ArgumentError unless -l/--log and -c/--version-control were
    # given, then applies the documented defaults for any threshold the
    # user did not override.
    def validate_required_options!
      missing = []
      missing << "log file (-l/--log)" unless @options[:log]
      missing << "version control system (-c/--version-control)" unless @options[:version_control]

      raise ArgumentError, "Missing required options: #{missing.join(", ")}" unless missing.empty?

      # Set defaults
      @options[:analysis] ||= "authors"
      @options[:min_revs] ||= 5
      @options[:min_shared_revs] ||= 5
      @options[:min_coupling] ||= 30
      @options[:max_coupling] ||= 100
      @options[:max_changeset_size] ||= 30
    end
  end
end
@@ -0,0 +1,205 @@
# frozen_string_literal: true

require "rover"
require "set"

module RubyMaat
  # Wrapper around Rover DataFrame to provide domain-specific operations.
  # This replaces Incanter datasets from the Clojure version.
  #
  # The frame is built with string column names ("entity", "author", "date",
  # "revision", "message", "loc_added", "loc_deleted") — see build_dataframe —
  # so row-level access throughout this class uses string keys.
  class Dataset
    def initialize(change_records = [])
      @data = build_dataframe(change_records)
    end

    # Convenience constructor mirroring the Clojure API.
    def self.from_changes(change_records)
      new(change_records)
    end

    # The underlying Rover::DataFrame.
    def to_df
      @data
    end

    # Group by entity and count distinct authors
    def group_by_entity_count_authors
      @data.group(:entity).count(:author, name: "n_authors")
    end

    # Group by entity and count revisions
    def group_by_entity_count_revisions
      @data.group(:entity).count(:revision, name: "n_revs")
    end

    # Group by author and sum churn metrics
    def group_by_author_sum_churn
      @data.group(:author).sum(%i[loc_added loc_deleted])
    end

    # Group by entity and sum churn metrics
    def group_by_entity_sum_churn
      @data.group(:entity).sum(%i[loc_added loc_deleted])
    end

    # All distinct entities (files). Returns [] for an empty dataset.
    def entities
      return [] if @data.empty?

      @data[:entity].uniq
    end

    # All distinct authors. Returns [] for an empty dataset.
    def authors
      return [] if @data.empty?

      @data[:author].uniq
    end

    # Returns a new Dataset containing only entities that appear in at
    # least +min_revs+ distinct revisions.
    def filter_min_revisions(min_revs)
      # Count distinct revisions per entity.
      entity_revision_counts = {}
      @data.to_a.each do |row|
        entity = row["entity"]
        revision = row["revision"]
        entity_revision_counts[entity] ||= Set.new
        entity_revision_counts[entity] << revision
      end

      # Keep only the entities with enough revisions.
      entities_to_keep = entity_revision_counts.select { |_, revisions| revisions.size >= min_revs }.keys

      filtered_rows = @data.to_a.select { |row| entities_to_keep.include?(row["entity"]) }
      Dataset.from_changes(rows_to_records(filtered_rows))
    end

    # All unordered entity pairs that changed together in at least one
    # revision. A pair is emitted once per shared revision.
    def coupling_pairs
      # Group by revision to find entities that changed together.
      revision_entities = {}

      @data.to_a.each do |row|
        revision = row["revision"]
        entity = row["entity"]

        revision_entities[revision] ||= []
        revision_entities[revision] << entity unless revision_entities[revision].include?(entity)
      end

      pairs = []
      revision_entities.each_value do |entities|
        entities.combination(2) do |entity1, entity2|
          pairs << [entity1, entity2]
        end
      end

      pairs
    end

    # Number of revisions in which both entities were changed.
    def shared_revisions_count(entity1, entity2)
      entity1_revs = Set.new
      entity2_revs = Set.new

      @data.to_a.each do |row|
        if row["entity"] == entity1
          entity1_revs << row["revision"]
        elsif row["entity"] == entity2
          entity2_revs << row["revision"]
        end
      end

      (entity1_revs & entity2_revs).size
    end

    # Number of distinct revisions that touched +entity+.
    def revision_count(entity)
      revisions = Set.new
      @data.to_a.each do |row|
        revisions << row["revision"] if row["entity"] == entity
      end
      revisions.size
    end

    # Sorted distinct dates. Returns [] for an empty dataset
    # (an empty frame has no :date column to index into).
    def unique_dates
      return [] if @data.empty?

      @data[:date].uniq.sort
    end

    # Returns a new Dataset restricted to records whose date lies within
    # [start_date, end_date] (inclusive).
    def filter_date_range(start_date, end_date)
      # NOTE(review): rows are accessed with string keys, consistent with the
      # string column names used when the frame is built.
      filtered_rows = @data.to_a.select { |row| row["date"].between?(start_date, end_date) }
      Dataset.from_changes(rows_to_records(filtered_rows))
    end

    # Get latest date for each entity (for age analysis)
    def latest_date_by_entity
      @data.group(:entity).max(:date)
    end

    # Number of rows in the dataset.
    def size
      @data.count
    end

    def empty?
      @data.empty?
    end

    private

    # Rebuilds ChangeRecord objects from row hashes (string keys), shared by
    # the filtering operations above.
    def rows_to_records(rows)
      rows.map do |row|
        ChangeRecord.new(
          entity: row["entity"],
          author: row["author"],
          date: row["date"],
          revision: row["revision"],
          message: row["message"],
          loc_added: row["loc_added"],
          loc_deleted: row["loc_deleted"]
        )
      end
    end

    # Builds the column-oriented hash Rover expects. String keys are used
    # throughout; an empty record list yields an empty frame.
    def build_dataframe(change_records)
      return Rover::DataFrame.new if change_records.empty?

      data_hash = {
        "entity" => [],
        "author" => [],
        "date" => [],
        "revision" => [],
        "message" => [],
        "loc_added" => [],
        "loc_deleted" => []
      }

      change_records.each do |record|
        data_hash["entity"] << record.entity
        data_hash["author"] << record.author
        data_hash["date"] << record.date
        data_hash["revision"] << record.revision
        data_hash["message"] << record.message
        data_hash["loc_added"] << record.loc_added
        data_hash["loc_deleted"] << record.loc_deleted
      end

      Rover::DataFrame.new(data_hash)
    end
  end
end
@@ -0,0 +1,67 @@
# frozen_string_literal: true

module RubyMaat
  module Groupers
    # Layer grouper - maps individual files to architectural layers.
    #
    # The grouping file contains one "regex => layer" rule per line; blank
    # lines and lines starting with '#' are ignored. Entities matching a
    # rule's regex are renamed to that rule's layer; the first matching rule
    # wins, and unmatched entities keep their original name.
    class LayerGrouper
      def initialize(grouping_file)
        @grouping_file = grouping_file
        @patterns = load_grouping_patterns
      end

      # Returns new ChangeRecords whose entity names have been replaced by
      # their matching layer. All other fields are copied unchanged.
      def group(change_records)
        change_records.map { |record| relabeled(record) }
      end

      private

      # Copy of +record+ with the entity mapped onto its layer.
      def relabeled(record)
        ChangeRecord.new(
          entity: map_entity_to_layer(record.entity),
          author: record.author,
          date: record.date,
          revision: record.revision,
          message: record.message,
          loc_added: record.loc_added,
          loc_deleted: record.loc_deleted
        )
      end

      # Reads the grouping file into an array of {regex:, layer:} hashes.
      # Invalid regexes are reported on stderr and skipped; any I/O failure
      # is re-raised as ArgumentError.
      def load_grouping_patterns
        File.foreach(@grouping_file).each_with_object([]) do |raw_line, rules|
          line = raw_line.strip
          next if line.empty? || line.start_with?("#")
          next unless line.include?("=>")

          pattern_str, layer_name = line.split("=>", 2).map(&:strip)

          begin
            rules << {regex: Regexp.new(pattern_str), layer: layer_name}
          rescue RegexpError => e
            warn "Invalid regex pattern '#{pattern_str}': #{e.message}"
          end
        end
      rescue => e
        raise ArgumentError, "Failed to load grouping file #{@grouping_file}: #{e.message}"
      end

      # First matching layer, or the entity itself when no rule applies.
      def map_entity_to_layer(entity)
        match = @patterns.find { |rule| entity.match?(rule[:regex]) }
        match ? match[:layer] : entity
      end
    end
  end
end
@@ -0,0 +1,51 @@
# frozen_string_literal: true

require "csv"

module RubyMaat
  module Groupers
    # Team mapper - translates individual authors into team names.
    #
    # The mapping comes from a CSV file with an "author,team" header row;
    # authors without a mapping keep their individual name.
    class TeamMapper
      def initialize(team_map_file)
        @team_map_file = team_map_file
        @author_to_team = load_team_mapping
      end

      # Returns new ChangeRecords with the author replaced by their team
      # (or left unchanged when no mapping exists). All other fields are
      # copied as-is.
      def map(change_records)
        change_records.map do |record|
          ChangeRecord.new(
            entity: record.entity,
            author: @author_to_team.fetch(record.author, record.author),
            date: record.date,
            revision: record.revision,
            message: record.message,
            loc_added: record.loc_added,
            loc_deleted: record.loc_deleted
          )
        end
      end

      private

      # Parses the CSV into {author => team}. Falls back to positional
      # columns when the "author"/"team" headers are absent; rows missing
      # either value are skipped. I/O or parse failures are re-raised as
      # ArgumentError.
      def load_team_mapping
        CSV.foreach(@team_map_file, headers: true).each_with_object({}) do |row, mapping|
          author = row["author"] || row[0]
          team = row["team"] || row[1]
          mapping[author.strip] = team.strip if author && team
        end
      rescue => e
        raise ArgumentError, "Failed to load team mapping file #{@team_map_file}: #{e.message}"
      end
    end
  end
end
@@ -0,0 +1,70 @@
# frozen_string_literal: true

module RubyMaat
  module Groupers
    # Time grouper - aggregates commits within temporal periods.
    #
    # Currently aggregates per calendar day: all records sharing the same
    # (date, entity) pair are collapsed into a single ChangeRecord. The
    # temporal_period argument is stored for future period variants but does
    # not yet alter the grouping granularity.
    class TimeGrouper
      def initialize(temporal_period)
        @temporal_period = temporal_period
      end

      # Collapses +change_records+ into one aggregated record per
      # (date, entity) pair, preserving first-encounter order.
      def group(change_records)
        change_records
          .group_by { |record| [record.date, record.entity] }
          .map { |(date, entity), records| aggregate_records(records, date, entity) }
      end

      private

      # Folds a same-day, same-entity batch into a single ChangeRecord:
      # churn is summed, distinct messages are joined with "; ", and the
      # first record supplies the representative author and revision.
      def aggregate_records(records, date, entity)
        representative = records.first

        ChangeRecord.new(
          entity: entity,
          # First author stands in for the whole batch (could be improved).
          author: representative.author,
          date: date,
          # Likewise, the first revision is used as representative.
          revision: representative.revision,
          message: records.filter_map(&:message).uniq.join("; "),
          loc_added: records.sum { |r| r.loc_added || 0 },
          loc_deleted: records.sum { |r| r.loc_deleted || 0 }
        )
      end
    end
  end
end
@@ -0,0 +1,65 @@
# frozen_string_literal: true

require "csv"

module RubyMaat
  module Output
    # CSV output handler - formats and writes analysis results as CSV.
    #
    # Writes to the given file name when one is supplied, otherwise to
    # stdout. An optional max_rows caps the number of data rows emitted
    # (the header row is always written).
    class CsvOutput
      def initialize(output_file = nil, max_rows = nil)
        @output_file = output_file
        @max_rows = max_rows
      end

      # Serializes +dataframe+ as CSV to the configured destination.
      # File handles are closed via the block form; stdout is left open.
      def write(dataframe)
        if @output_file
          File.open(@output_file, "w") { |file| write_csv(dataframe, file) }
        else
          write_csv(dataframe, $stdout)
        end
      end

      private

      # Emits the header followed by up to @max_rows formatted data rows.
      # NOTE(review): assumes the dataframe exposes #keys, #empty? and
      # #each_row (rows indexable by column name) — confirm against the
      # dataframe wrapper in use.
      def write_csv(dataframe, stream)
        csv = CSV.new(stream)

        # Header first - even an empty dataframe keeps its column structure.
        columns = dataframe.keys
        csv << columns

        return if dataframe.empty?

        emitted = 0
        dataframe.each_row do |row|
          break if @max_rows && emitted >= @max_rows

          csv << columns.map { |column| format_value(row[column]) }
          emitted += 1
        end
      end

      # Normalizes a cell for CSV output: ISO dates, floats rounded to
      # three decimals, nil as empty string, everything else stringified.
      def format_value(value)
        return "" if value.nil?
        return value.strftime("%Y-%m-%d") if value.is_a?(Date)
        return value.round(3) if value.is_a?(Float)

        value.to_s
      end
    end
  end
end
@@ -0,0 +1,63 @@
# frozen_string_literal: true

require "date"

module RubyMaat
  module Parsers
    # Base class for all VCS parsers.
    #
    # Subclasses implement #parse_content; this class handles file
    # validation, encoded reading, date parsing, and uniform error
    # reporting. (require "date" was missing even though parse_date
    # relies on Date.parse / Date::Error.)
    class BaseParser
      def initialize(log_file, options = {})
        @log_file = log_file
        @options = options
        # Log files default to UTF-8 unless --input-encoding overrides it.
        @encoding = options[:input_encoding] || "UTF-8"
      end

      # Template method: validates the log file, reads it with the
      # configured encoding, and delegates to the subclass's
      # #parse_content. Any failure is funneled through
      # #handle_parse_error, which raises ArgumentError.
      def parse
        validate_file_exists!
        content = read_log_file
        parse_content(content)
      rescue => e
        handle_parse_error(e)
      end

      protected

      # Reads the whole log file; invalid byte sequences for the chosen
      # encoding are reported as an ArgumentError with a usage hint.
      def read_log_file
        File.read(@log_file, encoding: @encoding)
      rescue Encoding::InvalidByteSequenceError
        raise ArgumentError, "Invalid encoding for log file. Try specifying --input-encoding"
      end

      # Raises ArgumentError when the log file path does not exist.
      def validate_file_exists!
        return if File.exist?(@log_file)

        raise ArgumentError, "Log file not found: #{@log_file}"
      end

      # Subclass hook: turn raw log text into change records.
      def parse_content(content)
        raise NotImplementedError, "Subclasses must implement parse_content"
      end

      # Re-raises ArgumentErrors untouched; wraps anything else in an
      # ArgumentError naming the VCS (derived from the subclass name) so
      # the CLI presents a consistent user-facing message.
      def handle_parse_error(error)
        case error
        when ArgumentError
          raise error
        else
          vcs_name = self.class.name.split("::").last.gsub("Parser", "")
          raise ArgumentError, "#{vcs_name}: Failed to parse the given file - is it a valid logfile? (#{error.message})"
        end
      end

      # Parses a date string, converting Date::Error into ArgumentError
      # so it flows through the standard user-error reporting.
      def parse_date(date_str)
        Date.parse(date_str)
      rescue Date::Error
        raise ArgumentError, "Invalid date format: #{date_str}"
      end

      # Helper to clean up binary file indicators and handle edge cases:
      # nil/empty/"-" (binary churn marker) become nil, otherwise the
      # numstat value is coerced to an integer.
      def clean_numstat(value)
        return nil if value.nil? || value.empty? || value == "-"

        value.to_i
      end
    end
  end
end