ruby-maat 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.commitlintrc.json +44 -0
- data/.mailmap +3 -0
- data/.overcommit.yml +77 -0
- data/.release-please-config.json +33 -0
- data/.release-please-manifest.json +3 -0
- data/.rspec +3 -0
- data/.rubocop.yml +48 -0
- data/CHANGELOG.md +46 -0
- data/CI_CD_SETUP.md +180 -0
- data/CLAUDE.md +130 -0
- data/Dockerfile +40 -0
- data/README.md +444 -0
- data/README_RUBY.md +300 -0
- data/RELEASE_PLEASE_SETUP.md +198 -0
- data/RUBY_MAAT.md +227 -0
- data/Rakefile +12 -0
- data/doc/imgs/abs_churn_sample.png +0 -0
- data/doc/imgs/code_age_sample.png +0 -0
- data/doc/imgs/coupling_sample.png +0 -0
- data/doc/imgs/crime_cover.jpg +0 -0
- data/doc/imgs/tree_map_sample.png +0 -0
- data/doc/intro.md +3 -0
- data/exe/ruby-maat +6 -0
- data/lib/ruby_maat/analysis/authors.rb +47 -0
- data/lib/ruby_maat/analysis/base_analysis.rb +70 -0
- data/lib/ruby_maat/analysis/churn.rb +255 -0
- data/lib/ruby_maat/analysis/code_age.rb +53 -0
- data/lib/ruby_maat/analysis/commit_messages.rb +58 -0
- data/lib/ruby_maat/analysis/communication.rb +56 -0
- data/lib/ruby_maat/analysis/effort.rb +150 -0
- data/lib/ruby_maat/analysis/entities.rb +40 -0
- data/lib/ruby_maat/analysis/identity.rb +12 -0
- data/lib/ruby_maat/analysis/logical_coupling.rb +134 -0
- data/lib/ruby_maat/analysis/sum_of_coupling.rb +43 -0
- data/lib/ruby_maat/analysis/summary.rb +43 -0
- data/lib/ruby_maat/app.rb +143 -0
- data/lib/ruby_maat/change_record.rb +47 -0
- data/lib/ruby_maat/cli.rb +187 -0
- data/lib/ruby_maat/dataset.rb +205 -0
- data/lib/ruby_maat/groupers/layer_grouper.rb +67 -0
- data/lib/ruby_maat/groupers/team_mapper.rb +51 -0
- data/lib/ruby_maat/groupers/time_grouper.rb +70 -0
- data/lib/ruby_maat/output/csv_output.rb +65 -0
- data/lib/ruby_maat/parsers/base_parser.rb +63 -0
- data/lib/ruby_maat/parsers/git2_parser.rb +72 -0
- data/lib/ruby_maat/parsers/git_parser.rb +66 -0
- data/lib/ruby_maat/parsers/mercurial_parser.rb +64 -0
- data/lib/ruby_maat/parsers/perforce_parser.rb +77 -0
- data/lib/ruby_maat/parsers/svn_parser.rb +76 -0
- data/lib/ruby_maat/parsers/tfs_parser.rb +103 -0
- data/lib/ruby_maat/version.rb +5 -0
- data/lib/ruby_maat.rb +44 -0
- metadata +143 -0
data/lib/ruby_maat/analysis/effort.rb
@@ -0,0 +1,150 @@
+# frozen_string_literal: true
+
+module RubyMaat
+  module Analysis
+    module Effort
+      # Entity effort analysis - revisions per author per entity
+      class ByRevisions < BaseAnalysis
+        def analyze(dataset, _options = {})
+          # Group by entity and author, count revisions
+          results = {}
+
+          dataset.to_df.each_row do |row|
+            entity = row["entity"]
+            author = row["author"]
+            revision = row["revision"]
+
+            key = [entity, author]
+            results[key] ||= {
+              entity: entity,
+              author: author,
+              author_revs: Set.new,
+              total_revs: Set.new
+            }
+            results[key][:author_revs] << revision
+          end
+
+          # Calculate total revisions per entity
+          entity_totals = {}
+          dataset.to_df.each_row do |row|
+            entity = row["entity"]
+            revision = row["revision"]
+
+            entity_totals[entity] ||= Set.new
+            entity_totals[entity] << revision
+          end
+
+          # Format results
+          formatted_results = results.map do |(entity, author), data|
+            {
+              entity: entity,
+              author: author,
+              "author-revs": data[:author_revs].size,
+              "total-revs": entity_totals[entity].size
+            }
+          end
+
+          # Sort by entity, then by author revisions descending
+          formatted_results.sort! do |a, b|
+            entity_comparison = a[:entity] <=> b[:entity]
+            entity_comparison.zero? ? b[:"author-revs"] <=> a[:"author-revs"] : entity_comparison
+          end
+
+          to_csv_data(formatted_results, %i[entity author author-revs total-revs])
+        end
+      end
+
+      # Main developer by revisions - primary contributor per entity (by commit count)
+      class MainDeveloperByRevisions < BaseAnalysis
+        def analyze(dataset, options = {})
+          min_revs = options[:min_revs] || 5
+
+          # Group by entity and author, count revisions
+          entity_authors = {}
+
+          dataset.to_df.each_row do |row|
+            entity = row["entity"]
+            author = row["author"]
+            revision = row["revision"]
+
+            entity_authors[entity] ||= {}
+            entity_authors[entity][author] ||= Set.new
+            entity_authors[entity][author] << revision
+          end
+
+          # Find main developer for each entity
+          results = []
+
+          entity_authors.each do |entity, authors|
+            total_revisions = authors.values.map(&:size).sum
+            next if total_revisions < min_revs
+
+            # Find author with most revisions
+            main_author, revisions = authors.max_by { |_author, revs| revs.size }
+
+            total_revisions = authors.values.map(&:size).sum
+
+            results << {
+              entity: entity,
+              "main-dev": main_author,
+              added: revisions.size, # Number of revisions by main dev
+              "total-added": total_revisions,
+              ownership: total_revisions.positive? ? (revisions.size.to_f / total_revisions).round(2) : 0.0
+            }
+          end
+
+          # Sort by number of revisions descending
+          results.sort_by! { |r| -r[:added] }
+
+          to_csv_data(results, %i[entity main-dev added total-added ownership])
+        end
+      end
+
+      # Fragmentation analysis - measures ownership distribution (fractal value)
+      class Fragmentation < BaseAnalysis
+        def analyze(dataset, options = {})
+          min_revs = options[:min_revs] || 5
+
+          # Group by entity, count contributions per author
+          entity_contributions = {}
+
+          dataset.to_df.each_row do |row|
+            entity = row["entity"]
+            author = row["author"]
+            revision = row["revision"]
+
+            entity_contributions[entity] ||= {}
+            entity_contributions[entity][author] ||= Set.new
+            entity_contributions[entity][author] << revision
+          end
+
+          # Calculate fragmentation (fractal value) for each entity
+          results = []
+
+          entity_contributions.each do |entity, authors|
+            total_revisions = authors.values.map(&:size).sum
+            next if total_revisions < min_revs
+
+            # Calculate fractal value: 1 - sum(p_i^2) where p_i is proportion of each author
+            sum_of_squares = authors.values.map do |revisions|
+              proportion = revisions.size.to_f / total_revisions
+              proportion**2
+            end.sum
+
+            fractal_value = 1.0 - sum_of_squares
+
+            results << {
+              entity: entity,
+              fractal_value: fractal_value.round(3)
+            }
+          end
+
+          # Sort by fractal value descending (most fragmented first)
+          results.sort_by! { |r| -r[:fractal_value] }
+
+          to_csv_data(results, %i[entity fractal_value])
+        end
+      end
+    end
+  end
+end
data/lib/ruby_maat/analysis/entities.rb
@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+
+module RubyMaat
+  module Analysis
+    # Entities analysis - counts revisions per entity
+    class Entities < BaseAnalysis
+      def analyze(dataset, options = {})
+        min_revs = options[:min_revs] || 1
+
+        # Group by entity and count revisions manually
+        entity_stats = {}
+
+        dataset.to_df.to_a.each do |row|
+          entity = row["entity"]
+          revision = row["revision"]
+
+          entity_stats[entity] ||= Set.new
+          entity_stats[entity] << revision
+        end
+
+        # Build results and apply minimum revisions filter
+        results = []
+        entity_stats.each do |entity, revisions|
+          n_revs = revisions.size
+          next if n_revs < min_revs
+
+          results << {
+            entity: entity,
+            "n-revs": n_revs
+          }
+        end
+
+        # Sort by number of revisions (descending)
+        results.sort! { |a, b| b[:"n-revs"] <=> a[:"n-revs"] }
+
+        to_csv_data(results, [:entity, :"n-revs"])
+      end
+    end
+  end
+end
data/lib/ruby_maat/analysis/logical_coupling.rb
@@ -0,0 +1,134 @@
+# frozen_string_literal: true
+
+module RubyMaat
+  module Analysis
+    # Logical coupling analysis - finds modules that tend to change together
+    # This identifies hidden dependencies between code modules
+    class LogicalCoupling < BaseAnalysis
+      def analyze(dataset, options = {})
+        min_revs = options[:min_revs] || 1
+        min_shared_revs = options[:min_shared_revs] || 1
+        min_coupling = options[:min_coupling] || 1
+        max_coupling = options[:max_coupling] || 100
+        max_changeset_size = options[:max_changeset_size] || 30
+        verbose_results = options[:verbose_results] || false
+
+        # Get co-changing entities by revision
+        co_changing_entities = get_co_changing_entities(dataset, max_changeset_size)
+
+        # Calculate coupling frequencies
+        coupling_frequencies = calculate_coupling_frequencies(co_changing_entities)
+
+        # Calculate revision counts per entity
+        entity_revisions = calculate_entity_revisions(dataset)
+
+        # Generate coupling results
+        results = []
+
+        coupling_frequencies.each do |(entity1, entity2), shared_revs|
+          entity1_revs = entity_revisions[entity1] || 0
+          entity2_revs = entity_revisions[entity2] || 0
+
+          avg_revs = average(entity1_revs, entity2_revs)
+          coupling_degree = percentage(shared_revs, avg_revs)
+
+          # Apply thresholds
+          next unless avg_revs >= min_revs
+          next unless shared_revs >= min_shared_revs
+          next unless coupling_degree >= min_coupling
+          next unless coupling_degree <= max_coupling
+
+          result = {
+            entity: entity1,
+            coupled: entity2,
+            degree: coupling_degree,
+            "average-revs": avg_revs.ceil
+          }
+
+          if verbose_results
+            result.merge!(
+              "first-entity-revisions": entity1_revs,
+              "second-entity-revisions": entity2_revs,
+              "shared-revisions": shared_revs
+            )
+          end
+
+          results << result
+        end
+
+        # Sort by coupling degree (descending), then by average revisions (descending)
+        results.sort! do |a, b|
+          comparison = b[:degree] <=> a[:degree]
+          comparison.zero? ? b[:"average-revs"] <=> a[:"average-revs"] : comparison
+        end
+
+        columns = [:entity, :coupled, :degree, :"average-revs"]
+        columns += [:"first-entity-revisions", :"second-entity-revisions", :"shared-revisions"] if verbose_results
+
+        to_csv_data(results, columns)
+      end
+
+      private
+
+      def get_co_changing_entities(dataset, max_changeset_size)
+        # Group changes by revision to find entities that changed together
+        by_revision = {}
+
+        dataset.to_df.to_a.each do |row|
+          revision = row["revision"]
+          entity = row["entity"]
+
+          by_revision[revision] ||= []
+          by_revision[revision] << entity
+        end
+
+        # Convert to co-changing pairs, filtering by changeset size
+        co_changing = []
+
+        by_revision.each_value do |entities|
+          # Skip large changesets to avoid noise
+          next if entities.size > max_changeset_size
+
+          # Get unique entities (remove duplicates)
+          unique_entities = entities.uniq
+
+          # Generate all combinations of 2 entities
+          unique_entities.combination(2) do |entity1, entity2|
+            # Sort to ensure consistent ordering
+            pair = [entity1, entity2].sort
+            co_changing << pair
+          end
+        end
+
+        co_changing
+      end
+
+      def calculate_coupling_frequencies(co_changing_entities)
+        # Count how many times each pair changed together
+        frequencies = Hash.new(0)
+
+        co_changing_entities.each do |pair|
+          frequencies[pair] += 1
+        end
+
+        frequencies
+      end
+
+      def calculate_entity_revisions(dataset)
+        # Count unique revisions per entity from the dataset
+        entity_revisions = {}
+
+        dataset.to_df.to_a.each do |row|
+          entity = row["entity"]
+          revision = row["revision"]
+
+          entity_revisions[entity] ||= Set.new
+          entity_revisions[entity] << revision
+        end
+
+        # Convert to counts
+        entity_revisions.transform_values(&:size)
+      end
+    end
+  end
+end
data/lib/ruby_maat/analysis/sum_of_coupling.rb
@@ -0,0 +1,43 @@
+# frozen_string_literal: true
+
+module RubyMaat
+  module Analysis
+    # Sum of coupling analysis - aggregated coupling metrics per entity
+    class SumOfCoupling < BaseAnalysis
+      def analyze(dataset, options = {})
+        # First run the logical coupling analysis to get coupling data
+        coupling_analysis = LogicalCoupling.new
+        coupling_results = coupling_analysis.analyze(dataset, options)
+
+        # If no coupling results, return empty
+        return to_csv_data([], %i[entity soc]) if coupling_results.empty?
+
+        # Aggregate coupling degrees per entity
+        entity_coupling_sums = Hash.new(0)
+
+        coupling_results.each_row do |row|
+          entity = row["entity"]
+          coupled = row["coupled"]
+          degree = row["degree"]
+
+          # Add coupling for both directions
+          entity_coupling_sums[entity] += degree
+          entity_coupling_sums[coupled] += degree
+        end
+
+        # Calculate sum of coupling for each entity
+        results = entity_coupling_sums.map do |entity, total_coupling|
+          {
+            entity: entity,
+            soc: total_coupling
+          }
+        end
+
+        # Sort by sum of coupling descending
+        results.sort_by! { |r| -r[:soc] }
+
+        to_csv_data(results, %i[entity soc])
+      end
+    end
+  end
+end
data/lib/ruby_maat/analysis/summary.rb
@@ -0,0 +1,43 @@
+# frozen_string_literal: true
+
+module RubyMaat
+  module Analysis
+    # Summary analysis - provides high-level overview of repository statistics
+    class Summary < BaseAnalysis
+      def analyze(dataset, _options = {})
+        df = dataset.to_df
+
+        if df.empty?
+          results = [
+            {statistic: "number-of-commits", value: 0},
+            {statistic: "number-of-entities", value: 0},
+            {statistic: "number-of-entities-changed", value: 0},
+            {statistic: "number-of-authors", value: 0}
+          ]
+        else
+          # Collect data manually to avoid DataFrame API issues
+          revisions = []
+          entities = []
+          authors = []
+          total_changes = 0
+
+          df.each_row do |row|
+            revisions << row["revision"]
+            entities << row["entity"]
+            authors << row["author"]
+            total_changes += 1
+          end
+
+          results = [
+            {statistic: "number-of-commits", value: revisions.uniq.size},
+            {statistic: "number-of-entities", value: entities.uniq.size},
+            {statistic: "number-of-entities-changed", value: total_changes},
+            {statistic: "number-of-authors", value: authors.uniq.size}
+          ]
+        end
+
+        to_csv_data(results, %i[statistic value])
+      end
+    end
+  end
+end
data/lib/ruby_maat/app.rb
@@ -0,0 +1,143 @@
+# frozen_string_literal: true
+
+require "date"
+
+module RubyMaat
+  # Main application orchestration
+  # This is the Ruby equivalent of code-maat.app.app namespace
+  class App
+    SUPPORTED_VCS = %w[git git2 svn hg p4 tfs].freeze
+
+    SUPPORTED_ANALYSES = {
+      "authors" => RubyMaat::Analysis::Authors,
+      "revisions" => RubyMaat::Analysis::Entities,
+      "coupling" => RubyMaat::Analysis::LogicalCoupling,
+      "soc" => RubyMaat::Analysis::SumOfCoupling,
+      "summary" => RubyMaat::Analysis::Summary,
+      "identity" => RubyMaat::Analysis::Identity,
+      "abs-churn" => RubyMaat::Analysis::Churn::Absolute,
+      "author-churn" => RubyMaat::Analysis::Churn::ByAuthor,
+      "entity-churn" => RubyMaat::Analysis::Churn::ByEntity,
+      "entity-ownership" => RubyMaat::Analysis::Churn::Ownership,
+      "main-dev" => RubyMaat::Analysis::Churn::MainDeveloper,
+      "refactoring-main-dev" => RubyMaat::Analysis::Churn::RefactoringMainDeveloper,
+      "entity-effort" => RubyMaat::Analysis::Effort::ByRevisions,
+      "main-dev-by-revs" => RubyMaat::Analysis::Effort::MainDeveloperByRevisions,
+      "fragmentation" => RubyMaat::Analysis::Effort::Fragmentation,
+      "communication" => RubyMaat::Analysis::Communication,
+      "messages" => RubyMaat::Analysis::CommitMessages,
+      "age" => RubyMaat::Analysis::CodeAge
+    }.freeze
+
+    def self.analysis_names
+      SUPPORTED_ANALYSES.keys.sort.join(", ")
+    end
+
+    def initialize(options = {})
+      @options = options
+      validate_options!
+    end
+
+    def run
+      # Parse VCS log file
+      parser = create_parser
+      change_records = parser.parse
+
+      # Apply data transformations
+      change_records = apply_grouping(change_records)
+      change_records = apply_temporal_grouping(change_records)
+      change_records = apply_team_mapping(change_records)
+
+      # Convert to dataset
+      dataset = Dataset.from_changes(change_records)
+
+      # Run analysis
+      analysis = create_analysis
+      results = analysis.analyze(dataset, @options)
+
+      # Output results
+      output_handler = create_output_handler
+      output_handler.write(results)
+    rescue => e
+      handle_error(e)
+    end
+
+    private
+
+    def validate_options!
+      raise ArgumentError, "Log file is required" unless @options[:log]
+      raise ArgumentError, "Version control system is required" unless @options[:version_control]
+
+      unless SUPPORTED_VCS.include?(@options[:version_control])
+        raise ArgumentError, "Invalid VCS: #{@options[:version_control]}. Supported: #{SUPPORTED_VCS.join(", ")}"
+      end
+
+      return if SUPPORTED_ANALYSES.key?(@options[:analysis] || "authors")
+
+      raise ArgumentError, "Invalid analysis: #{@options[:analysis]}. Supported: #{self.class.analysis_names}"
+    end
+
+    def create_parser
+      case @options[:version_control]
+      when "git"
+        RubyMaat::Parsers::GitParser.new(@options[:log], @options)
+      when "git2"
+        RubyMaat::Parsers::Git2Parser.new(@options[:log], @options)
+      when "svn"
+        RubyMaat::Parsers::SvnParser.new(@options[:log], @options)
+      when "hg"
+        RubyMaat::Parsers::MercurialParser.new(@options[:log], @options)
+      when "p4"
+        RubyMaat::Parsers::PerforceParser.new(@options[:log], @options)
+      when "tfs"
+        RubyMaat::Parsers::TfsParser.new(@options[:log], @options)
+      end
+    end
+
+    def apply_grouping(change_records)
+      return change_records unless @options[:group]
+
+      grouper = RubyMaat::Groupers::LayerGrouper.new(@options[:group])
+      grouper.group(change_records)
+    end
+
+    def apply_temporal_grouping(change_records)
+      return change_records unless @options[:temporal_period]
+
+      grouper = RubyMaat::Groupers::TimeGrouper.new(@options[:temporal_period])
+      grouper.group(change_records)
+    end
+
+    def apply_team_mapping(change_records)
+      return change_records unless @options[:team_map_file]
+
+      mapper = RubyMaat::Groupers::TeamMapper.new(@options[:team_map_file])
+      mapper.map(change_records)
+    end
+
+    def create_analysis
+      analysis_name = @options[:analysis] || "authors"
+      analysis_class = SUPPORTED_ANALYSES[analysis_name]
+      analysis_class.new
+    end
+
+    def create_output_handler
+      if @options[:outfile]
+        RubyMaat::Output::CsvOutput.new(@options[:outfile], @options[:rows])
+      else
+        RubyMaat::Output::CsvOutput.new(nil, @options[:rows]) # stdout
+      end
+    end
+
+    def handle_error(error)
+      case error
+      when ArgumentError
+        warn "Error: #{error.message}"
+      else
+        warn "Internal error: #{error.message}"
+        warn error.backtrace.join("\n") if @options[:verbose]
+      end
+      exit 1
+    end
+  end
+end
data/lib/ruby_maat/change_record.rb
@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+
+module RubyMaat
+  # Represents a single change/modification record from VCS
+  # This is the fundamental data structure that flows through the entire pipeline
+  class ChangeRecord
+    attr_reader :entity, :author, :date, :revision, :message, :loc_added, :loc_deleted
+
+    def initialize(entity:, author:, date:, revision:, message: nil, loc_added: nil, loc_deleted: nil)
+      @entity = entity
+      @author = author
+      @date = date.is_a?(Date) ? date : Date.parse(date)
+      @revision = revision
+      @message = message
+      @loc_added = loc_added.to_i if loc_added && (!loc_added.is_a?(Float) || !loc_added.nan?)
+      @loc_deleted = loc_deleted.to_i if loc_deleted && (!loc_deleted.is_a?(Float) || !loc_deleted.nan?)
+    end
+
+    def to_h
+      {
+        entity: entity,
+        author: author,
+        date: date,
+        revision: revision,
+        message: message,
+        loc_added: loc_added,
+        loc_deleted: loc_deleted
+      }
+    end
+
+    def ==(other)
+      other.is_a?(ChangeRecord) &&
+        entity == other.entity &&
+        author == other.author &&
+        date == other.date &&
+        revision == other.revision
+    end
+
+    def hash
+      [entity, author, date, revision].hash
+    end
+
+    def eql?(other)
+      self == other
+    end
+  end
+end