ruby-maat 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +7 -0
  2. data/.commitlintrc.json +44 -0
  3. data/.mailmap +3 -0
  4. data/.overcommit.yml +77 -0
  5. data/.release-please-config.json +33 -0
  6. data/.release-please-manifest.json +3 -0
  7. data/.rspec +3 -0
  8. data/.rubocop.yml +48 -0
  9. data/CHANGELOG.md +46 -0
  10. data/CI_CD_SETUP.md +180 -0
  11. data/CLAUDE.md +130 -0
  12. data/Dockerfile +40 -0
  13. data/README.md +444 -0
  14. data/README_RUBY.md +300 -0
  15. data/RELEASE_PLEASE_SETUP.md +198 -0
  16. data/RUBY_MAAT.md +227 -0
  17. data/Rakefile +12 -0
  18. data/doc/imgs/abs_churn_sample.png +0 -0
  19. data/doc/imgs/code_age_sample.png +0 -0
  20. data/doc/imgs/coupling_sample.png +0 -0
  21. data/doc/imgs/crime_cover.jpg +0 -0
  22. data/doc/imgs/tree_map_sample.png +0 -0
  23. data/doc/intro.md +3 -0
  24. data/exe/ruby-maat +6 -0
  25. data/lib/ruby_maat/analysis/authors.rb +47 -0
  26. data/lib/ruby_maat/analysis/base_analysis.rb +70 -0
  27. data/lib/ruby_maat/analysis/churn.rb +255 -0
  28. data/lib/ruby_maat/analysis/code_age.rb +53 -0
  29. data/lib/ruby_maat/analysis/commit_messages.rb +58 -0
  30. data/lib/ruby_maat/analysis/communication.rb +56 -0
  31. data/lib/ruby_maat/analysis/effort.rb +150 -0
  32. data/lib/ruby_maat/analysis/entities.rb +40 -0
  33. data/lib/ruby_maat/analysis/identity.rb +12 -0
  34. data/lib/ruby_maat/analysis/logical_coupling.rb +134 -0
  35. data/lib/ruby_maat/analysis/sum_of_coupling.rb +43 -0
  36. data/lib/ruby_maat/analysis/summary.rb +43 -0
  37. data/lib/ruby_maat/app.rb +143 -0
  38. data/lib/ruby_maat/change_record.rb +47 -0
  39. data/lib/ruby_maat/cli.rb +187 -0
  40. data/lib/ruby_maat/dataset.rb +205 -0
  41. data/lib/ruby_maat/groupers/layer_grouper.rb +67 -0
  42. data/lib/ruby_maat/groupers/team_mapper.rb +51 -0
  43. data/lib/ruby_maat/groupers/time_grouper.rb +70 -0
  44. data/lib/ruby_maat/output/csv_output.rb +65 -0
  45. data/lib/ruby_maat/parsers/base_parser.rb +63 -0
  46. data/lib/ruby_maat/parsers/git2_parser.rb +72 -0
  47. data/lib/ruby_maat/parsers/git_parser.rb +66 -0
  48. data/lib/ruby_maat/parsers/mercurial_parser.rb +64 -0
  49. data/lib/ruby_maat/parsers/perforce_parser.rb +77 -0
  50. data/lib/ruby_maat/parsers/svn_parser.rb +76 -0
  51. data/lib/ruby_maat/parsers/tfs_parser.rb +103 -0
  52. data/lib/ruby_maat/version.rb +5 -0
  53. data/lib/ruby_maat.rb +44 -0
  54. metadata +143 -0
data/Rakefile ADDED
@@ -0,0 +1,12 @@
+ # frozen_string_literal: true
+
+ require "bundler/gem_tasks"
+ require "rspec/core/rake_task"
+
+ RSpec::Core::RakeTask.new(:spec)
+
+ require "rubocop/rake_task"
+
+ RuboCop::RakeTask.new
+
+ task default: %i[spec rubocop]
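
The Rakefile wires RSpec and RuboCop into a single default task, so one command runs both gates. A minimal sketch of what that chaining means, assuming the Rakefile above is in the working directory (normally you would just run rake from the shell):

    # Load the gem's Rakefile and invoke its default task from Ruby.
    require "rake"
    load "Rakefile"
    Rake::Task[:default].invoke # runs :spec first, then :rubocop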
data/doc/imgs/abs_churn_sample.png ADDED (binary file)
data/doc/imgs/code_age_sample.png ADDED (binary file)
data/doc/imgs/coupling_sample.png ADDED (binary file)
data/doc/imgs/crime_cover.jpg ADDED (binary file)
data/doc/imgs/tree_map_sample.png ADDED (binary file)
data/doc/intro.md ADDED
@@ -0,0 +1,3 @@
+ # Introduction to code-maat
+
+ TODO: write [great documentation](http://jacobian.org/writing/great-documentation/what-to-write/)
data/exe/ruby-maat ADDED
@@ -0,0 +1,6 @@
+ #!/usr/bin/env ruby
+ # frozen_string_literal: true
+
+ require_relative "../lib/ruby_maat"
+
+ RubyMaat::CLI.new.run(ARGV)
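
The executable is a thin shim over RubyMaat::CLI, so the same run can be scripted from Ruby. A hedged sketch (the flag names mirror code-maat's CLI conventions, -l for the log file, -c for the VCS format, -a for the analysis, and are assumptions here; data/lib/ruby_maat/cli.rb holds the real definitions):

    require "ruby_maat"
    # Equivalent to running: ruby-maat -l git.log -c git2 -a authors
    RubyMaat::CLI.new.run(["-l", "git.log", "-c", "git2", "-a", "authors"])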
data/lib/ruby_maat/analysis/authors.rb ADDED
@@ -0,0 +1,47 @@
+ # frozen_string_literal: true
+
+ module RubyMaat
+   module Analysis
+     # Authors analysis - counts distinct authors per entity
+     # Research shows that the number of authors of a module is related to quality problems
+     class Authors < BaseAnalysis
+       def analyze(dataset, options = {})
+         min_revs = options[:min_revs] || 1
+
+         # Group by entity and count distinct authors and revisions manually
+         entity_stats = {}
+
+         dataset.to_df.to_a.each do |row|
+           entity = row["entity"]
+           author = row["author"]
+           revision = row["revision"]
+
+           entity_stats[entity] ||= {authors: Set.new, revisions: Set.new}
+           entity_stats[entity][:authors] << author
+           entity_stats[entity][:revisions] << revision
+         end
+
+         # Build results and apply minimum revisions filter
+         results = []
+         entity_stats.each do |entity, stats|
+           n_revs = stats[:revisions].size
+           next if n_revs < min_revs
+
+           results << {
+             entity: entity,
+             "n-authors": stats[:authors].size,
+             "n-revs": n_revs
+           }
+         end
+
+         # Sort by number of authors (descending), then by revisions (descending)
+         results.sort! do |a, b|
+           comparison = b[:"n-authors"] <=> a[:"n-authors"]
+           comparison.zero? ? b[:"n-revs"] <=> a[:"n-revs"] : comparison
+         end
+
+         to_csv_data(results, [:entity, :"n-authors", :"n-revs"])
+       end
+     end
+   end
+ end
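
As a usage sketch (the Dataset constructor shape is an assumption; see data/lib/ruby_maat/dataset.rb), the analysis reduces a parsed log to one row per entity:

    # change_records would come from one of the parsers under data/lib/ruby_maat/parsers/.
    dataset = RubyMaat::Dataset.new(change_records) # hypothetical construction
    result = RubyMaat::Analysis::Authors.new.analyze(dataset, min_revs: 2)
    # => Rover::DataFrame with columns entity, n-authors, n-revs;
    #    entities touched by the most people come first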
data/lib/ruby_maat/analysis/base_analysis.rb ADDED
@@ -0,0 +1,70 @@
+ # frozen_string_literal: true
+
+ module RubyMaat
+   module Analysis
+     # Base class for all analysis modules
+     class BaseAnalysis
+       def analyze(dataset, options = {})
+         raise NotImplementedError, "Subclasses must implement analyze method"
+       end
+
+       protected
+
+       # Filter dataset by minimum revisions threshold
+       def filter_by_min_revisions(dataset, min_revs)
+         return dataset if min_revs <= 1
+
+         dataset.filter_min_revisions(min_revs)
+       end
+
+       # Helper to convert analysis results to CSV-compatible format
+       def to_csv_data(results, columns)
+         if results.empty?
+           # Create empty dataframe with proper column structure
+           empty_data = {}
+           columns.each { |col| empty_data[col] = [] }
+           return Rover::DataFrame.new(empty_data)
+         end
+
+         if results.is_a?(Rover::DataFrame)
+           # Already a dataframe
+           results
+         elsif results.first.is_a?(Hash)
+           # Array of hashes
+           Rover::DataFrame.new(results)
+         else
+           # Custom data structure - convert to hash format
+           data = results.map { |item| format_row(item, columns) }
+           Rover::DataFrame.new(data)
+         end
+       end
+
+       def format_row(item, columns)
+         if item.respond_to?(:to_h)
+           item.to_h.slice(*columns)
+         else
+           # Assume item is an array matching column order
+           columns.zip(item).to_h
+         end
+       end
+
+       # Mathematical utilities
+       def safe_divide(numerator, denominator)
+         return 0 if denominator.nil? || denominator.zero?
+
+         (numerator.to_f / denominator).round(2)
+       end
+
+       def percentage(part, total)
+         (safe_divide(part, total) * 100).round(0)
+       end
+
+       # Calculate average of two numbers
+       def average(first_value, second_value)
+         return 0 if first_value.nil? || second_value.nil?
+
+         ((first_value + second_value) / 2.0).round(1)
+       end
+     end
+   end
+ end
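
The arithmetic helpers round at fixed points, which matters for downstream metrics: percentage rounds twice, once inside safe_divide and once after scaling. A worked illustration of those semantics, written as if the protected methods were public:

    safe_divide(7, 3)    # => 2.33  (float division, rounded to two decimals)
    safe_divide(7, 0)    # => 0     (zero or nil denominators return 0)
    percentage(25, 200)  # => 13    (safe_divide gives 0.13, then * 100, rounded to integer)
    average(3, 4)        # => 3.5   (mean of two values, one decimal)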
data/lib/ruby_maat/analysis/churn.rb ADDED
@@ -0,0 +1,255 @@
+ # frozen_string_literal: true
+
+ module RubyMaat
+   module Analysis
+     module Churn
+       # Absolute churn analysis - code churn trends over time
+       class Absolute < BaseAnalysis
+         def analyze(dataset, _options = {})
+           # Group by date and sum churn metrics, count commits
+           results = {}
+
+           dataset.to_df.each_row do |row|
+             date = row["date"]
+             added = row["loc_added"] || 0
+             deleted = row["loc_deleted"] || 0
+             revision = row["revision"]
+
+             results[date] ||= {date: date, added: 0, deleted: 0, revisions: Set.new}
+             results[date][:added] += added
+             results[date][:deleted] += deleted
+             results[date][:revisions] << revision
+           end
+
+           # Convert to final format
+           final_results = results.values.map do |result|
+             {
+               date: result[:date],
+               added: result[:added],
+               deleted: result[:deleted],
+               commits: result[:revisions].size
+             }
+           end
+
+           # Sort by date
+           final_results.sort_by! { |r| r[:date] }
+
+           to_csv_data(final_results, %i[date added deleted commits])
+         end
+       end
+
+       # Author churn analysis - churn metrics per author
+       class ByAuthor < BaseAnalysis
+         def analyze(dataset, _options = {})
+           # Group by author and sum churn metrics, count commits
+           results = {}
+
+           dataset.to_df.each_row do |row|
+             author = row["author"]
+             added = row["loc_added"] || 0
+             deleted = row["loc_deleted"] || 0
+             revision = row["revision"]
+
+             results[author] ||= {author: author, added: 0, deleted: 0, revisions: Set.new}
+             results[author][:added] += added
+             results[author][:deleted] += deleted
+             results[author][:revisions] << revision
+           end
+
+           # Convert to final format
+           final_results = results.values.map do |result|
+             {
+               author: result[:author],
+               added: result[:added],
+               deleted: result[:deleted],
+               commits: result[:revisions].size
+             }
+           end
+
+           # Sort by total churn (added + deleted) descending, then by added lines descending, then by author
+           final_results.sort! do |a, b|
+             total_churn_b = b[:added] + b[:deleted]
+             total_churn_a = a[:added] + a[:deleted]
+             churn_comparison = total_churn_b <=> total_churn_a
+
+             if churn_comparison.zero?
+               added_comparison = b[:added] <=> a[:added]
+               added_comparison.zero? ? a[:author] <=> b[:author] : added_comparison
+             else
+               churn_comparison
+             end
+           end
+
+           to_csv_data(final_results, %i[author added deleted commits])
+         end
+       end
+
+       # Entity churn analysis - churn metrics per entity
+       class ByEntity < BaseAnalysis
+         def analyze(dataset, options = {})
+           min_revs = options[:min_revs] || 5
+
+           # Group by entity and sum churn metrics
+           results = {}
+
+           dataset.to_df.each_row do |row|
+             entity = row["entity"]
+             added = row["loc_added"] || 0
+             deleted = row["loc_deleted"] || 0
+             revision = row["revision"]
+
+             results[entity] ||= {entity: entity, added: 0, deleted: 0, revisions: Set.new}
+             results[entity][:added] += added
+             results[entity][:deleted] += deleted
+             results[entity][:revisions] << revision
+           end
+
+           # Filter by minimum revisions and format results
+           filtered_results = results.values.map do |result|
+             next if result[:revisions].size < min_revs
+
+             {
+               entity: result[:entity],
+               added: result[:added],
+               deleted: result[:deleted],
+               commits: result[:revisions].size
+             }
+           end.compact
+
+           # Sort by total churn descending
+           filtered_results.sort_by! { |r| -(r[:added] + r[:deleted]) }
+
+           to_csv_data(filtered_results, %i[entity added deleted commits])
+         end
+       end
+
+       # Ownership analysis - churn metrics per author per entity
+       class Ownership < BaseAnalysis
+         def analyze(dataset, _options = {})
+           # Group by entity and author
+           results = {}
+
+           dataset.to_df.each_row do |row|
+             entity = row["entity"]
+             author = row["author"]
+             added = row["loc_added"] || 0
+             deleted = row["loc_deleted"] || 0
+
+             key = [entity, author]
+             results[key] ||= {entity: entity, author: author, added: 0, deleted: 0}
+             results[key][:added] += added
+             results[key][:deleted] += deleted
+           end
+
+           # Sort by entity, then by total contribution descending
+           sorted_results = results.values.sort do |a, b|
+             entity_comparison = a[:entity] <=> b[:entity]
+             if entity_comparison.zero?
+               total_b = b[:added] + b[:deleted]
+               total_a = a[:added] + a[:deleted]
+               total_b <=> total_a
+             else
+               entity_comparison
+             end
+           end
+
+           to_csv_data(sorted_results, %i[entity author added deleted])
+         end
+       end
+
+       # Main developer analysis - primary contributor per entity (by lines)
+       class MainDeveloper < BaseAnalysis
+         def analyze(dataset, options = {})
+           min_revs = options[:min_revs] || 5
+
+           # Group contributions by entity and author
+           entity_contributions = {}
+           entity_totals = {}
+
+           dataset.to_df.each_row do |row|
+             entity = row["entity"]
+             author = row["author"]
+             added = row["loc_added"] || 0
+             row["loc_deleted"] || 0 # intentionally unused: main developer is measured by added lines
+
+             entity_contributions[entity] ||= {}
+             entity_contributions[entity][author] ||= {added: 0, revisions: Set.new}
+             entity_contributions[entity][author][:added] += added
+             entity_contributions[entity][author][:revisions] << row["revision"]
+
+             entity_totals[entity] ||= 0
+             entity_totals[entity] += added
+           end
+
+           # Find main developer for each entity
+           results = []
+
+           entity_contributions.each do |entity, authors|
+             total_revisions = authors.values.map { |data| data[:revisions] }.reduce(Set.new, &:|).size
+             next if total_revisions < min_revs
+
+             # Find author with most added lines (tie-break by author name alphabetically)
+             main_author = authors.max_by { |author, data| [data[:added], author] }
+             next unless main_author
+
+             author_name, author_data = main_author
+             total_added = entity_totals[entity]
+             ownership = total_added.positive? ? (author_data[:added].to_f / total_added).round(2) : 0.0
+
+             results << {
+               entity: entity,
+               "main-dev": author_name,
+               added: author_data[:added],
+               "total-added": total_added,
+               ownership: ownership
+             }
+           end
+
+           # Sort by entity name
+           results.sort_by! { |r| r[:entity] }
+
+           to_csv_data(results, %i[entity main-dev added total-added ownership])
+         end
+       end
+
+       # Refactoring main developer - entities with frequent changes by main developer
+       class RefactoringMainDeveloper < BaseAnalysis
+         def analyze(dataset, options = {})
+           min_revs = options[:min_revs] || 5
+
+           # First find main developers
+           main_dev_analysis = MainDeveloper.new
+           main_devs_df = main_dev_analysis.analyze(dataset, options)
+
+           # Convert to hash for lookup
+           main_devs = {}
+           main_devs_df.each_row do |row|
+             main_devs[row[:entity]] = row[:"main-dev"] # MainDeveloper emits a hyphenated "main-dev" column
+           end
+
+           # Count revisions by main developer per entity
+           results = []
+
+           main_devs.each do |entity, main_dev|
+             entity_data = dataset.to_df.filter { |row| row[:entity] == entity && row[:author] == main_dev }
+             main_dev_revisions = entity_data[:revision].uniq.size
+
+             next if main_dev_revisions < min_revs
+
+             results << {
+               entity: entity,
+               main_dev: main_dev,
+               added: main_dev_revisions, # Number of revisions by main dev
+               deleted: 0
+             }
+           end
+
+           # Sort by number of revisions descending
+           results.sort_by! { |r| -r[:added] }
+
+           to_csv_data(results, %i[entity main_dev added deleted])
+         end
+       end
+     end
+   end
+ end
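
To make the Absolute grouping concrete, a toy example (the row shape mirrors the each_row hashes above; the surrounding Dataset plumbing is assumed). Two commits on one day collapse into a single row, and commits are counted through the revision Set rather than by summing rows:

    rows = [
      {"date" => "2024-05-01", "loc_added" => 10, "loc_deleted" => 2, "revision" => "r1"},
      {"date" => "2024-05-01", "loc_added" => 5, "loc_deleted" => 1, "revision" => "r2"},
      {"date" => "2024-05-01", "loc_added" => 3, "loc_deleted" => 0, "revision" => "r2"} # same commit, second file
    ]
    # Absolute#analyze would emit: date 2024-05-01, added 18, deleted 3, commits 2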
data/lib/ruby_maat/analysis/code_age.rb ADDED
@@ -0,0 +1,53 @@
+ # frozen_string_literal: true
+
+ module RubyMaat
+   module Analysis
+     # Code age analysis - measures how long since each entity was last modified
+     class CodeAge < BaseAnalysis
+       def analyze(dataset, options = {})
+         reference_date = options[:age_time_now] || Date.today
+
+         # Find the latest modification date for each entity
+         entity_latest_dates = {}
+
+         dataset.to_df.each_row do |row|
+           entity = row["entity"]
+           date = row["date"]
+
+           entity_latest_dates[entity] = date if entity_latest_dates[entity].nil? || date > entity_latest_dates[entity]
+         end
+
+         # Calculate age in months for each entity
+         results = entity_latest_dates.map do |entity, last_date|
+           months_old = calculate_months_between(last_date, reference_date)
+
+           {
+             entity: entity,
+             "age-months": months_old
+           }
+         end
+
+         # Sort by age descending (oldest first)
+         results.sort_by! { |r| -r[:"age-months"] }
+
+         to_csv_data(results, %i[entity age-months])
+       end
+
+       private
+
+       def calculate_months_between(start_date, end_date)
+         return 0 if start_date >= end_date
+
+         years = end_date.year - start_date.year
+         months = end_date.month - start_date.month
+
+         total_months = (years * 12) + months
+
+         # Adjust if the day hasn't been reached yet in the end month
+         total_months -= 1 if end_date.day < start_date.day
+
+         [total_months, 0].max
+       end
+     end
+   end
+ end
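
A worked example of the month arithmetic in calculate_months_between (a private helper, shown here purely for illustration): with a last change on 2023-03-15 measured on 2024-05-10, the year difference contributes 12 months and the month difference 2, then the day adjustment subtracts 1 because the 15th has not yet been reached in May:

    calculate_months_between(Date.new(2023, 3, 15), Date.new(2024, 5, 10))
    # => 13  (14 raw months, minus 1 for the incomplete final month)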
data/lib/ruby_maat/analysis/commit_messages.rb ADDED
@@ -0,0 +1,58 @@
+ # frozen_string_literal: true
+
+ module RubyMaat
+   module Analysis
+     # Commit messages analysis - word frequency analysis of commit messages
+     class CommitMessages < BaseAnalysis
+       def analyze(dataset, options = {})
+         expression = options[:expression_to_match]
+
+         # Extract commit messages
+         messages = dataset.to_df[:message].compact
+
+         # Filter by regex if provided
+         if expression
+           regex = Regexp.new(expression, Regexp::IGNORECASE)
+           messages = messages.grep(regex)
+         end
+
+         # Tokenize and count words
+         word_frequencies = Hash.new(0)
+
+         messages.each do |message|
+           # Simple tokenization: split on whitespace and punctuation, convert to lowercase
+           words = message.downcase.split(/[^a-zA-Z0-9]+/).reject(&:empty?)
+
+           # Filter out common stop words and very short words
+           words = words.reject { |word| word.length < 3 || stop_words.include?(word) }
+
+           words.each { |word| word_frequencies[word] += 1 }
+         end
+
+         # Convert to results format
+         results = word_frequencies.map do |word, frequency|
+           {
+             word: word,
+             frequency: frequency
+           }
+         end
+
+         # Sort by frequency descending
+         results.sort_by! { |r| -r[:frequency] }
+
+         to_csv_data(results, %i[word frequency])
+       end
+
+       private
+
+       def stop_words
+         %w[
+           the and or but for with from that this will was are has have had been
+           can could would should may might must shall
+           not don't doesn't didn't won't wasn't weren't isn't aren't hasn't haven't
+           add fix update remove delete change modify refactor implement
+         ].to_set
+       end
+     end
+   end
+ end
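
An illustration of the tokenization and filtering rules above (not part of the gem):

    "Fix NPE in parser, add tests".downcase.split(/[^a-zA-Z0-9]+/).reject(&:empty?)
    # => ["fix", "npe", "in", "parser", "add", "tests"]
    # "in" is dropped (shorter than 3 characters); "fix" and "add" are stop words;
    # the tokens that get counted are ["npe", "parser", "tests"]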
data/lib/ruby_maat/analysis/communication.rb ADDED
@@ -0,0 +1,56 @@
+ # frozen_string_literal: true
+
+ module RubyMaat
+   module Analysis
+     # Communication analysis - identifies developer collaboration patterns
+     # Based on Conway's Law: organizations design systems that mirror their communication structure
+     class Communication < BaseAnalysis
+       def analyze(dataset, options = {})
+         min_revs = options[:min_revs] || 5
+         min_shared_revs = options[:min_shared_revs] || 5
+
+         # Group entities by author to find their work domains
+         author_entities = {}
+
+         dataset.to_df.each_row do |row|
+           author = row[:author]
+           entity = row[:entity]
+
+           author_entities[author] ||= Set.new
+           author_entities[author] << entity
+         end
+
+         # Find pairs of authors who work on shared entities
+         results = []
+         author_pairs = author_entities.keys.combination(2)
+
+         author_pairs.each do |author1, author2|
+           shared_entities = author_entities[author1] & author_entities[author2]
+           next if shared_entities.size < min_shared_revs
+
+           author1_entities = author_entities[author1].size
+           author2_entities = author_entities[author2].size
+
+           # Communication strength based on shared work
+           avg_entities = average(author1_entities, author2_entities)
+           next if avg_entities < min_revs
+
+           communication_strength = percentage(shared_entities.size, avg_entities)
+
+           results << {
+             author: author1,
+             peer: author2,
+             shared: shared_entities.size,
+             average: avg_entities.ceil,
+             strength: communication_strength
+           }
+         end
+
+         # Sort by communication strength descending
+         results.sort_by! { |r| -r[:strength] }
+
+         to_csv_data(results, %i[author peer shared average strength])
+       end
+     end
+   end
+ end
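
A worked pass through the strength metric with toy numbers: author1 touched 10 entities, author2 touched 6, and 5 overlap. The pair clears min_shared_revs (5 shared), the average clears min_revs, and the strength comes from the BaseAnalysis helpers:

    average(10, 6)       # => 8.0  (mean work domain size, clears min_revs = 5)
    percentage(5, 8.0)   # => 63   (safe_divide(5, 8.0) = 0.63, scaled to a percent)
    # emitted row: {author: "a1", peer: "a2", shared: 5, average: 8, strength: 63}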