ruby-maat 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.commitlintrc.json +44 -0
- data/.mailmap +3 -0
- data/.overcommit.yml +77 -0
- data/.release-please-config.json +33 -0
- data/.release-please-manifest.json +3 -0
- data/.rspec +3 -0
- data/.rubocop.yml +48 -0
- data/CHANGELOG.md +46 -0
- data/CI_CD_SETUP.md +180 -0
- data/CLAUDE.md +130 -0
- data/Dockerfile +40 -0
- data/README.md +444 -0
- data/README_RUBY.md +300 -0
- data/RELEASE_PLEASE_SETUP.md +198 -0
- data/RUBY_MAAT.md +227 -0
- data/Rakefile +12 -0
- data/doc/imgs/abs_churn_sample.png +0 -0
- data/doc/imgs/code_age_sample.png +0 -0
- data/doc/imgs/coupling_sample.png +0 -0
- data/doc/imgs/crime_cover.jpg +0 -0
- data/doc/imgs/tree_map_sample.png +0 -0
- data/doc/intro.md +3 -0
- data/exe/ruby-maat +6 -0
- data/lib/ruby_maat/analysis/authors.rb +47 -0
- data/lib/ruby_maat/analysis/base_analysis.rb +70 -0
- data/lib/ruby_maat/analysis/churn.rb +255 -0
- data/lib/ruby_maat/analysis/code_age.rb +53 -0
- data/lib/ruby_maat/analysis/commit_messages.rb +58 -0
- data/lib/ruby_maat/analysis/communication.rb +56 -0
- data/lib/ruby_maat/analysis/effort.rb +150 -0
- data/lib/ruby_maat/analysis/entities.rb +40 -0
- data/lib/ruby_maat/analysis/identity.rb +12 -0
- data/lib/ruby_maat/analysis/logical_coupling.rb +134 -0
- data/lib/ruby_maat/analysis/sum_of_coupling.rb +43 -0
- data/lib/ruby_maat/analysis/summary.rb +43 -0
- data/lib/ruby_maat/app.rb +143 -0
- data/lib/ruby_maat/change_record.rb +47 -0
- data/lib/ruby_maat/cli.rb +187 -0
- data/lib/ruby_maat/dataset.rb +205 -0
- data/lib/ruby_maat/groupers/layer_grouper.rb +67 -0
- data/lib/ruby_maat/groupers/team_mapper.rb +51 -0
- data/lib/ruby_maat/groupers/time_grouper.rb +70 -0
- data/lib/ruby_maat/output/csv_output.rb +65 -0
- data/lib/ruby_maat/parsers/base_parser.rb +63 -0
- data/lib/ruby_maat/parsers/git2_parser.rb +72 -0
- data/lib/ruby_maat/parsers/git_parser.rb +66 -0
- data/lib/ruby_maat/parsers/mercurial_parser.rb +64 -0
- data/lib/ruby_maat/parsers/perforce_parser.rb +77 -0
- data/lib/ruby_maat/parsers/svn_parser.rb +76 -0
- data/lib/ruby_maat/parsers/tfs_parser.rb +103 -0
- data/lib/ruby_maat/version.rb +5 -0
- data/lib/ruby_maat.rb +44 -0
- metadata +143 -0
data/Rakefile
ADDED
data/doc/imgs/abs_churn_sample.png
ADDED
Binary file
data/doc/imgs/code_age_sample.png
ADDED
Binary file
data/doc/imgs/coupling_sample.png
ADDED
Binary file
data/doc/imgs/crime_cover.jpg
ADDED
Binary file
data/doc/imgs/tree_map_sample.png
ADDED
Binary file
data/doc/intro.md
ADDED
data/exe/ruby-maat
ADDED
data/lib/ruby_maat/analysis/authors.rb
ADDED
@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+
+module RubyMaat
+  module Analysis
+    # Authors analysis - counts distinct authors per entity
+    # Research shows that the number of authors of a module is related to quality problems
+    class Authors < BaseAnalysis
+      def analyze(dataset, options = {})
+        min_revs = options[:min_revs] || 1
+
+        # Group by entity and count distinct authors and revisions manually
+        entity_stats = {}
+
+        dataset.to_df.to_a.each do |row|
+          entity = row["entity"]
+          author = row["author"]
+          revision = row["revision"]
+
+          entity_stats[entity] ||= {authors: Set.new, revisions: Set.new}
+          entity_stats[entity][:authors] << author
+          entity_stats[entity][:revisions] << revision
+        end
+
+        # Build results and apply minimum revisions filter
+        results = []
+        entity_stats.each do |entity, stats|
+          n_revs = stats[:revisions].size
+          next if n_revs < min_revs
+
+          results << {
+            entity: entity,
+            "n-authors": stats[:authors].size,
+            "n-revs": n_revs
+          }
+        end
+
+        # Sort by number of authors (descending), then by revisions (descending)
+        results.sort! do |a, b|
+          comparison = b[:"n-authors"] <=> a[:"n-authors"]
+          comparison.zero? ? b[:"n-revs"] <=> a[:"n-revs"] : comparison
+        end
+
+        to_csv_data(results, [:entity, :"n-authors", :"n-revs"])
+      end
+    end
+  end
+end
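A minimal usage sketch for orientation. The analyze signature and output columns come from the diff above; the dataset construction is an assumption, since RubyMaat::Dataset is defined elsewhere in this package (data/lib/ruby_maat/dataset.rb) and its constructor is not shown here.

# Hypothetical invocation; `dataset` is an assumed RubyMaat::Dataset instance.
authors_df = RubyMaat::Analysis::Authors.new.analyze(dataset, min_revs: 2)
# => Rover::DataFrame with columns entity, n-authors, n-revs,
#    sorted by n-authors descending, ties broken by n-revs descending.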
data/lib/ruby_maat/analysis/base_analysis.rb
ADDED
@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+
+module RubyMaat
+  module Analysis
+    # Base class for all analysis modules
+    class BaseAnalysis
+      def analyze(dataset, options = {})
+        raise NotImplementedError, "Subclasses must implement analyze method"
+      end
+
+      protected
+
+      # Filter dataset by minimum revisions threshold
+      def filter_by_min_revisions(dataset, min_revs)
+        return dataset if min_revs <= 1
+
+        dataset.filter_min_revisions(min_revs)
+      end
+
+      # Helper to convert analysis results to CSV-compatible format
+      def to_csv_data(results, columns)
+        if results.empty?
+          # Create empty dataframe with proper column structure
+          empty_data = {}
+          columns.each { |col| empty_data[col] = [] }
+          return Rover::DataFrame.new(empty_data)
+        end
+
+        if results.is_a?(Rover::DataFrame)
+          # Already a dataframe
+          results
+        elsif results.first.is_a?(Hash)
+          # Array of hashes
+          Rover::DataFrame.new(results)
+        else
+          # Custom data structure - convert to hash format
+          data = results.map { |item| format_row(item, columns) }
+          Rover::DataFrame.new(data)
+        end
+      end
+
+      def format_row(item, columns)
+        if item.respond_to?(:to_h)
+          item.to_h.slice(*columns)
+        else
+          # Assume item is an array matching column order
+          columns.zip(item).to_h
+        end
+      end
+
+      # Mathematical utilities
+      def safe_divide(numerator, denominator)
+        return 0 if denominator.nil? || denominator.zero?
+
+        (numerator.to_f / denominator).round(2)
+      end
+
+      def percentage(part, total)
+        (safe_divide(part, total) * 100).round(0)
+      end
+
+      # Calculate average of two numbers
+      def average(first_value, second_value)
+        return 0 if first_value.nil? || second_value.nil?
+
+        ((first_value + second_value) / 2.0).round(1)
+      end
+    end
+  end
+end
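The numeric helpers fix the rounding conventions used by the other analyses. The values below follow directly from the code above:

safe_divide(1, 3)  # => 0.33 (two decimals; returns 0 when the denominator is nil or zero)
percentage(1, 3)   # => 33   (safe_divide result times 100, rounded to a whole number)
average(3, 4)      # => 3.5  (mean of two values, rounded to one decimal)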
data/lib/ruby_maat/analysis/churn.rb
ADDED
@@ -0,0 +1,255 @@
+# frozen_string_literal: true
+
+module RubyMaat
+  module Analysis
+    module Churn
+      # Absolute churn analysis - code churn trends over time
+      class Absolute < BaseAnalysis
+        def analyze(dataset, _options = {})
+          # Group by date and sum churn metrics, count commits
+          results = {}
+
+          dataset.to_df.each_row do |row|
+            date = row["date"]
+            added = row["loc_added"] || 0
+            deleted = row["loc_deleted"] || 0
+            revision = row["revision"]
+
+            results[date] ||= {date: date, added: 0, deleted: 0, revisions: Set.new}
+            results[date][:added] += added
+            results[date][:deleted] += deleted
+            results[date][:revisions] << revision
+          end
+
+          # Convert to final format
+          final_results = results.values.map do |result|
+            {
+              date: result[:date],
+              added: result[:added],
+              deleted: result[:deleted],
+              commits: result[:revisions].size
+            }
+          end
+
+          # Sort by date
+          final_results.sort_by! { |r| r[:date] }
+
+          to_csv_data(final_results, %i[date added deleted commits])
+        end
+      end
+
+      # Author churn analysis - churn metrics per author
+      class ByAuthor < BaseAnalysis
+        def analyze(dataset, _options = {})
+          # Group by author and sum churn metrics, count commits
+          results = {}
+
+          dataset.to_df.each_row do |row|
+            author = row["author"]
+            added = row["loc_added"] || 0
+            deleted = row["loc_deleted"] || 0
+            revision = row["revision"]
+
+            results[author] ||= {author: author, added: 0, deleted: 0, revisions: Set.new}
+            results[author][:added] += added
+            results[author][:deleted] += deleted
+            results[author][:revisions] << revision
+          end
+
+          # Convert to final format
+          final_results = results.values.map do |result|
+            {
+              author: result[:author],
+              added: result[:added],
+              deleted: result[:deleted],
+              commits: result[:revisions].size
+            }
+          end
+
+          # Sort by total churn (added + deleted) descending, then by added lines descending, then by author
+          final_results.sort! do |a, b|
+            total_churn_b = b[:added] + b[:deleted]
+            total_churn_a = a[:added] + a[:deleted]
+            churn_comparison = total_churn_b <=> total_churn_a
+
+            if churn_comparison.zero?
+              added_comparison = b[:added] <=> a[:added]
+              added_comparison.zero? ? a[:author] <=> b[:author] : added_comparison
+            else
+              churn_comparison
+            end
+          end
+
+          to_csv_data(final_results, %i[author added deleted commits])
+        end
+      end
+
+      # Entity churn analysis - churn metrics per entity
+      class ByEntity < BaseAnalysis
+        def analyze(dataset, options = {})
+          min_revs = options[:min_revs] || 5
+
+          # Group by entity and sum churn metrics
+          results = {}
+
+          dataset.to_df.each_row do |row|
+            entity = row["entity"]
+            added = row["loc_added"] || 0
+            deleted = row["loc_deleted"] || 0
+            revision = row["revision"]
+
+            results[entity] ||= {entity: entity, added: 0, deleted: 0, revisions: Set.new}
+            results[entity][:added] += added
+            results[entity][:deleted] += deleted
+            results[entity][:revisions] << revision
+          end
+
+          # Filter by minimum revisions and format results
+          filtered_results = results.values.map do |result|
+            next if result[:revisions].size < min_revs
+
+            {
+              entity: result[:entity],
+              added: result[:added],
+              deleted: result[:deleted],
+              commits: result[:revisions].size
+            }
+          end.compact
+
+          # Sort by total churn descending
+          filtered_results.sort_by! { |r| -(r[:added] + r[:deleted]) }
+
+          to_csv_data(filtered_results, %i[entity added deleted commits])
+        end
+      end
+
+      # Ownership analysis - churn metrics per author per entity
+      class Ownership < BaseAnalysis
+        def analyze(dataset, _options = {})
+          # Group by entity and author
+          results = {}
+
+          dataset.to_df.each_row do |row|
+            entity = row["entity"]
+            author = row["author"]
+            added = row["loc_added"] || 0
+            deleted = row["loc_deleted"] || 0
+
+            key = [entity, author]
+            results[key] ||= {entity: entity, author: author, added: 0, deleted: 0}
+            results[key][:added] += added
+            results[key][:deleted] += deleted
+          end
+
+          # Sort by entity, then by total contribution descending
+          sorted_results = results.values.sort do |a, b|
+            entity_comparison = a[:entity] <=> b[:entity]
+            if entity_comparison.zero?
+              total_b = b[:added] + b[:deleted]
+              total_a = a[:added] + a[:deleted]
+              total_b <=> total_a
+            else
+              entity_comparison
+            end
+          end
+
+          to_csv_data(sorted_results, %i[entity author added deleted])
+        end
+      end
+
+      # Main developer analysis - primary contributor per entity (by lines)
+      class MainDeveloper < BaseAnalysis
+        def analyze(dataset, options = {})
+          min_revs = options[:min_revs] || 5
+
+          # Group contributions by entity and author
+          entity_contributions = {}
+          entity_totals = {}
+
+          dataset.to_df.each_row do |row|
+            entity = row["entity"]
+            author = row["author"]
+            added = row["loc_added"] || 0
+            row["loc_deleted"] || 0
+
+            entity_contributions[entity] ||= {}
+            entity_contributions[entity][author] ||= {added: 0, revisions: Set.new}
+            entity_contributions[entity][author][:added] += added
+            entity_contributions[entity][author][:revisions] << row["revision"]
+
+            entity_totals[entity] ||= 0
+            entity_totals[entity] += added
+          end
+
+          # Find main developer for each entity
+          results = []
+
+          entity_contributions.each do |entity, authors|
+            total_revisions = authors.values.map { |data| data[:revisions] }.reduce(Set.new, &:|).size
+            next if total_revisions < min_revs
+
+            # Find author with most added lines (tie-break by author name alphabetically)
+            main_author = authors.max_by { |author, data| [data[:added], author] }
+            next unless main_author
+
+            author_name, author_data = main_author
+            total_added = entity_totals[entity]
+            ownership = total_added.positive? ? (author_data[:added].to_f / total_added).round(2) : 0.0
+
+            results << {
+              entity: entity,
+              "main-dev": author_name,
+              added: author_data[:added],
+              "total-added": total_added,
+              ownership: ownership
+            }
+          end
+
+          # Sort by entity name
+          results.sort_by! { |r| r[:entity] }
+
+          to_csv_data(results, %i[entity main-dev added total-added ownership])
+        end
+      end
+
+      # Refactoring main developer - entities with frequent changes by main developer
+      class RefactoringMainDeveloper < BaseAnalysis
+        def analyze(dataset, options = {})
+          min_revs = options[:min_revs] || 5
+
+          # First find main developers
+          main_dev_analysis = MainDeveloper.new
+          main_devs_df = main_dev_analysis.analyze(dataset, options)
+
+          # Convert to hash for lookup
+          main_devs = {}
+          main_devs_df.each_row do |row|
+            main_devs[row[:entity]] = row[:main_dev]
+          end
+
+          # Count revisions by main developer per entity
+          results = []
+
+          main_devs.each do |entity, main_dev|
+            entity_data = dataset.to_df.filter { |row| row[:entity] == entity && row[:author] == main_dev }
+            main_dev_revisions = entity_data[:revision].uniq.size
+
+            next if main_dev_revisions < min_revs
+
+            results << {
+              entity: entity,
+              main_dev: main_dev,
+              added: main_dev_revisions, # Number of revisions by main dev
+              deleted: 0
+            }
+          end
+
+          # Sort by number of revisions descending
+          results.sort_by! { |r| -r[:added] }
+
+          to_csv_data(results, %i[entity main_dev added deleted])
+        end
+      end
+    end
+  end
+end
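A small worked example of the ownership fraction computed by MainDeveloper, with hypothetical numbers:

# Suppose alice added 80 lines to app.rb and bob added 20, and app.rb has
# at least min_revs (default 5) distinct revisions:
# entity_totals["app.rb"]  # => 100
# alice's ownership        # => (80.0 / 100).round(2) == 0.8
# reported row: {entity: "app.rb", "main-dev": "alice", added: 80,
#                "total-added": 100, ownership: 0.8}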
data/lib/ruby_maat/analysis/code_age.rb
ADDED
@@ -0,0 +1,53 @@
+# frozen_string_literal: true
+
+module RubyMaat
+  module Analysis
+    # Code age analysis - measures how long since each entity was last modified
+    class CodeAge < BaseAnalysis
+      def analyze(dataset, options = {})
+        reference_date = options[:age_time_now] || Date.today
+
+        # Find the latest modification date for each entity
+        entity_latest_dates = {}
+
+        dataset.to_df.each_row do |row|
+          entity = row["entity"]
+          date = row["date"]
+
+          entity_latest_dates[entity] = date if entity_latest_dates[entity].nil? || date > entity_latest_dates[entity]
+        end
+
+        # Calculate age in months for each entity
+        results = entity_latest_dates.map do |entity, last_date|
+          months_old = calculate_months_between(last_date, reference_date)
+
+          {
+            entity: entity,
+            "age-months": months_old
+          }
+        end
+
+        # Sort by age descending (oldest first)
+        results.sort_by! { |r| -r[:"age-months"] }
+
+        to_csv_data(results, %i[entity age-months])
+      end
+
+      private
+
+      def calculate_months_between(start_date, end_date)
+        return 0 if start_date >= end_date
+
+        years = end_date.year - start_date.year
+        months = end_date.month - start_date.month
+
+        total_months = (years * 12) + months
+
+        # Adjust if the day hasn't been reached yet in the end month
+        total_months -= 1 if end_date.day < start_date.day
+
+        [total_months, 0].max
+      end
+    end
+  end
+end
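A worked example of calculate_months_between, applying the rules above to two arbitrary dates:

# calculate_months_between(Date.new(2024, 3, 15), Date.new(2025, 1, 10))
# years  = 2025 - 2024      # => 1
# months = 1 - 3            # => -2
# total  = (1 * 12) + (-2)  # => 10
# end day 10 < start day 15, so subtract one  # => 9 months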
data/lib/ruby_maat/analysis/commit_messages.rb
ADDED
@@ -0,0 +1,58 @@
+# frozen_string_literal: true
+
+module RubyMaat
+  module Analysis
+    # Commit messages analysis - word frequency analysis of commit messages
+    class CommitMessages < BaseAnalysis
+      def analyze(dataset, options = {})
+        expression = options[:expression_to_match]
+
+        # Extract commit messages
+        messages = dataset.to_df[:message].compact
+
+        # Filter by regex if provided
+        if expression
+          regex = Regexp.new(expression, Regexp::IGNORECASE)
+          messages = messages.grep(regex)
+        end
+
+        # Tokenize and count words
+        word_frequencies = Hash.new(0)
+
+        messages.each do |message|
+          # Simple tokenization: split on whitespace and punctuation, convert to lowercase
+          words = message.downcase.split(/[^a-zA-Z0-9]+/).reject(&:empty?)
+
+          # Filter out common stop words and very short words
+          words = words.reject { |word| word.length < 3 || stop_words.include?(word) }
+
+          words.each { |word| word_frequencies[word] += 1 }
+        end
+
+        # Convert to results format
+        results = word_frequencies.map do |word, frequency|
+          {
+            word: word,
+            frequency: frequency
+          }
+        end
+
+        # Sort by frequency descending
+        results.sort_by! { |r| -r[:frequency] }
+
+        to_csv_data(results, %i[word frequency])
+      end
+
+      private
+
+      def stop_words
+        %w[
+          the and or but for with from that this will was are has have had been
+          can could would should may might must shall
+          not don't doesn't didn't won't wasn't weren't isn't aren't hasn't haven't
+          add fix update remove delete change modify refactor implement
+        ].to_set
+      end
+    end
+  end
+end
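A quick trace of the tokenizer on one hypothetical commit message, following the code above:

# "Fix the parser, fix CSV output"
# downcase + split          # => ["fix", "the", "parser", "fix", "csv", "output"]
# reject short / stop words # => ["parser", "csv", "output"]  ("fix" and "the" are stop words)
# word_frequencies          # => {"parser" => 1, "csv" => 1, "output" => 1}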
data/lib/ruby_maat/analysis/communication.rb
ADDED
@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+
+module RubyMaat
+  module Analysis
+    # Communication analysis - identifies developer collaboration patterns
+    # Based on Conway's Law: organizations design systems that mirror their communication structure
+    class Communication < BaseAnalysis
+      def analyze(dataset, options = {})
+        min_revs = options[:min_revs] || 5
+        min_shared_revs = options[:min_shared_revs] || 5
+
+        # Group entities by author to find their work domains
+        author_entities = {}
+
+        dataset.to_df.each_row do |row|
+          author = row[:author]
+          entity = row[:entity]
+
+          author_entities[author] ||= Set.new
+          author_entities[author] << entity
+        end
+
+        # Find pairs of authors who work on shared entities
+        results = []
+        author_pairs = author_entities.keys.combination(2)
+
+        author_pairs.each do |author1, author2|
+          shared_entities = author_entities[author1] & author_entities[author2]
+          next if shared_entities.size < min_shared_revs
+
+          author1_entities = author_entities[author1].size
+          author2_entities = author_entities[author2].size
+
+          # Communication strength based on shared work
+          avg_entities = average(author1_entities, author2_entities)
+          next if avg_entities < min_revs
+
+          communication_strength = percentage(shared_entities.size, avg_entities)
+
+          results << {
+            author: author1,
+            peer: author2,
+            shared: shared_entities.size,
+            average: avg_entities.ceil,
+            strength: communication_strength
+          }
+        end
+
+        # Sort by communication strength descending
+        results.sort_by! { |r| -r[:strength] }
+
+        to_csv_data(results, %i[author peer shared average strength])
+      end
+    end
+  end
+end
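A worked example of the strength metric with hypothetical counts, using the helpers from base_analysis.rb:

# alice has touched 20 entities, bob 10, and 8 of them are shared:
# avg_entities = average(20, 10)  # => 15.0
# strength = percentage(8, 15.0)  # => 53 (safe_divide(8, 15.0) == 0.53)
# reported row: {author: "alice", peer: "bob", shared: 8, average: 15, strength: 53}

Both thresholds pass here: 8 shared entities meets min_shared_revs and 15.0 meets min_revs, with the defaults of 5.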