git_ownership_insights 0.1.4 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f2cc151f3f515c7ec241175034e18e611a1eedc4df8c322c383e15390f483c96
4
- data.tar.gz: 59774d973a6d0aa982f0a8bf426939c0c59caeaa9305c624f19e6cd42b7e92c8
3
+ metadata.gz: 4bca095a91102d5a60161e08921ba7af125dfc7a5ee528bd527ab57720814d62
4
+ data.tar.gz: 39e9afe47d9af3c4e1b49a084001c52bef8b55f8f8ee9057bd264c000e466fba
5
5
  SHA512:
6
- metadata.gz: f146f73f54759ea4e568961158c5b8f938e0f471d0fbc7cfdc84faf6b7597a3352d073e60b8417c672f1dc161c09b0d3dc0282e9e1a459daf7104a6b4bf50c94
7
- data.tar.gz: 80779de18c3db314f0ca78bc7b1c07fb8a2290b9720fa2bc2cac7d85ab6965c19f06496c4c1e19ae08bd673d80d9731bcae600d329ef77775924ad542e0b4a98
6
+ metadata.gz: 207a6053d21c89408c33ca31068ae8d2fb2a88b7bf4bd63d5cdbdafbcbffc6e3df05bee91142adb88030cb2befe7cbfe48258a9a70fd4a952cfa0f3ef2601aed
7
+ data.tar.gz: 10f59df955f3a5321cf046761303befc0260490069a81cbe40d505ffa82a19dd03d2b03896f7cdd882a39b3a29cc686c6cf911ff1c196abaceb7607ddacaab59
data/Gemfile.lock CHANGED
@@ -2,6 +2,7 @@ PATH
2
2
  remote: .
3
3
  specs:
4
4
  git_ownership_insights (0.1.4)
5
+ awesome_print
5
6
  date
6
7
  pry
7
8
 
@@ -9,6 +10,7 @@ GEM
9
10
  remote: https://rubygems.org/
10
11
  specs:
11
12
  ast (2.4.2)
13
+ awesome_print (1.9.2)
12
14
  coderay (1.1.3)
13
15
  date (3.3.4)
14
16
  diff-lcs (1.5.0)
@@ -13,15 +13,31 @@ OptionParser.new do |opts|
13
13
  options[:debug] = true
14
14
  end
15
15
 
16
- opts.on('--exclusions STRING', 'Comma-delimited list of exclusions [example: WEB,RAILS,MOBILE]') do |exclusions|
16
+ opts.on('--ci', 'Do not print the info messages for better CI text parsing [default: false]') do
17
+ options[:ci] = true
18
+ end
19
+
20
+ opts.on('--codeowners', 'Print CODEOWNERS info [default: false]') do
21
+ options[:codeowners] = true
22
+ end
23
+
24
+ opts.on('--hotspot-files', 'Print the found hotspot files (big files touched by many) [default: false]') do
25
+ options[:hotspot_files] = true
26
+ end
27
+
28
+ opts.on('--excluded-contributors STRING', 'Comma-delimited list of excluded contributors [example: WEB,RAILS,MOBILE]') do |exclusions|
17
29
  options[:exclusions] = exclusions
18
30
  end
19
31
 
32
+ opts.on('--excluded-files STRING', 'Comma-delimited list of excluded files [example: ViewController,AppDelegate.swift]') do |excluded_files|
33
+ options[:excluded_files] = excluded_files
34
+ end
35
+
20
36
  opts.on('--steps STRING', 'Number of steps the script will go into the past [default: 1]') do |steps|
21
37
  options[:steps] = steps
22
38
  end
23
39
 
24
- opts.on('--duration_in_days STRING',
40
+ opts.on('--duration-in-days STRING',
25
41
  'Number of days to aggregate the changes for [default: 30]') do |duration_in_days|
26
42
  options[:duration_in_days] = duration_in_days
27
43
  end
@@ -30,28 +46,40 @@ OptionParser.new do |opts|
30
46
  options[:path] = path
31
47
  end
32
48
 
33
- opts.on('--team_regex STRING', 'Regex that will identify the team name [default: "[A-Za-z]+"]') do |team_regex|
49
+ opts.on('--team-regex STRING', 'Regex that will identify the team name [default: "[A-Za-z]+"]') do |team_regex|
34
50
  options[:team_regex] = team_regex
35
51
  end
36
52
 
37
- opts.on('--top_contributing_team STRING', 'Limit of top contributed to the directory teams in codeownership data [default: 5]') do |top_contributing_team|
53
+ opts.on('--top-contributing-team STRING', 'Limit of top contributed to the directory teams in codeownership data [default: 5]') do |top_contributing_team|
38
54
  options[:top_contributing_team] = top_contributing_team
39
55
  end
40
56
 
41
- opts.on('--top_touched_files STRING', 'Limit of top touched files by individual contributors in codeownership data [default: 5]') do |top_touched_files|
57
+ opts.on('--top-touched-files STRING', 'Limit of top touched files by individual contributors in codeownership data [default: 5]') do |top_touched_files|
42
58
  options[:top_touched_files] = top_touched_files
43
59
  end
44
60
 
45
- opts.on('--codeowners_path STRING', 'Path to CODEOWNERS file [default: .github/CODEOWNERS]') do |codeowners_path|
61
+ opts.on('--codeowners-path STRING', 'Path to CODEOWNERS file [default: .github/CODEOWNERS]') do |codeowners_path|
46
62
  options[:codeowners_path] = codeowners_path
47
63
  end
48
64
 
65
+ opts.on('--big-file-size STRING', 'The amount of lines in the file to be considered big [default: 250]') do |big_file_size|
66
+ options[:big_file_size] = big_file_size
67
+ end
68
+
69
+ opts.on('--default-branch STRING', 'The default branch to pull and run metrics for [default: master]') do |default_branch|
70
+ options[:default_branch] = default_branch
71
+ end
72
+
73
+ opts.on('--code-extensions STRING', 'The file extensions that consider to be code [default: ".kt, .swift"]') do |code_extension|
74
+ options[:code_extension] = code_extension
75
+ end
76
+
49
77
  opts.on('-h', '--help', 'Display this help message') do
50
78
  puts opts
51
79
  puts <<~EXAMPLES
52
80
 
53
81
  Examples:
54
- git_ownership_insights --path src/test --exclusions WEB,RAILS --steps 2 --duration_in_days 90 --debug
82
+ git_ownership_insights --path src/test --exclusions WEB,RAILS --steps 2 --duration-in-days 90 --hotspot-files --debug
55
83
  EXAMPLES
56
84
  exit
57
85
  end
@@ -63,6 +91,18 @@ TEAM_REGEX = options[:team_regex] || '[A-Za-z]+'
63
91
  TOP_TOUCHED_FILES = options[:top_touched_files] || 5
64
92
  TOP_CONTRIBUTED_TEAMS = options[:top_contributing_team] || 5
65
93
  CODEOWNERS_PATH = options[:codeowners_path] || ".github/CODEOWNERS"
94
+ BIG_FILE_SIZE = options[:big_file_size] || 250
95
+ CI = options[:ci] || false
96
+ DEFAULT_BRANCH = options[:default_branch] || 'master'
97
+ CODEOWNERS = options[:codeowners] || false
98
+ HOTSPOT = options[:hotspot_files] || false
99
+ CODE_EXTENSIONS = options[:code_extension] ? options[:code_extension].split : ['.swift', '.kt']
100
+ EXCLUDED_FILES = options[:excluded_files]
101
+
102
+ def true?(obj)
103
+ obj.to_s.downcase == "true"
104
+ end
105
+
66
106
  def read_codeowners_file
67
107
  raise "CODEOWNERS file does not exist under #{CODEOWNERS_PATH}" unless File.exist?(CODEOWNERS_PATH)
68
108
 
@@ -79,17 +119,50 @@ def read_codeowners_file
79
119
  end
80
120
 
81
121
  def find_owners(file_path, codeowners)
82
- matching_patterns = codeowners.keys.select { |pattern| file_path.include?(pattern.sub(%r{^/+}, '').chomp('/')) }
122
+ matching_patterns = codeowners.keys.select do |pattern|
123
+ pattern_regex = Regexp.new("^#{Regexp.escape(pattern.sub(%r{^/+}, '').chomp('/')).gsub('\*', '.*').gsub('**', '.*?')}")
124
+ file_path =~ pattern_regex
125
+ end
126
+
83
127
  return ['unknown'] if matching_patterns.empty?
84
128
 
85
129
  # Sort patterns by length in descending order
86
130
  sorted_patterns = matching_patterns.sort_by(&:length).reverse
87
131
 
88
132
  # Find the most specific matching pattern
89
- best_match = sorted_patterns.find { |pattern| file_path.include?(pattern.sub(%r{^/+}, '').chomp('/')) }
133
+ best_match = sorted_patterns.find do |pattern|
134
+ pattern_regex = Regexp.new("^#{Regexp.escape(pattern.sub(%r{^/+}, '').chomp('/')).gsub('\*', '.*').gsub('**', '.*?')}")
135
+ file_path =~ pattern_regex
136
+ end
137
+
90
138
  codeowners[best_match].split(' ')
91
139
  end
92
140
 
141
+ def count_big_files(directory_path, size: BIG_FILE_SIZE)
142
+ # Get a list of all files in the specified directory
143
+ files = Dir.glob(File.join(directory_path, '**', '*')).select { |file| File.file?(file) }
144
+
145
+ code_files = files.select {|f|
146
+ extension = File.extname(f)
147
+ valid_extensions = ['.swift', '.kt']
148
+ valid_extensions.include?(extension)
149
+ }
150
+
151
+ # Initialize a counter for files that meet the criteria
152
+ count = 0
153
+
154
+ # Iterate through each file and check the line count
155
+ code_files.each do |file|
156
+ lines_count = File.foreach(file).reject { |line| line.match(/^\s*(\/\/|\/\*.*\*\/|\s*$)/) }.count
157
+
158
+ if lines_count > size
159
+ count += 1
160
+ end
161
+ end
162
+
163
+ puts " Total number of files longer than #{size} lines: #{count}"
164
+ end
165
+
93
166
  def contribution_message(directory_path:, duration_in_days:, begin_time:, debug: nil, steps: nil)
94
167
  duration_in_days = duration_in_days.to_i
95
168
  all_teams = []
@@ -97,26 +170,40 @@ def contribution_message(directory_path:, duration_in_days:, begin_time:, debug:
97
170
  total_changes = 0
98
171
  start_date = begin_time.to_time.to_i - duration_in_days * 86_400
99
172
  end_date = begin_time.to_time.to_i
100
- file_count = `git ls-tree -r --name-only $(git rev-list -1 --before="#{end_date}" HEAD) -- "#{directory_path}" | wc -l`.to_i
101
- files_with_changes = `git log --name-only --pretty=format:"" --since="#{start_date}" --until="#{end_date}" "#{directory_path}"`.split.sort
102
- uniq_files_with_changes = files_with_changes.uniq
173
+ file_count = `git ls-tree -r --name-only $(git rev-list -1 --since="#{start_date}" --until="#{end_date}" HEAD) -- "#{directory_path}" | wc -l`.to_i
174
+ all_files_with_changes = `git log --name-only --pretty=format:"" --since="#{start_date}" --until="#{end_date}" "#{directory_path}"`.split.sort
175
+ excluded_patterns = EXCLUDED_FILES.split(',')
176
+
177
+ code_files_with_changes = all_files_with_changes.select {|f|
178
+ extension = File.extname(f)
179
+ valid_extensions = CODE_EXTENSIONS
180
+ valid_extensions.include?(extension)
181
+ }.reject do |file|
182
+ excluded_patterns.any? { |pattern| file.include?(pattern) }
183
+ end
184
+
185
+ uniq_code_files_with_changes = code_files_with_changes.uniq
186
+
103
187
  file_team_map = {}
104
- uniq_files_with_changes.each do |file|
188
+ uniq_code_files_with_changes.each do |file|
105
189
  filename = File.basename(file)
106
190
  commit_count = `git log --since="#{start_date}" --until="#{end_date}" --follow -- "#{file}" | grep -c '^commit'`.to_i
107
191
 
108
- next unless commit_count.positive?
109
-
110
192
  # Get the log of the file in the given duration
111
193
  git_log = `git log --pretty=format:"%s" --since="#{start_date}" --until="#{end_date}" --follow -- "#{file}"`.split("\n")
112
194
  teams = git_log.map do |team|
113
195
  team.match(/#{TEAM_REGEX}/)[0].upcase
114
196
  end.reject { |e| EXCLUSIONS&.include?(e) }
115
- total_changes += teams.count
197
+
198
+ total_changes += commit_count
116
199
  all_teams << teams
117
200
  teams = teams.uniq
118
- files_changed_by_many_teams += 1 if teams.count > 1
119
- file_team_map.merge!("#{filename}" => teams)
201
+
202
+ if teams.count > 1
203
+ files_changed_by_many_teams += 1
204
+ file_team_map.merge!("#{file}" => [teams, commit_count])
205
+ end
206
+
120
207
  puts "\n#{filename} [#{commit_count}]:#{teams}\n" if debug
121
208
  end
122
209
 
@@ -124,61 +211,99 @@ def contribution_message(directory_path:, duration_in_days:, begin_time:, debug:
124
211
  sorted_occurrences = occurrences.sort_by { |element, count| [-count, element] }
125
212
  contributors = Hash[sorted_occurrences]
126
213
 
127
- puts "Timeframe: #{(begin_time - duration_in_days).strftime('%Y-%m-%d')} to #{begin_time.strftime('%Y-%m-%d')}\n Files with a single contributor: #{(100 - ((files_changed_by_many_teams.to_f / files_with_changes.count) * 100)).round(2)}%\n Amount of commits: #{total_changes}\n Total files changed: #{files_with_changes.count}\n Total files in the folder: #{file_count}\n Contributors: #{contributors}\n"
214
+ churn_count = file_team_map.values.map { |value| value[1] }.sum
215
+ hotspot_changes_percentage = (churn_count.to_f / total_changes.to_f)*100
128
216
 
129
- touched_files = files_with_changes.flatten.compact.tally
130
- top_touched_files = touched_files.sort_by { |element, count| [-count, element] }.take(TOP_TOUCHED_FILES.to_i)
131
- codeowners = read_codeowners_file
217
+ puts "Timeframe: #{(begin_time - duration_in_days).strftime('%Y-%m-%d')} to #{begin_time.strftime('%Y-%m-%d')}\n Code files with a single contributor: #{(100 - ((files_changed_by_many_teams.to_f / code_files_with_changes.count.to_f) * 100)).round(2)}%\n Hotspot code changes: #{churn_count} (#{hotspot_changes_percentage.round(2)}%)\n Amount of code changes: #{total_changes}\n Total files changed: #{code_files_with_changes.count}\n Total files in the folder: #{file_count}\n Contributors: #{contributors}\n"
132
218
 
133
- owners_data = Hash.new do |hash, key|
134
- hash[key] = { directories: Hash.new do |h, k|
135
- h[k] = { files: [] }
136
- end, total_count: 0 }
137
- end
219
+ # Filter files based on extension and size
220
+ filtered_files = file_team_map.select do |file_path|
221
+ next unless File.exist?(file_path)
138
222
 
139
- top_touched_files.each do |file, count|
140
- owners = find_owners(file, codeowners)
141
- owners.each do |owner|
142
- owners_data[owner][:total_count] += count
223
+ # Check if the file size is more than BIG_FILE_SIZE lines (excluding empty and commented lines)
224
+ File.foreach(file_path).reject { |line| line.match(/^\s*(\/\/|\/\*.*\*\/|\s*$)/) }.count > BIG_FILE_SIZE.to_i
225
+ end
143
226
 
144
- dir_path = File.dirname(file)
145
- owners_data[owner][:directories][dir_path][:files] << { name: File.basename(file), count: count }
227
+ filtered_top_touched_files = filtered_files.sort_by { |element, count| [-count.last, element] }
228
+ count_big_files(directory_path)
229
+ puts " Total files longer than #{BIG_FILE_SIZE} lines with multiple contributors: #{filtered_top_touched_files.count}\n"
230
+ if HOTSPOT
231
+ filtered_top_touched_files.each do |line|
232
+ puts " #{line.first.gsub(directory_path, '')} Contributors: #{line.last.first} Commits: #{line.last.last}"
146
233
  end
147
234
  end
235
+ puts "\n\n"
148
236
 
149
- # Sort owners_data by total count in descending order
150
- sorted_owners_data = owners_data.sort_by { |_, data| -data[:total_count] }
237
+ if CODEOWNERS
238
+ codeowners = read_codeowners_file
151
239
 
152
- # Take the last 5 elements
153
- top_owners_data = sorted_owners_data.last(TOP_CONTRIBUTED_TEAMS.to_i)
240
+ owners_data = Hash.new do |hash, key|
241
+ hash[key] = { directories: Hash.new do |h, k|
242
+ h[k] = { files: [] }
243
+ end, churn_count: 0 }
244
+ end
245
+
246
+ file_team_map.each do |file, count|
247
+ owners = find_owners(file, codeowners)
248
+ owners.each do |owner|
249
+ owners_data[owner][:churn_count] += count.last
154
250
 
155
- puts ' Codeownership data:'
156
- top_owners_data.each do |owner, data|
157
- puts " #{owner.split('/').last}:\n Total Count: #{data[:total_count]}"
158
- data[:directories].each do |dir, dir_data|
159
- puts " Directory: #{dir}\n Top files:"
160
- dir_data[:files].each do |file_data|
161
- puts " #{File.basename(file_data[:name])} - #{file_data[:count]} #{file_team_map[file_data[:name]].empty? ? "[ Excluded contributor ]" : file_team_map[file_data[:name]]}"
251
+ dir_path = File.dirname(file)
252
+ owners_data[owner][:directories][dir_path][:files] << { name: File.basename(file), count: count }
162
253
  end
163
254
  end
164
- end
165
255
 
256
+ # Sort owners_data by total count in descending order
257
+ sorted_owners_data = owners_data.sort_by { |_, data| -data[:churn_count] }
258
+
259
+ # Take the last 5 elements
260
+ top_owners_data = sorted_owners_data.last(TOP_CONTRIBUTED_TEAMS.to_i)
261
+
262
+ converted_team_map = file_team_map.transform_keys { |key| File.basename(key) }
263
+
264
+ puts ' Codeownership data:'
265
+ top_owners_data.each do |owner, data|
266
+ puts " #{owner.split('/').last}:\n Total Count: #{data[:churn_count]}"
267
+ data[:directories].each do |dir, dir_data|
268
+ puts " Directory: #{dir}\n Top files:"
269
+ dir_data[:files].each do |file_data|
270
+ next if converted_team_map[File.basename(file_data[:name])].nil?
271
+
272
+ contributors = converted_team_map[file_data[:name]]&.first&.empty? ? [ "Excluded contributor" ] : converted_team_map[file_data[:name]].first
273
+ puts " #{File.basename(file_data[:name])} - #{file_data[:count].last} #{contributors}}"
274
+ end
275
+ end
276
+ end
277
+ end
166
278
  steps -= 1
167
279
 
168
280
  return unless steps.positive?
169
281
 
282
+ system("git checkout `git rev-list -1 --before='#{(begin_time - duration_in_days).strftime("%B %d %Y")}' HEAD`", [ :out, :err ] => File::NULL)
170
283
  contribution_message(duration_in_days: duration_in_days, directory_path: directory_path,
171
284
  begin_time: begin_time - duration_in_days, steps: steps, debug: debug)
172
285
  end
173
286
 
174
- puts "\nDirectory: #{REPO_PATH}\n"
175
- puts "Time period that data is aggregated by: #{options[:duration_in_days]} days"
176
- puts "Steps to jump in the past: #{options[:steps]}"
177
- puts "Limit of the teams shown in codeownership data: #{TOP_CONTRIBUTED_TEAMS}"
178
- puts "Limit of the files shown in codeownership data: #{TOP_TOUCHED_FILES}"
179
- puts "Regex to detect the teams identifiers: #{TEAM_REGEX}"
180
- puts "Excluded contributors: #{EXCLUSIONS}\n" if EXCLUSIONS
181
- puts "Debug mode is: #{options[:debug] ? 'on' : 'off'}\n\n"
287
+ unless CI
288
+ puts "\nDirectory: #{REPO_PATH}\n"
289
+ puts "Time period that data is aggregated by: #{options[:duration_in_days]} days"
290
+ puts "Steps to jump in the past: #{options[:steps].to_i}"
291
+ puts "Runs against: #{DEFAULT_BRANCH}"
292
+ puts "Code extensions: #{CODE_EXTENSIONS}"
293
+ puts "Regex to detect the teams identifiers: #{TEAM_REGEX}"
294
+ puts "Excluded contributors: #{EXCLUSIONS}\n" if EXCLUSIONS
295
+ puts "Excluded file patterns: #{EXCLUDED_FILES.split(',')}\n" if EXCLUDED_FILES
296
+ puts "Lines of code limit (big files) for the hotspot calculation: #{BIG_FILE_SIZE}"
297
+ puts "Hotspot detailed output is: #{options[:hotspot_files] ? 'on' : 'off'}\n"
298
+ puts "CODEOWNERS output is: #{options[:codeowners] ? 'on' : 'off'}\n"
299
+ puts "Limit of the teams shown in codeownership data: #{TOP_CONTRIBUTED_TEAMS}"
300
+ puts "Limit of the files shown in codeownership data: #{TOP_TOUCHED_FILES}"
301
+ puts "CI mode is: #{options[:ci] ? 'on' : 'off'}\n"
302
+ puts "Debug mode is: #{options[:debug] ? 'on' : 'off'}\n\n"
303
+ end
304
+
305
+ system("git checkout #{DEFAULT_BRANCH}", [ :out ] => File::NULL)
306
+ system("git pull", [ :out ] => File::NULL)
182
307
 
183
308
  contribution_message(duration_in_days: options[:duration_in_days] || 30, directory_path: REPO_PATH,
184
309
  begin_time: DateTime.now, steps: options[:steps].to_i, debug: options[:debug])
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module GitOwnershipInsights
4
- VERSION = '0.1.4'
4
+ VERSION = '1.0.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: git_ownership_insights
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Serghei Moret
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-01-17 00:00:00.000000000 Z
11
+ date: 2024-01-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: date