git_ownership_insights 0.1.3 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0317f67423ade18e56b854a07912a388bece13db418ab72dd8a678ee18c802a5
4
- data.tar.gz: f845947afad20fde877e4ed2568934a8bee8299d14d877a352386b5a899928d8
3
+ metadata.gz: 4bca095a91102d5a60161e08921ba7af125dfc7a5ee528bd527ab57720814d62
4
+ data.tar.gz: 39e9afe47d9af3c4e1b49a084001c52bef8b55f8f8ee9057bd264c000e466fba
5
5
  SHA512:
6
- metadata.gz: d3a3d4aedbf40a3928dadd38ab0442382828fe0a3a0712fbf6e15978105ccad2fcbac8102318bd6b70363e779036ff7da7de06e8d55b525858a4b0bf88dbb266
7
- data.tar.gz: b67f652e9e3f9676fa6455dc452a04700a48127aef953cb0d99ced1a722e3ad35dbed035fe37435561a349e72cd36e2e5881947f4b5128f2a29a334324144284
6
+ metadata.gz: 207a6053d21c89408c33ca31068ae8d2fb2a88b7bf4bd63d5cdbdafbcbffc6e3df05bee91142adb88030cb2befe7cbfe48258a9a70fd4a952cfa0f3ef2601aed
7
+ data.tar.gz: 10f59df955f3a5321cf046761303befc0260490069a81cbe40d505ffa82a19dd03d2b03896f7cdd882a39b3a29cc686c6cf911ff1c196abaceb7607ddacaab59
data/Gemfile.lock CHANGED
@@ -1,7 +1,8 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- git_ownership_insights (0.1.3)
4
+ git_ownership_insights (0.1.4)
5
+ awesome_print
5
6
  date
6
7
  pry
7
8
 
@@ -9,6 +10,7 @@ GEM
9
10
  remote: https://rubygems.org/
10
11
  specs:
11
12
  ast (2.4.2)
13
+ awesome_print (1.9.2)
12
14
  coderay (1.1.3)
13
15
  date (3.3.4)
14
16
  diff-lcs (1.5.0)
@@ -13,15 +13,31 @@ OptionParser.new do |opts|
13
13
  options[:debug] = true
14
14
  end
15
15
 
16
- opts.on('--exclusions STRING', 'Comma-delimited list of exclusions [example: WEB,RAILS,MOBILE]') do |exclusions|
16
+ opts.on('--ci', 'Do not print the info messages for better CI text parsing [default: false]') do
17
+ options[:ci] = true
18
+ end
19
+
20
+ opts.on('--codeowners', 'Print CODEOWNERS info [default: false]') do
21
+ options[:codeowners] = true
22
+ end
23
+
24
+ opts.on('--hotspot-files', 'Print the found hotspot files (big files touched by many) [default: false]') do
25
+ options[:hotspot_files] = true
26
+ end
27
+
28
+ opts.on('--excluded-contributors STRING', 'Comma-delimited list of excluded contributors [example: WEB,RAILS,MOBILE]') do |exclusions|
17
29
  options[:exclusions] = exclusions
18
30
  end
19
31
 
32
+ opts.on('--excluded-files STRING', 'Comma-delimited list of excluded files [example: ViewController,AppDelegate.swift]') do |excluded_files|
33
+ options[:excluded_files] = excluded_files
34
+ end
35
+
20
36
  opts.on('--steps STRING', 'Number of steps the script will go into the past [default: 1]') do |steps|
21
37
  options[:steps] = steps
22
38
  end
23
39
 
24
- opts.on('--duration_in_days STRING',
40
+ opts.on('--duration-in-days STRING',
25
41
  'Number of days to aggregate the changes for [default: 30]') do |duration_in_days|
26
42
  options[:duration_in_days] = duration_in_days
27
43
  end
@@ -30,28 +46,40 @@ OptionParser.new do |opts|
30
46
  options[:path] = path
31
47
  end
32
48
 
33
- opts.on('--team_regex STRING', 'Regex that will identify the team name [default: "[A-Za-z]+"]') do |team_regex|
49
+ opts.on('--team-regex STRING', 'Regex that will identify the team name [default: "[A-Za-z]+"]') do |team_regex|
34
50
  options[:team_regex] = team_regex
35
51
  end
36
52
 
37
- opts.on('--top_contributing_team STRING', 'Limit of top contributed to the directory teams in codeownership data [default: 5]') do |top_contributing_team|
53
+ opts.on('--top-contributing-team STRING', 'Limit of top contributed to the directory teams in codeownership data [default: 5]') do |top_contributing_team|
38
54
  options[:top_contributing_team] = top_contributing_team
39
55
  end
40
56
 
41
- opts.on('--top_touched_files STRING', 'Limit of top touched files by individual contributors in codeownership data [default: 5]') do |top_touched_files|
57
+ opts.on('--top-touched-files STRING', 'Limit of top touched files by individual contributors in codeownership data [default: 5]') do |top_touched_files|
42
58
  options[:top_touched_files] = top_touched_files
43
59
  end
44
60
 
45
- opts.on('--codeowners_path STRING', 'Path to CODEOWNERS file [default: .github/CODEOWNERS]') do |codeowners_path|
61
+ opts.on('--codeowners-path STRING', 'Path to CODEOWNERS file [default: .github/CODEOWNERS]') do |codeowners_path|
46
62
  options[:codeowners_path] = codeowners_path
47
63
  end
48
64
 
65
+ opts.on('--big-file-size STRING', 'The amount of lines in the file to be considered big [default: 250]') do |big_file_size|
66
+ options[:big_file_size] = big_file_size
67
+ end
68
+
69
+ opts.on('--default-branch STRING', 'The default branch to pull and run metrics for [default: master]') do |default_branch|
70
+ options[:default_branch] = default_branch
71
+ end
72
+
73
+ opts.on('--code-extensions STRING', 'The file extensions that consider to be code [default: ".kt, .swift"]') do |code_extension|
74
+ options[:code_extension] = code_extension
75
+ end
76
+
49
77
  opts.on('-h', '--help', 'Display this help message') do
50
78
  puts opts
51
79
  puts <<~EXAMPLES
52
80
 
53
81
  Examples:
54
- git_ownership_insights --path src/test --exclusions WEB,RAILS --steps 2 --duration_in_days 90 --debug
82
+ git_ownership_insights --path src/test --exclusions WEB,RAILS --steps 2 --duration-in-days 90 --hotspot-files --debug
55
83
  EXAMPLES
56
84
  exit
57
85
  end
@@ -63,6 +91,18 @@ TEAM_REGEX = options[:team_regex] || '[A-Za-z]+'
63
91
  TOP_TOUCHED_FILES = options[:top_touched_files] || 5
64
92
  TOP_CONTRIBUTED_TEAMS = options[:top_contributing_team] || 5
65
93
  CODEOWNERS_PATH = options[:codeowners_path] || ".github/CODEOWNERS"
94
+ BIG_FILE_SIZE = options[:big_file_size] || 250
95
+ CI = options[:ci] || false
96
+ DEFAULT_BRANCH = options[:default_branch] || 'master'
97
+ CODEOWNERS = options[:codeowners] || false
98
+ HOTSPOT = options[:hotspot_files] || false
99
+ CODE_EXTENSIONS = options[:code_extension] ? options[:code_extension].split : ['.swift', '.kt']
100
+ EXCLUDED_FILES = options[:excluded_files]
101
+
102
+ def true?(obj)
103
+ obj.to_s.downcase == "true"
104
+ end
105
+
66
106
  def read_codeowners_file
67
107
  raise "CODEOWNERS file does not exist under #{CODEOWNERS_PATH}" unless File.exist?(CODEOWNERS_PATH)
68
108
 
@@ -79,17 +119,50 @@ def read_codeowners_file
79
119
  end
80
120
 
81
121
  def find_owners(file_path, codeowners)
82
- matching_patterns = codeowners.keys.select { |pattern| file_path.include?(pattern.sub(%r{^/+}, '').chomp('/')) }
122
+ matching_patterns = codeowners.keys.select do |pattern|
123
+ pattern_regex = Regexp.new("^#{Regexp.escape(pattern.sub(%r{^/+}, '').chomp('/')).gsub('\*', '.*').gsub('**', '.*?')}")
124
+ file_path =~ pattern_regex
125
+ end
126
+
83
127
  return ['unknown'] if matching_patterns.empty?
84
128
 
85
129
  # Sort patterns by length in descending order
86
130
  sorted_patterns = matching_patterns.sort_by(&:length).reverse
87
131
 
88
132
  # Find the most specific matching pattern
89
- best_match = sorted_patterns.find { |pattern| file_path.include?(pattern.sub(%r{^/+}, '').chomp('/')) }
133
+ best_match = sorted_patterns.find do |pattern|
134
+ pattern_regex = Regexp.new("^#{Regexp.escape(pattern.sub(%r{^/+}, '').chomp('/')).gsub('\*', '.*').gsub('**', '.*?')}")
135
+ file_path =~ pattern_regex
136
+ end
137
+
90
138
  codeowners[best_match].split(' ')
91
139
  end
92
140
 
141
+ def count_big_files(directory_path, size: BIG_FILE_SIZE)
142
+ # Get a list of all files in the specified directory
143
+ files = Dir.glob(File.join(directory_path, '**', '*')).select { |file| File.file?(file) }
144
+
145
+ code_files = files.select {|f|
146
+ extension = File.extname(f)
147
+ valid_extensions = ['.swift', '.kt']
148
+ valid_extensions.include?(extension)
149
+ }
150
+
151
+ # Initialize a counter for files that meet the criteria
152
+ count = 0
153
+
154
+ # Iterate through each file and check the line count
155
+ code_files.each do |file|
156
+ lines_count = File.foreach(file).reject { |line| line.match(/^\s*(\/\/|\/\*.*\*\/|\s*$)/) }.count
157
+
158
+ if lines_count > size
159
+ count += 1
160
+ end
161
+ end
162
+
163
+ puts " Total number of files longer than #{size} lines: #{count}"
164
+ end
165
+
93
166
  def contribution_message(directory_path:, duration_in_days:, begin_time:, debug: nil, steps: nil)
94
167
  duration_in_days = duration_in_days.to_i
95
168
  all_teams = []
@@ -97,26 +170,40 @@ def contribution_message(directory_path:, duration_in_days:, begin_time:, debug:
97
170
  total_changes = 0
98
171
  start_date = begin_time.to_time.to_i - duration_in_days * 86_400
99
172
  end_date = begin_time.to_time.to_i
100
- file_count = `git ls-tree -r --name-only $(git rev-list -1 --before="#{end_date}" HEAD) -- "#{directory_path}" | wc -l`.to_i
101
- files_with_changes = `git log --name-only --pretty=format:"" --since="#{start_date}" --until="#{end_date}" "#{directory_path}"`.split.sort
102
- uniq_files_with_changes = files_with_changes.uniq
173
+ file_count = `git ls-tree -r --name-only $(git rev-list -1 --since="#{start_date}" --until="#{end_date}" HEAD) -- "#{directory_path}" | wc -l`.to_i
174
+ all_files_with_changes = `git log --name-only --pretty=format:"" --since="#{start_date}" --until="#{end_date}" "#{directory_path}"`.split.sort
175
+ excluded_patterns = EXCLUDED_FILES.split(',')
176
+
177
+ code_files_with_changes = all_files_with_changes.select {|f|
178
+ extension = File.extname(f)
179
+ valid_extensions = CODE_EXTENSIONS
180
+ valid_extensions.include?(extension)
181
+ }.reject do |file|
182
+ excluded_patterns.any? { |pattern| file.include?(pattern) }
183
+ end
184
+
185
+ uniq_code_files_with_changes = code_files_with_changes.uniq
186
+
103
187
  file_team_map = {}
104
- uniq_files_with_changes.each do |file|
188
+ uniq_code_files_with_changes.each do |file|
105
189
  filename = File.basename(file)
106
190
  commit_count = `git log --since="#{start_date}" --until="#{end_date}" --follow -- "#{file}" | grep -c '^commit'`.to_i
107
191
 
108
- next unless commit_count.positive?
109
-
110
192
  # Get the log of the file in the given duration
111
193
  git_log = `git log --pretty=format:"%s" --since="#{start_date}" --until="#{end_date}" --follow -- "#{file}"`.split("\n")
112
194
  teams = git_log.map do |team|
113
195
  team.match(/#{TEAM_REGEX}/)[0].upcase
114
196
  end.reject { |e| EXCLUSIONS&.include?(e) }
115
- total_changes += teams.count
197
+
198
+ total_changes += commit_count
116
199
  all_teams << teams
117
200
  teams = teams.uniq
118
- files_changed_by_many_teams += 1 if teams.count > 1
119
- file_team_map.merge!("#{filename}" => teams)
201
+
202
+ if teams.count > 1
203
+ files_changed_by_many_teams += 1
204
+ file_team_map.merge!("#{file}" => [teams, commit_count])
205
+ end
206
+
120
207
  puts "\n#{filename} [#{commit_count}]:#{teams}\n" if debug
121
208
  end
122
209
 
@@ -124,61 +211,99 @@ def contribution_message(directory_path:, duration_in_days:, begin_time:, debug:
124
211
  sorted_occurrences = occurrences.sort_by { |element, count| [-count, element] }
125
212
  contributors = Hash[sorted_occurrences]
126
213
 
127
- puts "Timeframe: #{(begin_time - duration_in_days).strftime('%Y-%m-%d')} to #{begin_time.strftime('%Y-%m-%d')}\n Files with a single contributor: #{(100 - ((files_changed_by_many_teams.to_f / files_with_changes.count) * 100)).round(2)}%\n Amount of commits: #{total_changes}\n Total files changed: #{files_with_changes.count}\n Total files in the folder: #{file_count}\n Contributors: #{contributors}\n"
214
+ churn_count = file_team_map.values.map { |value| value[1] }.sum
215
+ hotspot_changes_percentage = (churn_count.to_f / total_changes.to_f)*100
128
216
 
129
- touched_files = files_with_changes.flatten.compact.tally
130
- top_touched_files = touched_files.sort_by { |element, count| [-count, element] }.take(TOP_TOUCHED_FILES.to_i)
131
- codeowners = read_codeowners_file
217
+ puts "Timeframe: #{(begin_time - duration_in_days).strftime('%Y-%m-%d')} to #{begin_time.strftime('%Y-%m-%d')}\n Code files with a single contributor: #{(100 - ((files_changed_by_many_teams.to_f / code_files_with_changes.count.to_f) * 100)).round(2)}%\n Hotspot code changes: #{churn_count} (#{hotspot_changes_percentage.round(2)}%)\n Amount of code changes: #{total_changes}\n Total files changed: #{code_files_with_changes.count}\n Total files in the folder: #{file_count}\n Contributors: #{contributors}\n"
132
218
 
133
- owners_data = Hash.new do |hash, key|
134
- hash[key] = { directories: Hash.new do |h, k|
135
- h[k] = { files: [] }
136
- end, total_count: 0 }
137
- end
219
+ # Filter files based on extension and size
220
+ filtered_files = file_team_map.select do |file_path|
221
+ next unless File.exist?(file_path)
138
222
 
139
- top_touched_files.each do |file, count|
140
- owners = find_owners(file, codeowners)
141
- owners.each do |owner|
142
- owners_data[owner][:total_count] += count
223
+ # Check if the file size is more than BIG_FILE_SIZE lines (excluding empty and commented lines)
224
+ File.foreach(file_path).reject { |line| line.match(/^\s*(\/\/|\/\*.*\*\/|\s*$)/) }.count > BIG_FILE_SIZE.to_i
225
+ end
143
226
 
144
- dir_path = File.dirname(file)
145
- owners_data[owner][:directories][dir_path][:files] << { name: File.basename(file), count: count }
227
+ filtered_top_touched_files = filtered_files.sort_by { |element, count| [-count.last, element] }
228
+ count_big_files(directory_path)
229
+ puts " Total files longer than #{BIG_FILE_SIZE} lines with multiple contributors: #{filtered_top_touched_files.count}\n"
230
+ if HOTSPOT
231
+ filtered_top_touched_files.each do |line|
232
+ puts " #{line.first.gsub(directory_path, '')} Contributors: #{line.last.first} Commits: #{line.last.last}"
146
233
  end
147
234
  end
235
+ puts "\n\n"
148
236
 
149
- # Sort owners_data by total count in descending order
150
- sorted_owners_data = owners_data.sort_by { |_, data| -data[:total_count] }
237
+ if CODEOWNERS
238
+ codeowners = read_codeowners_file
151
239
 
152
- # Take the last 5 elements
153
- top_owners_data = sorted_owners_data.last(TOP_CONTRIBUTED_TEAMS.to_i)
240
+ owners_data = Hash.new do |hash, key|
241
+ hash[key] = { directories: Hash.new do |h, k|
242
+ h[k] = { files: [] }
243
+ end, churn_count: 0 }
244
+ end
245
+
246
+ file_team_map.each do |file, count|
247
+ owners = find_owners(file, codeowners)
248
+ owners.each do |owner|
249
+ owners_data[owner][:churn_count] += count.last
154
250
 
155
- puts ' Codeownership data:'
156
- top_owners_data.each do |owner, data|
157
- puts " #{owner.split('/').last}:\n Total Count: #{data[:total_count]}"
158
- data[:directories].each do |dir, dir_data|
159
- puts " Directory: #{dir}\n Top files:"
160
- dir_data[:files].each do |file_data|
161
- puts " #{File.basename(file_data[:name])} - #{file_data[:count]} #{file_team_map[file_data[:name]]}"
251
+ dir_path = File.dirname(file)
252
+ owners_data[owner][:directories][dir_path][:files] << { name: File.basename(file), count: count }
162
253
  end
163
254
  end
164
- end
165
255
 
256
+ # Sort owners_data by total count in descending order
257
+ sorted_owners_data = owners_data.sort_by { |_, data| -data[:churn_count] }
258
+
259
+ # Take the last 5 elements
260
+ top_owners_data = sorted_owners_data.last(TOP_CONTRIBUTED_TEAMS.to_i)
261
+
262
+ converted_team_map = file_team_map.transform_keys { |key| File.basename(key) }
263
+
264
+ puts ' Codeownership data:'
265
+ top_owners_data.each do |owner, data|
266
+ puts " #{owner.split('/').last}:\n Total Count: #{data[:churn_count]}"
267
+ data[:directories].each do |dir, dir_data|
268
+ puts " Directory: #{dir}\n Top files:"
269
+ dir_data[:files].each do |file_data|
270
+ next if converted_team_map[File.basename(file_data[:name])].nil?
271
+
272
+ contributors = converted_team_map[file_data[:name]]&.first&.empty? ? [ "Excluded contributor" ] : converted_team_map[file_data[:name]].first
273
+ puts " #{File.basename(file_data[:name])} - #{file_data[:count].last} #{contributors}}"
274
+ end
275
+ end
276
+ end
277
+ end
166
278
  steps -= 1
167
279
 
168
280
  return unless steps.positive?
169
281
 
282
+ system("git checkout `git rev-list -1 --before='#{(begin_time - duration_in_days).strftime("%B %d %Y")}' HEAD`", [ :out, :err ] => File::NULL)
170
283
  contribution_message(duration_in_days: duration_in_days, directory_path: directory_path,
171
284
  begin_time: begin_time - duration_in_days, steps: steps, debug: debug)
172
285
  end
173
286
 
174
- puts "\nDirectory: #{REPO_PATH}\n"
175
- puts "Time period that data is aggregated by: #{options[:duration_in_days]} days"
176
- puts "Steps to jump in the past: #{options[:steps]}"
177
- puts "Limit of the teams shown in codeownership data: #{TOP_CONTRIBUTED_TEAMS}"
178
- puts "Limit of the files shown in codeownership data: #{TOP_TOUCHED_FILES}"
179
- puts "Regex to detect the teams identifiers: #{TEAM_REGEX}"
180
- puts "Excluded contributors: #{EXCLUSIONS}\n" if EXCLUSIONS
181
- puts "Debug mode is: #{options[:debug] ? 'on' : 'off'}\n\n"
287
+ unless CI
288
+ puts "\nDirectory: #{REPO_PATH}\n"
289
+ puts "Time period that data is aggregated by: #{options[:duration_in_days]} days"
290
+ puts "Steps to jump in the past: #{options[:steps].to_i}"
291
+ puts "Runs against: #{DEFAULT_BRANCH}"
292
+ puts "Code extensions: #{CODE_EXTENSIONS}"
293
+ puts "Regex to detect the teams identifiers: #{TEAM_REGEX}"
294
+ puts "Excluded contributors: #{EXCLUSIONS}\n" if EXCLUSIONS
295
+ puts "Excluded file patterns: #{EXCLUDED_FILES.split(',')}\n" if EXCLUDED_FILES
296
+ puts "Lines of code limit (big files) for the hotspot calculation: #{BIG_FILE_SIZE}"
297
+ puts "Hotspot detailed output is: #{options[:hotspot_files] ? 'on' : 'off'}\n"
298
+ puts "CODEOWNERS output is: #{options[:codeowners] ? 'on' : 'off'}\n"
299
+ puts "Limit of the teams shown in codeownership data: #{TOP_CONTRIBUTED_TEAMS}"
300
+ puts "Limit of the files shown in codeownership data: #{TOP_TOUCHED_FILES}"
301
+ puts "CI mode is: #{options[:ci] ? 'on' : 'off'}\n"
302
+ puts "Debug mode is: #{options[:debug] ? 'on' : 'off'}\n\n"
303
+ end
304
+
305
+ system("git checkout #{DEFAULT_BRANCH}", [ :out ] => File::NULL)
306
+ system("git pull", [ :out ] => File::NULL)
182
307
 
183
308
  contribution_message(duration_in_days: options[:duration_in_days] || 30, directory_path: REPO_PATH,
184
309
  begin_time: DateTime.now, steps: options[:steps].to_i, debug: options[:debug])
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module GitOwnershipInsights
4
- VERSION = '0.1.3'
4
+ VERSION = '1.0.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: git_ownership_insights
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Serghei Moret
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-01-17 00:00:00.000000000 Z
11
+ date: 2024-01-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: date