github_repo_statistics 2.0.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,290 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'pry'
4
+ require 'date'
5
+
6
# Reports contribution, hotspot and code-ownership statistics for a git
# working copy by running `git` and reading files from disk.
#
# The class reads several constants that are not defined in this file and
# must be provided by whoever loads it: CODEOWNERS_PATH, CODE_EXTENSIONS,
# BIG_FILE_SIZE, EXCLUDED_FILES, EXCLUSIONS, TEAM_REGEX, FILE_OUTPUT, HOTSPOT
# and CODEOWNERS.
class GithubRepoStatistics
  # Lines that are blank, start with `//`, or start with a one-line
  # `/* ... */` comment. Such lines are not counted as code.
  NON_CODE_LINE = %r{^\s*(//|/\*.*\*/|\s*$)}.freeze

  # @param directory_path [String] path (inside the repository) to analyse
  # @param duration_in_days [#to_i] length of one reporting window in days
  # @param begin_time [#to_time, #strftime, #-] end of the first reporting
  #   window; `begin_time - n` must step back n days (presumably a
  #   Date/DateTime - confirm against the caller)
  # @param debug [Object, nil] any truthy value prints per-file details
  # @param steps [Integer] how many consecutive windows to report
  def initialize(directory_path:, duration_in_days:, begin_time:, debug: nil, steps: 1)
    @directory_path = directory_path
    @duration_in_days = duration_in_days
    @begin_time = begin_time
    @debug = debug
    @steps = steps
  end

  # @return [Boolean] whether obj's string form is "true", ignoring case
  def true?(obj)
    obj.to_s.downcase == 'true'
  end

  # Parses the CODEOWNERS file into a pattern => owners mapping.
  #
  # Blank lines and comment lines (including indented ones) are skipped, and
  # anything from a `#` token onwards is treated as a trailing comment rather
  # than as an owner. A leading '@' is removed from every owner.
  #
  # @return [Hash{String => String}] pattern => space-separated owners
  # @raise [RuntimeError] when CODEOWNERS_PATH does not exist
  def read_codeowners_file
    raise "CODEOWNERS file does not exist under #{CODEOWNERS_PATH}" unless File.exist?(CODEOWNERS_PATH)

    File.readlines(CODEOWNERS_PATH).each_with_object({}) do |raw_line, codeowners|
      line = raw_line.strip
      next if line.empty? || line.start_with?('#')

      pattern, *owners = line.split(/\s+/)
      owners = owners.take_while { |owner| !owner.start_with?('#') }
      codeowners[pattern] = owners.map { |owner| owner.delete_prefix('@') }.join(' ')
    end
  end

  # Finds the owners of a file using the longest matching CODEOWNERS pattern.
  # When several matching patterns have the same length, the one listed last
  # wins.
  #
  # @param file_path [String] repository-relative path of the file
  # @param codeowners [Hash{String => String}] result of #read_codeowners_file
  # @return [Array<String>] owner names, or ['unknown'] when nothing matches
  def find_owners(file_path, codeowners)
    best_pattern, = codeowners.keys.each_with_index
                              .select { |pattern, _index| file_path.match?(codeowners_pattern_regex(pattern)) }
                              .max_by { |pattern, index| [pattern.length, index] }
    return ['unknown'] if best_pattern.nil?

    codeowners[best_pattern].split
  end

  # Builds the per-owner churn report and prints it, or writes it to
  # codeowners.txt when FILE_OUTPUT is truthy.
  #
  # @param file_team_map [Hash{String => Array}] file path => [teams, commit_count]
  def handle_codeowners(file_team_map:)
    codeowners = read_codeowners_file

    owners_data = Hash.new do |hash, owner|
      hash[owner] = { directories: Hash.new { |dirs, dir| dirs[dir] = { files: [] } }, churn_count: 0 }
    end

    file_team_map.each do |file, count|
      find_owners(file, codeowners).each do |owner|
        owners_data[owner][:churn_count] += count.last
        owners_data[owner][:directories][File.dirname(file)][:files] << { name: File.basename(file), count: count }
      end
    end

    output = +"\n *Code ownership data:*\n"
    # Owners with the highest churn come first.
    owners_data.sort_by { |_, data| -data[:churn_count] }.each do |owner, data|
      output << "\n #{owner.split('/').last}:\n Total Count: #{data[:churn_count]}\n"
      data[:directories].each do |dir, dir_data|
        output << " Directory: #{dir}\n Top files:\n"
        dir_data[:files].each do |file_data|
          # Use the entry stored for this exact file; looking it up by
          # basename would mix up same-named files from different directories.
          teams, commits = file_data[:count]
          contributors = teams.empty? ? ['Excluded contributor'] : teams
          output << " #{file_data[:name]} - #{commits} #{contributors}\n"
        end
      end
    end

    emit_report(output, 'codeowners.txt')
  end

  # @param file [String] repository-relative path of the file
  # @return [Array<String>] owners of the file according to CODEOWNERS
  def find_owner(file:)
    find_owners(file, read_codeowners_file)
  end

  # Prints how many code files under directory_path have more than `size`
  # code lines (see NON_CODE_LINE for what is not counted).
  #
  # @param directory_path [String] directory searched recursively
  # @param size [#to_i] line-count threshold
  def count_big_files(directory_path, size: BIG_FILE_SIZE)
    size = size.to_i
    files = Dir.glob(File.join(directory_path, '**', '*')).select { |file| File.file?(file) }
    count = code_files(files).count { |file| code_line_count(file) > size }

    puts " *Current total number of code files longer than #{size} lines:* #{count}"
  end

  # Prints the total number of code lines across the given files whose
  # extension is listed in CODE_EXTENSIONS.
  #
  # @param files [Array<String>] paths of existing files
  def count_hotspot_lines(files)
    count = code_files(files).sum { |file| code_line_count(file) }

    puts " *Total lines of hotspot code:* #{count}"
  end

  # Keeps the files that exist, do not contain any EXCLUDED_FILES entry in
  # their path, and have an extension listed in CODE_EXTENSIONS.
  #
  # EXCLUDED_FILES is a comma-separated list; entries are stripped and blank
  # entries ignored, because an empty pattern would match every path.
  #
  # @param files [Array<String>]
  # @return [Array<String>]
  def filter_existing_code_files(files)
    excluded_patterns = EXCLUDED_FILES ? EXCLUDED_FILES.split(',').map(&:strip).reject(&:empty?) : []

    files.select do |f|
      File.exist?(f) &&
        excluded_patterns.none? { |pattern| f.include?(pattern) } &&
        CODE_EXTENSIONS.include?(File.extname(f))
    end
  end

  # @param directory_path [String]
  # @return [String] newline-separated names of the files tracked at HEAD
  #   under directory_path
  def git_files(directory_path:)
    run_git('ls-tree', '-r', '--name-only', 'HEAD', '--', directory_path)
  end

  # @return [String] raw `git log --name-only` output (file names only, one
  #   per line, with blank separator lines) for the given period
  def files_with_changes(directory_path:, start_date:, end_date:)
    run_git('log', '--name-only', '--pretty=format:', "--since=#{start_date}", "--until=#{end_date}",
            '--', directory_path)
  end

  # @return [String] number of commits touching the file in the period,
  #   followed by a newline (callers convert it with #to_i)
  def git_commit_count(file:, start_date:, end_date:)
    log = run_git('log', "--since=#{start_date}", "--until=#{end_date}", '--follow', '--', file)
    "#{log.each_line.count { |line| line.start_with?('commit') }}\n"
  end

  # @return [String] newline-separated commit subjects for the file in the period
  def git_commit_info(file:, start_date:, end_date:)
    run_git('log', '--pretty=format:%s', "--since=#{start_date}", "--until=#{end_date}", '--follow', '--', file)
  end

  # Collects commit counts and contributing teams for every changed file.
  #
  # A team is the upper-cased TEAM_REGEX match in a commit subject. Subjects
  # without a match are ignored, as are teams listed in EXCLUSIONS.
  #
  # @return [Array] all_teams, cross_teams_count, single_ownership_teams_count,
  #   files_changed_by_many_teams, total_changes, file_team_map
  #   (file => [unique teams, commit_count], only for files with several teams)
  def analyze_changed_files(uniq_code_files_with_changes:, start_date:, end_date:)
    team_regex = /#{TEAM_REGEX}/ # built once instead of once per commit subject
    all_teams = []
    cross_teams_count = 0
    single_ownership_teams_count = 0
    files_changed_by_many_teams = 0
    total_changes = 0
    file_team_map = {}

    uniq_code_files_with_changes.each do |file|
      commit_count = git_commit_count(file: file, start_date: start_date, end_date: end_date).to_i
      subjects = git_commit_info(file: file, start_date: start_date, end_date: end_date).split("\n")
      teams = subjects.filter_map { |subject| subject[team_regex]&.upcase }
                      .reject { |team| EXCLUSIONS&.include?(team) }

      total_changes += commit_count
      all_teams << teams
      teams = teams.uniq

      if teams.count > 1
        files_changed_by_many_teams += 1
        file_team_map[file.to_s] = [teams, commit_count]
        cross_teams_count += teams.count
      else
        single_ownership_teams_count += 1
      end

      puts "\n#{File.basename(file)} [#{commit_count}]:#{teams}\n" if @debug
    end

    [all_teams, cross_teams_count, single_ownership_teams_count, files_changed_by_many_teams, total_changes,
     file_team_map]
  end

  # Keeps the entries whose file exists and has more than BIG_FILE_SIZE code
  # lines.
  #
  # @param file_team_map [Hash{String => Array}]
  # @return [Hash{String => Array}]
  def filter_files(file_team_map:)
    file_team_map.select do |file_path, _|
      File.exist?(file_path) && code_line_count(file_path) > BIG_FILE_SIZE.to_i
    end
  end

  # Prints the statistics for the current window, then - while steps remain -
  # checks out the last commit before the previous window and reports on that
  # window as well.
  def contribution_message
    duration_in_days = @duration_in_days.to_i
    # The queried period ends 30 days before @begin_time.
    # NOTE(review): the reason for this 30-day shift is not visible here, and
    # the printed timeframe below does not include it - confirm it is intended.
    end_date = @begin_time.to_time.to_i - 30 * 86_400
    start_date = end_date - duration_in_days * 86_400

    file_count = filter_existing_code_files(git_files(directory_path: @directory_path).split).count
    all_files_with_changes = files_with_changes(directory_path: @directory_path, start_date: start_date,
                                                end_date: end_date).split.sort
    uniq_code_files_with_changes = filter_existing_code_files(all_files_with_changes.uniq)
    all_teams, cross_teams_count, single_ownership_teams_count, files_changed_by_many_teams, total_changes,
      file_team_map = analyze_changed_files(uniq_code_files_with_changes: uniq_code_files_with_changes,
                                            start_date: start_date, end_date: end_date)
    contributors = all_teams.flatten.compact.tally.sort_by { |team, count| [-count, team] }.to_h
    churn_count = file_team_map.values.sum { |value| value[1] }
    hotspot_changes_percentage = percentage(churn_count, total_changes)
    # Only existing files that exceed BIG_FILE_SIZE code lines, most commits first.
    filtered_files = filter_files(file_team_map: file_team_map)
    filtered_top_touched_files = filtered_files.sort_by { |file, stats| [-stats.last, file] }

    puts ''
    puts "*Timeframe:* #{(@begin_time - duration_in_days).strftime('%Y-%m-%d')} to #{@begin_time.strftime('%Y-%m-%d')}"
    puts " *Code files with a single contributor:* #{(100 - percentage(files_changed_by_many_teams, file_count)).round(2)}%"
    puts " *Existing files changed by many teams:* #{files_changed_by_many_teams}"
    puts " *Current existing #{CODE_EXTENSIONS} files:* #{file_count}"
    puts ' *Cross-Squad Dependency:*'
    puts " *Contributions by multiple squads to the same files:* #{cross_teams_count}"
    puts " *Contributions by single squads contributing to single files:* #{single_ownership_teams_count}"
    puts " *Hotspot Code Changes:* #{hotspot_changes_percentage.round(2)}%"
    puts " *Churn count(commits to files by multiple teams):* #{churn_count}"
    puts " *Total amount of commits:* #{total_changes}"
    count_hotspot_lines(filtered_files.keys)
    puts " *#{CODE_EXTENSIONS} files with multiple contributors:* #{file_team_map.count}"
    puts " *#{CODE_EXTENSIONS} files exceeding #{BIG_FILE_SIZE} lines with multiple contributors:* #{filtered_top_touched_files.count}"
    puts " *Total amount of commits to #{CODE_EXTENSIONS} files:* #{total_changes}"
    puts " *Total #{CODE_EXTENSIONS} files changed:* #{uniq_code_files_with_changes.count}"
    count_big_files(@directory_path)
    puts " *Current total of #{CODE_EXTENSIONS} files in the folder:* #{file_count}"
    puts " *Contributors:* #{contributors}"

    if HOTSPOT
      hotspot_output = +"\n *Hotspot files(#{filtered_top_touched_files.count}):*\n"

      filtered_top_touched_files.each do |file, (teams, commits)|
        hotspot_output << "\n"
        hotspot_output << " #{file.gsub(@directory_path, '')} Contributors: #{teams} Commits: #{commits} Owner: #{find_owner(file: file)}\n"
      end

      emit_report(hotspot_output, 'hotspot.txt')
    end

    handle_codeowners(file_team_map: file_team_map) if CODEOWNERS

    @steps -= 1

    return unless @steps.positive?

    # Move the working copy back to the state it had before this window and
    # report on the preceding window.
    system("git checkout `git rev-list -1 --before='#{(@begin_time - duration_in_days).strftime('%B %d %Y')}' HEAD`",
           %i[out err] => File::NULL)
    @begin_time -= duration_in_days
    contribution_message
  end

  private

  # Turns a CODEOWNERS pattern into a regex anchored at the start of the path.
  # `*` matches any run of characters. The pattern must end at a path-segment
  # boundary so that `docs` does not also claim `docs_old/...`.
  def codeowners_pattern_regex(pattern)
    normalized = pattern.sub(%r{^/+}, '').chomp('/')
    return /\A/ if normalized.empty? # a bare "/" covers every path

    Regexp.new("\\A#{Regexp.escape(normalized).gsub('\*', '.*')}(?:/|\\z)")
  end

  # Selects the files whose extension is listed in CODE_EXTENSIONS.
  def code_files(files)
    files.select { |file| CODE_EXTENSIONS.include?(File.extname(file)) }
  end

  # Counts the lines of a file that do not match NON_CODE_LINE.
  def code_line_count(file)
    File.foreach(file).count { |line| !line.match?(NON_CODE_LINE) }
  end

  # Runs git with the given arguments and returns its standard output.
  # The arguments are passed as an argv array, so no shell is involved and
  # file names cannot be interpreted as shell syntax.
  def run_git(*args)
    IO.popen(['git', *args], &:read)
  end

  # Returns part/total as a percentage, or 0.0 when total is zero (instead of
  # NaN or Infinity).
  def percentage(part, total)
    return 0.0 if total.zero?

    (part.to_f / total) * 100
  end

  # Writes the report to file_name when FILE_OUTPUT is truthy, otherwise
  # prints it.
  def emit_report(output, file_name)
    if FILE_OUTPUT
      File.open(file_name, 'w') { |f| f.puts output }
    else
      puts output
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Reopens GithubRepoStatistics to define the gem's version string.
+ class GithubRepoStatistics
4
+ VERSION = '2.0.8'
5
+ end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'github_repo_statistics/version'
4
+
5
# Reopens GithubRepoStatistics to define Error, a StandardError subclass
# namespaced under the class. Nothing in this file raises it.
+ class GithubRepoStatistics
6
+ class Error < StandardError; end
7
+ # Your code goes here...
8
+ end
@@ -0,0 +1,4 @@
1
# RBS signature for GithubRepoStatistics: declares only that VERSION is a String.
+ class GithubRepoStatistics
2
+ VERSION: String
3
+ # See the writing guide of rbs: https://github.com/ruby/rbs#guides
4
+ end
File without changes
@@ -0,0 +1,333 @@
1
+ line
2
+ line
3
+ line
4
+ line
5
+ line
6
+ line
7
+ line
8
+ line
9
+ line
10
+ line
11
+ line
12
+ line
13
+ line
14
+ line
15
+ line
16
+ line
17
+ line
18
+ line
19
+ line
20
+ line
21
+ line
22
+ line
23
+ line
24
+ line
25
+ line
26
+ line
27
+ line
28
+ line
29
+ line
30
+ line
31
+ line
32
+ line
33
+ line
34
+ line
35
+ line
36
+ line
37
+ line
38
+ line
39
+ line
40
+ line
41
+ line
42
+ line
43
+ line
44
+ line
45
+ line
46
+ line
47
+ line
48
+ line
49
+ line
50
+ line
51
+ line
52
+ line
53
+ line
54
+ line
55
+ line
56
+ line
57
+ line
58
+ line
59
+ line
60
+ line
61
+ line
62
+ line
63
+ line
64
+ line
65
+ line
66
+ line
67
+ line
68
+ line
69
+ line
70
+ line
71
+ line
72
+ line
73
+ line
74
+ line
75
+ line
76
+ line
77
+ line
78
+ line
79
+ line
80
+ line
81
+ line
82
+ line
83
+ line
84
+ line
85
+ line
86
+ line
87
+ line
88
+ line
89
+ line
90
+ line
91
+ line
92
+ line
93
+ line
94
+ line
95
+ line
96
+ line
97
+ line
98
+ line
99
+ line
100
+ line
101
+ line
102
+ line
103
+ line
104
+ line
105
+ line
106
+ line
107
+ line
108
+ line
109
+ line
110
+ line
111
+ line
112
+ line
113
+ line
114
+ line
115
+ line
116
+ line
117
+ line
118
+ line
119
+ line
120
+ line
121
+ line
122
+ line
123
+ line
124
+ line
125
+ line
126
+ line
127
+ line
128
+ line
129
+ line
130
+ line
131
+ line
132
+ line
133
+ line
134
+ line
135
+ line
136
+ line
137
+ line
138
+ line
139
+ line
140
+ line
141
+ line
142
+ line
143
+ line
144
+ line
145
+ line
146
+ line
147
+ line
148
+ line
149
+ line
150
+ line
151
+ line
152
+ line
153
+ line
154
+ line
155
+ line
156
+ line
157
+ line
158
+ line
159
+ line
160
+ line
161
+ line
162
+ line
163
+ line
164
+ line
165
+ line
166
+ line
167
+ line
168
+ line
169
+ line
170
+ line
171
+ line
172
+ line
173
+ line
174
+ line
175
+ line
176
+ line
177
+ line
178
+ line
179
+ line
180
+ line
181
+ line
182
+ line
183
+ line
184
+ line
185
+ line
186
+ line
187
+ line
188
+ line
189
+ line
190
+ line
191
+ line
192
+ line
193
+ line
194
+ line
195
+ line
196
+ line
197
+ line
198
+ line
199
+ line
200
+ line
201
+ line
202
+ line
203
+ line
204
+ line
205
+ line
206
+ line
207
+ line
208
+ line
209
+ line
210
+ line
211
+ line
212
+ line
213
+ line
214
+ line
215
+ line
216
+ line
217
+ line
218
+ line
219
+ line
220
+ line
221
+ line
222
+ line
223
+ line
224
+ line
225
+ line
226
+ line
227
+ line
228
+ line
229
+ line
230
+ line
231
+ line
232
+ line
233
+ line
234
+ line
235
+ line
236
+ line
237
+ line
238
+ line
239
+ line
240
+ line
241
+ line
242
+ line
243
+ line
244
+ line
245
+ line
246
+ line
247
+ line
248
+ line
249
+ line
250
+ line
251
+ line
252
+ line
253
+ line
254
+ line
255
+ line
256
+ line
257
+ line
258
+ line
259
+ line
260
+ line
261
+ line
262
+ line
263
+ line
264
+ line
265
+ line
266
+ line
267
+ line
268
+ line
269
+ line
270
+ line
271
+ line
272
+ line
273
+ line
274
+ line
275
+ line
276
+ line
277
+ line
278
+ line
279
+ line
280
+ line
281
+ line
282
+ line
283
+ line
284
+ line
285
+ line
286
+ line
287
+ line
288
+ line
289
+ line
290
+ line
291
+ line
292
+ line
293
+ line
294
+ line
295
+ line
296
+ line
297
+ line
298
+ line
299
+ line
300
+ line
301
+ line
302
+ line
303
+ line
304
+ line
305
+ line
306
+ line
307
+ line
308
+ line
309
+ line
310
+ line
311
+ line
312
+ line
313
+ line
314
+ line
315
+ line
316
+ line
317
+ line
318
+ line
319
+ line
320
+ line
321
+ line
322
+ // comment
323
+
324
+
325
+ line
326
+ line
327
+ line
328
+ line
329
+ line
330
+ line
331
+ line
332
+ line
333
+ line