github_repo_statistics 2.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,290 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'pry'
4
+ require 'date'
5
+
6
+ class GithubRepoStatistics
7
# Stores the analysis settings on the instance for later use by the reporting methods.
#
# @param directory_path [String] directory handed to git and Dir.glob by the other methods
# @param duration_in_days [#to_i] length of one reporting window, in days
# @param begin_time [#to_time, #strftime] end of the first reporting window
#   (presumably a Date, since whole days are subtracted from it elsewhere - confirm with the caller)
# @param debug [Object, nil] when truthy, per-file details are printed during analysis
# @param steps [Integer] how many consecutive windows #contribution_message reports
def initialize(directory_path:, duration_in_days:, begin_time:, debug: nil, steps: 1)
  @directory_path, @duration_in_days, @begin_time = directory_path, duration_in_days, begin_time
  @debug, @steps = debug, steps
end
14
+
15
# Reports whether +obj+, once converted to a string, spells "true" in any letter case.
#
# @param obj [Object] any value responding to #to_s
# @return [Boolean]
def true?(obj)
  normalized = obj.to_s.downcase
  normalized == 'true'
end
18
+
19
# Parses the CODEOWNERS file at CODEOWNERS_PATH into a pattern => owners mapping.
#
# Blank lines and comment lines are skipped. Each line is stripped before it is
# inspected, so an indented comment is ignored and an indented rule no longer
# yields an empty-string pattern with the real pattern listed as an owner.
# A single leading '@' is removed from every owner token.
#
# @return [Hash{String => String}] pattern mapped to its owners joined by single spaces
# @raise [RuntimeError] when no file exists at CODEOWNERS_PATH
def read_codeowners_file
  raise "CODEOWNERS file does not exist under #{CODEOWNERS_PATH}" unless File.exist?(CODEOWNERS_PATH)

  File.foreach(CODEOWNERS_PATH).each_with_object({}) do |raw_line, codeowners|
    line = raw_line.strip
    next if line.empty? || line.start_with?('#')

    pattern, *owners = line.split
    codeowners[pattern] = owners.map { |owner| owner.delete_prefix('@') }.join(' ')
  end
end
33
+
34
# Resolves the owners of +file_path+ from a parsed CODEOWNERS mapping.
#
# Each pattern has its leading slashes and one trailing slash removed, is
# regex-escaped, and has every '*' turned into '.*'. The result is matched as a
# prefix of the path. Among the matching patterns the longest one wins; when
# several share that length the first one in the mapping is used.
#
# The regex is now built once per pattern. The former second pass re-matched
# patterns that had already matched, and the '**' substitution could never
# fire because Regexp.escape had already rewritten every '*'. '\A' replaces '^'
# so only the start of the string is matched, not the start of any line.
#
# NOTE(review): GitHub itself gives precedence to the last matching line, not
# the longest pattern - confirm which rule this report should follow.
#
# @param file_path [String] path of the file to resolve
# @param codeowners [Hash{String => String}] pattern => space-separated owners
# @return [Array<String>] owners of the best match, or ['unknown'] when nothing matches
def find_owners(file_path, codeowners)
  matching_patterns = codeowners.keys.select do |pattern|
    normalized = pattern.sub(%r{\A/+}, '').chomp('/')
    Regexp.new("\\A#{Regexp.escape(normalized).gsub('\*', '.*')}").match?(file_path)
  end

  return ['unknown'] if matching_patterns.empty?

  codeowners[matching_patterns.max_by(&:length)].split
end
55
+
56
# Builds the "Code ownership" report and either writes it to codeowners.txt
# (when FILE_OUTPUT is truthy) or prints it.
#
# Each file in +file_team_map+ is attributed to its owners as resolved by
# #find_owners. Owners are listed by descending churn (sum of commit counts),
# and each owner's files are grouped by directory.
#
# Contributors are taken from the entry stored with each file. The earlier
# lookup through a basename-keyed copy of the map mixed up files that share a
# name in different directories. The stray '}' that ended every file line is
# also gone.
#
# @param file_team_map [Hash{String => Array}] file path => [teams, commit_count]
# @return [void]
def handle_codeowners(file_team_map:)
  codeowners = read_codeowners_file

  owners_data = Hash.new do |hash, owner|
    hash[owner] = { directories: Hash.new { |dirs, dir| dirs[dir] = { files: [] } }, churn_count: 0 }
  end

  file_team_map.each do |file, count|
    find_owners(file, codeowners).each do |owner|
      owners_data[owner][:churn_count] += count.last
      owners_data[owner][:directories][File.dirname(file)][:files] << { name: File.basename(file), count: }
    end
  end

  # Collected as fragments and joined once; string literals may be frozen in this file.
  fragments = ["\n *Code ownership data:*\n"]
  owners_data.sort_by { |_, data| -data[:churn_count] }.each do |owner, data|
    fragments << "\n #{owner.split('/').last}:\n Total Count: #{data[:churn_count]}\n"
    data[:directories].each do |dir, dir_data|
      fragments << " Directory: #{dir}\n Top files:\n"
      dir_data[:files].each do |file_data|
        teams = file_data[:count].first
        contributors = teams&.empty? ? ['Excluded contributor'] : teams
        fragments << " #{file_data[:name]} - #{file_data[:count].last} #{contributors}\n"
      end
    end
  end
  output = fragments.join

  if FILE_OUTPUT
    File.open('codeowners.txt', 'w') { |f| f.puts output }
  else
    puts output
  end
end
101
+
102
# Looks up the owners of a single file against a freshly read CODEOWNERS file.
#
# @param file [String] path of the file to resolve
# @return [Array<String>] whatever #find_owners returns for that path
def find_owner(file:)
  find_owners(file, read_codeowners_file)
end
106
+
107
# Prints how many code files below +directory_path+ have more than +size+ counted lines.
#
# A file takes part when it is a regular file whose extension is included in
# CODE_EXTENSIONS. Lines that are blank, start with '//' or start with a
# single-line '/* ... */' comment are not counted.
#
# @param directory_path [String] root directory searched recursively
# @param size [#to_i] line threshold; defaults to BIG_FILE_SIZE
# @return [nil]
def count_big_files(directory_path, size: BIG_FILE_SIZE)
  threshold = size.to_i

  candidates = Dir.glob(File.join(directory_path, '**', '*')).select do |path|
    File.file?(path) && CODE_EXTENSIONS.include?(File.extname(path))
  end

  oversized = candidates.count do |path|
    File.foreach(path).count { |line| !line.match?(%r{^\s*(//|/\*.*\*/|\s*$)}) } > threshold
  end

  puts " *Current total number of code files longer than #{threshold} lines:* #{oversized}"
end
129
+
130
# Prints the combined number of counted lines across the given hotspot files.
#
# Only files whose extension is included in CODE_EXTENSIONS contribute. Lines
# that are blank, start with '//' or start with a single-line '/* ... */'
# comment are not counted.
#
# @param files [Array<String>] paths of the files to measure
# @return [nil]
def count_hotspot_lines(files)
  total = files.sum do |path|
    next 0 unless CODE_EXTENSIONS.include?(File.extname(path))

    File.foreach(path).count { |line| !line.match?(%r{^\s*(//|/\*.*\*/|\s*$)}) }
  end

  puts " *Total lines of hotspot code:* #{total}"
end
147
+
148
# Keeps the paths that exist on disk, are not excluded and have a code extension.
#
# EXCLUDED_FILES, when truthy, is read as a comma-separated list of substrings;
# a path containing any of them is dropped. The list is parsed once per call,
# entries are stripped, and blank entries are discarded. A blank entry used to
# exclude every file, because every string includes the empty string.
#
# @param files [Array<String>] candidate paths
# @return [Array<String>] paths that pass all three checks, in their original order
def filter_existing_code_files(files)
  excluded_patterns = EXCLUDED_FILES ? EXCLUDED_FILES.split(',').map(&:strip).reject(&:empty?) : []

  files.select do |path|
    File.exist?(path) &&
      excluded_patterns.none? { |pattern| path.include?(pattern) } &&
      CODE_EXTENSIONS.include?(File.extname(path))
  end
end
162
+
163
# Lists the files tracked at HEAD below +directory_path+, one path per line.
#
# git is started with an argument vector instead of a shell command line, so a
# path containing '$', quotes or backticks reaches git unchanged and cannot be
# interpreted by a shell. Passing HEAD directly replaces the former
# `$(git rev-list -1 HEAD)` sub-command, which resolved to the same commit.
#
# @param directory_path [String] directory to list, relative to the current working directory
# @return [String] raw `git ls-tree -r --name-only` output
def git_files(directory_path:)
  IO.popen(['git', 'ls-tree', '-r', '--name-only', 'HEAD', '--', directory_path.to_s], &:read)
end
166
+
167
# Returns the names of files touched by commits below +directory_path+ within a time range.
#
# The output is that of `git log --name-only` with an empty commit format, so
# it contains blank separator lines and repeats a file once per commit.
# git is started with an argument vector rather than a shell command line, so
# the path and dates are passed through verbatim.
#
# @param directory_path [String] directory whose history is inspected
# @param start_date [#to_s] lower bound handed to git's --since
# @param end_date [#to_s] upper bound handed to git's --until
# @return [String] raw git output
def files_with_changes(directory_path:, start_date:, end_date:)
  IO.popen(
    ['git', 'log', '--name-only', '--pretty=format:', "--since=#{start_date}", "--until=#{end_date}",
     '--', directory_path.to_s],
    &:read
  )
end
170
+
171
# Counts the commits that touched +file+ (following renames) within a time range.
#
# The count is the number of `git log` output lines starting with "commit",
# which is what the former `grep -c '^commit'` pipeline computed. It is
# returned as a newline-terminated String, matching that pipeline's output.
# git is started with an argument vector, so file names containing shell
# metacharacters are passed through verbatim.
#
# @param file [String] path of the file
# @param start_date [#to_s] lower bound handed to git's --since
# @param end_date [#to_s] upper bound handed to git's --until
# @return [String] the count followed by a newline, e.g. "3\n"
def git_commit_count(file:, start_date:, end_date:)
  log = IO.popen(
    ['git', 'log', "--since=#{start_date}", "--until=#{end_date}", '--follow', '--', file.to_s],
    &:read
  )
  "#{log.lines.count { |line| line.start_with?('commit') }}\n"
end
174
+
175
# Returns the subject lines of the commits that touched +file+ (following renames)
# within a time range, newest first and separated by newlines.
#
# git is started with an argument vector rather than a shell command line, so
# file names containing shell metacharacters are passed through verbatim.
#
# @param file [String] path of the file
# @param start_date [#to_s] lower bound handed to git's --since
# @param end_date [#to_s] upper bound handed to git's --until
# @return [String] raw `git log --pretty=format:%s` output
def git_commit_info(file:, start_date:, end_date:)
  IO.popen(
    ['git', 'log', '--pretty=format:%s', "--since=#{start_date}", "--until=#{end_date}", '--follow',
     '--', file.to_s],
    &:read
  )
end
178
+
179
# Collects per-file team statistics for the changed files in a time range.
#
# For every file the commit count and commit subjects are fetched from git.
# A team name is the upper-cased text matched by TEAM_REGEX in a subject.
# Subjects without a match are skipped (they used to raise NoMethodError on
# nil), and names rejected by EXCLUSIONS are dropped. A file counts as changed
# by many teams when more than one distinct team remains; only those files are
# recorded in the returned map.
#
# @param uniq_code_files_with_changes [Array<String>] file paths to analyse
# @param start_date [#to_s] lower bound passed on to the git helpers
# @param end_date [#to_s] upper bound passed on to the git helpers
# @return [Array] in order: all_teams (one array of team names per file, duplicates kept),
#   cross_teams_count, single_ownership_teams_count, files_changed_by_many_teams,
#   total_changes, file_team_map (path => [unique teams, commit count])
def analyze_changed_files(uniq_code_files_with_changes:, start_date:, end_date:)
  team_pattern = /#{TEAM_REGEX}/
  all_teams = []
  cross_teams_count = 0
  single_ownership_teams_count = 0
  files_changed_by_many_teams = 0
  total_changes = 0
  file_team_map = {}

  uniq_code_files_with_changes.each do |file|
    commit_count = git_commit_count(file:, start_date:, end_date:).to_i
    subjects = git_commit_info(file:, start_date:, end_date:).split("\n")
    teams = subjects.filter_map { |subject| subject[team_pattern]&.upcase }
                    .reject { |team| EXCLUSIONS&.include?(team) }

    total_changes += commit_count
    all_teams << teams
    teams = teams.uniq

    if teams.count > 1
      files_changed_by_many_teams += 1
      file_team_map[file.to_s] = [teams, commit_count]
      cross_teams_count += teams.count
    else
      single_ownership_teams_count += 1
    end

    puts "\n#{File.basename(file)} [#{commit_count}]:#{teams}\n" if @debug
  end

  [all_teams, cross_teams_count, single_ownership_teams_count, files_changed_by_many_teams, total_changes,
   file_team_map]
end
210
+
211
# Narrows +file_team_map+ to entries whose file exists and is longer than BIG_FILE_SIZE lines.
#
# Lines that are blank, start with '//' or start with a single-line
# '/* ... */' comment are not counted towards the length.
#
# @param file_team_map [Hash{String => Object}] entries keyed by file path
# @return [Hash] the entries that pass both checks
def filter_files(file_team_map:)
  file_team_map.select do |file_path, _stats|
    next false unless File.exist?(file_path)

    code_lines = File.foreach(file_path).count { |line| !line.match?(%r{^\s*(//|/\*.*\*/|\s*$)}) }
    code_lines > BIG_FILE_SIZE.to_i
  end
end
219
+
220
# Prints the contribution report for one time window and, while @steps remains
# positive, checks out an older commit and repeats for the preceding window.
#
# Changes from the previous version:
# * git output is split per line, so paths containing spaces stay intact;
# * the directory prefix is removed with delete_prefix; gsub removed every
#   occurrence, so a directory of '.' stripped all dots from the path;
# * the hotspot loop no longer overwrites the outer contributors summary;
# * percentages over an empty total print 0.0 rather than NaN.
#
# NOTE(review): the git query range is shifted 30 days before the printed
# timeframe (both bounds subtract 30 * 86_400) - confirm this is intended.
# NOTE(review): @begin_time - duration_in_days assumes a Date-like value whose
# subtraction is in days - confirm with the caller.
#
# @return [nil]
def contribution_message
  duration_in_days = @duration_in_days.to_i
  end_date = @begin_time.to_time.to_i - 30 * 86_400
  start_date = end_date - duration_in_days * 86_400

  tracked_files = git_files(directory_path: @directory_path).lines(chomp: true).reject(&:empty?)
  file_count = filter_existing_code_files(tracked_files).count
  changed_files = files_with_changes(directory_path: @directory_path, start_date:, end_date:)
                  .lines(chomp: true).reject(&:empty?).sort
  uniq_code_files_with_changes = filter_existing_code_files(changed_files).uniq

  all_teams, cross_teams_count, single_ownership_teams_count, files_changed_by_many_teams, total_changes,
    file_team_map = analyze_changed_files(uniq_code_files_with_changes:, start_date:, end_date:)

  contributors = all_teams.flatten.compact.tally.sort_by { |team, count| [-count, team] }.to_h
  churn_count = file_team_map.values.sum { |entry| entry[1] }

  # Guards the two ratios below against a zero denominator.
  percent_of = ->(part, whole) { whole.zero? ? 0.0 : (part.to_f / whole) * 100 }
  hotspot_changes_percentage = percent_of.call(churn_count, total_changes)
  single_contributor_percentage = 100 - percent_of.call(files_changed_by_many_teams, file_count)

  # Filter files based on existence and size, then order by commit count (descending) and path.
  filtered_files = filter_files(file_team_map:)
  filtered_top_touched_files = filtered_files.sort_by { |path, entry| [-entry.last, path] }

  puts ''
  puts "*Timeframe:* #{(@begin_time - duration_in_days).strftime('%Y-%m-%d')} to #{@begin_time.strftime('%Y-%m-%d')}"
  puts " *Code files with a single contributor:* #{single_contributor_percentage.round(2)}%"
  puts " *Existing files changed by many teams:* #{files_changed_by_many_teams}"
  puts " *Current existing #{CODE_EXTENSIONS} files:* #{file_count}"
  puts ' *Cross-Squad Dependency:*'
  puts " *Contributions by multiple squads to the same files:* #{cross_teams_count}"
  puts " *Contributions by single squads contributing to single files:* #{single_ownership_teams_count}"
  puts " *Hotspot Code Changes:* #{hotspot_changes_percentage.round(2)}%"
  puts " *Churn count(commits to files by multiple teams):* #{churn_count}"
  puts " *Total amount of commits:* #{total_changes}"
  count_hotspot_lines(filtered_files.keys)
  puts " *#{CODE_EXTENSIONS} files with multiple contributors:* #{file_team_map.count}"
  puts " *#{CODE_EXTENSIONS} files exceeding #{BIG_FILE_SIZE} lines with multiple contributors:* #{filtered_top_touched_files.count}"
  puts " *Total amount of commits to #{CODE_EXTENSIONS} files:* #{total_changes}"
  puts " *Total #{CODE_EXTENSIONS} files changed:* #{uniq_code_files_with_changes.count}"
  count_big_files(@directory_path)
  puts " *Current total of #{CODE_EXTENSIONS} files in the folder:* #{file_count}"
  puts " *Contributors:* #{contributors}"

  if HOTSPOT
    hotspot_fragments = ["\n *Hotspot files(#{filtered_top_touched_files.count}):*\n"]

    filtered_top_touched_files.each do |file, entry|
      hotspot_fragments << "\n"
      hotspot_fragments << " #{file.delete_prefix(@directory_path)} Contributors: #{entry.first} " \
                           "Commits: #{entry.last} Owner: #{find_owner(file:)}\n"
    end
    hotspot_output = hotspot_fragments.join

    if FILE_OUTPUT
      File.open('hotspot.txt', 'w') { |f| f.puts hotspot_output }
    else
      puts hotspot_output
    end
  end

  handle_codeowners(file_team_map:) if CODEOWNERS

  @steps -= 1

  return unless @steps.positive?

  # Move the working tree to the last commit before the previous window, then report on it.
  system("git checkout `git rev-list -1 --before='#{(@begin_time - duration_in_days).strftime('%B %d %Y')}' HEAD`",
         %i[out err] => File::NULL)
  @begin_time -= duration_in_days
  contribution_message
end
290
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Reopens the gem's main class to attach its release number.
class GithubRepoStatistics
  # Version string of this release.
  VERSION = '2.0.8'
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'github_repo_statistics/version'
4
+
5
# Reopens the gem's main class to declare its namespaced error type.
class GithubRepoStatistics
  # StandardError subclass scoped to this class.
  Error = Class.new(StandardError)
end
@@ -0,0 +1,4 @@
1
# RBS signature for the GithubRepoStatistics class.
class GithubRepoStatistics
  # Declared type of the VERSION constant.
  VERSION: String
  # See the writing guide of rbs: https://github.com/ruby/rbs#guides
end
File without changes
@@ -0,0 +1,333 @@
1
+ line
2
+ line
3
+ line
4
+ line
5
+ line
6
+ line
7
+ line
8
+ line
9
+ line
10
+ line
11
+ line
12
+ line
13
+ line
14
+ line
15
+ line
16
+ line
17
+ line
18
+ line
19
+ line
20
+ line
21
+ line
22
+ line
23
+ line
24
+ line
25
+ line
26
+ line
27
+ line
28
+ line
29
+ line
30
+ line
31
+ line
32
+ line
33
+ line
34
+ line
35
+ line
36
+ line
37
+ line
38
+ line
39
+ line
40
+ line
41
+ line
42
+ line
43
+ line
44
+ line
45
+ line
46
+ line
47
+ line
48
+ line
49
+ line
50
+ line
51
+ line
52
+ line
53
+ line
54
+ line
55
+ line
56
+ line
57
+ line
58
+ line
59
+ line
60
+ line
61
+ line
62
+ line
63
+ line
64
+ line
65
+ line
66
+ line
67
+ line
68
+ line
69
+ line
70
+ line
71
+ line
72
+ line
73
+ line
74
+ line
75
+ line
76
+ line
77
+ line
78
+ line
79
+ line
80
+ line
81
+ line
82
+ line
83
+ line
84
+ line
85
+ line
86
+ line
87
+ line
88
+ line
89
+ line
90
+ line
91
+ line
92
+ line
93
+ line
94
+ line
95
+ line
96
+ line
97
+ line
98
+ line
99
+ line
100
+ line
101
+ line
102
+ line
103
+ line
104
+ line
105
+ line
106
+ line
107
+ line
108
+ line
109
+ line
110
+ line
111
+ line
112
+ line
113
+ line
114
+ line
115
+ line
116
+ line
117
+ line
118
+ line
119
+ line
120
+ line
121
+ line
122
+ line
123
+ line
124
+ line
125
+ line
126
+ line
127
+ line
128
+ line
129
+ line
130
+ line
131
+ line
132
+ line
133
+ line
134
+ line
135
+ line
136
+ line
137
+ line
138
+ line
139
+ line
140
+ line
141
+ line
142
+ line
143
+ line
144
+ line
145
+ line
146
+ line
147
+ line
148
+ line
149
+ line
150
+ line
151
+ line
152
+ line
153
+ line
154
+ line
155
+ line
156
+ line
157
+ line
158
+ line
159
+ line
160
+ line
161
+ line
162
+ line
163
+ line
164
+ line
165
+ line
166
+ line
167
+ line
168
+ line
169
+ line
170
+ line
171
+ line
172
+ line
173
+ line
174
+ line
175
+ line
176
+ line
177
+ line
178
+ line
179
+ line
180
+ line
181
+ line
182
+ line
183
+ line
184
+ line
185
+ line
186
+ line
187
+ line
188
+ line
189
+ line
190
+ line
191
+ line
192
+ line
193
+ line
194
+ line
195
+ line
196
+ line
197
+ line
198
+ line
199
+ line
200
+ line
201
+ line
202
+ line
203
+ line
204
+ line
205
+ line
206
+ line
207
+ line
208
+ line
209
+ line
210
+ line
211
+ line
212
+ line
213
+ line
214
+ line
215
+ line
216
+ line
217
+ line
218
+ line
219
+ line
220
+ line
221
+ line
222
+ line
223
+ line
224
+ line
225
+ line
226
+ line
227
+ line
228
+ line
229
+ line
230
+ line
231
+ line
232
+ line
233
+ line
234
+ line
235
+ line
236
+ line
237
+ line
238
+ line
239
+ line
240
+ line
241
+ line
242
+ line
243
+ line
244
+ line
245
+ line
246
+ line
247
+ line
248
+ line
249
+ line
250
+ line
251
+ line
252
+ line
253
+ line
254
+ line
255
+ line
256
+ line
257
+ line
258
+ line
259
+ line
260
+ line
261
+ line
262
+ line
263
+ line
264
+ line
265
+ line
266
+ line
267
+ line
268
+ line
269
+ line
270
+ line
271
+ line
272
+ line
273
+ line
274
+ line
275
+ line
276
+ line
277
+ line
278
+ line
279
+ line
280
+ line
281
+ line
282
+ line
283
+ line
284
+ line
285
+ line
286
+ line
287
+ line
288
+ line
289
+ line
290
+ line
291
+ line
292
+ line
293
+ line
294
+ line
295
+ line
296
+ line
297
+ line
298
+ line
299
+ line
300
+ line
301
+ line
302
+ line
303
+ line
304
+ line
305
+ line
306
+ line
307
+ line
308
+ line
309
+ line
310
+ line
311
+ line
312
+ line
313
+ line
314
+ line
315
+ line
316
+ line
317
+ line
318
+ line
319
+ line
320
+ line
321
+ line
322
+ // comment
323
+
324
+
325
+ line
326
+ line
327
+ line
328
+ line
329
+ line
330
+ line
331
+ line
332
+ line
333
+ line