github_repo_statistics 2.3.5 → 2.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 538bcf5774623f5443c771c731035806d40bc7ecced742899a95168dca809094
4
- data.tar.gz: '063853f3714c6a18e70d9eb936e88d2ef7c592203936412f6f9981960ae5db7f'
3
+ metadata.gz: 722258cfcbda685136e97c5b844f38b4da2937dbf3693b5131c628aad5131672
4
+ data.tar.gz: e0377225ed1e088b3eef9ff283ebf254225b36546eae619378f28df4f64cd800
5
5
  SHA512:
6
- metadata.gz: 7073aca1430b973a1a4ef07be6436843d532539bcd0aa5e12caaa22cf6b9c2e6152538d366934e4436aae95d3a64a6fecbe408ad5c9770e1ca6dda3a3aadaafe
7
- data.tar.gz: 603ecfda33d86d85f8c41a4ea40860830c6e2311b37cb4e8c31d9d58b1a80fd1204c961a2b0326329df42298a9a7b95dea7a05d39da711792f2ccd4119ad450c
6
+ metadata.gz: 37cc51e5dd9c5c40ee4b816fcbfa39e0c288fe6fa17c298890e6f6dbc508923dd094f2bd75969bc906695c69a6e5bec6d36ccdb380b8c1d9a455247b86ce2af4
7
+ data.tar.gz: 355b86a47bb3f55ac5426256a840e3e4a77320a093524f77c93627d73b08c53df3762a5c9c8bab34992f182ba7a74575806442933855858a4f17d245d3986cf2
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- github_repo_statistics (2.3.5)
4
+ github_repo_statistics (2.3.7)
5
5
  date
6
6
  faraday-retry
7
7
  google-cloud-bigquery
@@ -78,6 +78,16 @@ OptionParser.new do |opts|
78
78
  options[:code_extension] = code_extension
79
79
  end
80
80
 
81
+ opts.on('--team-to-focus STRING',
82
+ 'The team identifier to focus for the metric collection [default: ""]') do |team_to_focus|
83
+ options[:team_to_focus] = team_to_focus
84
+ end
85
+
86
+ opts.on('--codeowner-to-focus STRING',
87
+ 'The codeowner identifier to focus for the metric collection [default: ""]') do |codeowner_to_focus|
88
+ options[:codeowner_to_focus] = codeowner_to_focus
89
+ end
90
+
81
91
  opts.on('--output-to-files',
82
92
  'Puts the output for hotspot and codeowners into files instead of the STDOUT (useful for CI and big amount of data) [default:false]') do
83
93
  options[:file_output] = true
@@ -112,6 +122,8 @@ FILE_OUTPUT = options[:file_output] || false
112
122
  CODE_EXTENSIONS = options[:code_extension] ? options[:code_extension].split(',') : ['.swift', '.kt']
113
123
  EXCLUDED_FILES = options[:excluded_files]
114
124
  EXCLUDED_PRS = options[:excluded_prs]
125
+ TEAM_TO_FOCUS = options[:team_to_focus]
126
+ CODEOWNER_TO_FOCUS = options[:codeowner_to_focus]
115
127
 
116
128
  unless CI
117
129
  puts "\nDirectory: #{REPO_PATH}\n"
@@ -150,10 +150,24 @@ class GithubRepoStatistics
150
150
  File.foreach(file).reject { |line| line.match(%r{^\s*(//|/\*.*\*/|\s*$)}) }.count
151
151
  end
152
152
 
153
- def filter_existing_code_files(files)
153
+ def filter_existing_code_files(files, start_date, end_date)
154
154
  files.select do |f|
155
155
  next unless File.exist?(f)
156
156
 
157
+ git_log = git_commit_info(file: f, start_date:, end_date:).split("\n")
158
+
159
+ teams = git_log.map do |team|
160
+ team.match(/#{TEAM_REGEX}/)[0].upcase
161
+ end.reject { |e| EXCLUSIONS&.include?(e) }
162
+
163
+ if TEAM_TO_FOCUS && CODEOWNER_TO_FOCUS
164
+ next if !teams.include?(TEAM_TO_FOCUS) && !find_owner(file: f).include?(CODEOWNER_TO_FOCUS)
165
+ elsif TEAM_TO_FOCUS
166
+ next unless teams.include?(TEAM_TO_FOCUS)
167
+ elsif CODEOWNER_TO_FOCUS
168
+ next unless find_owner(file: f).include?(CODEOWNER_TO_FOCUS)
169
+ end
170
+
157
171
  if EXCLUDED_FILES
158
172
  excluded_patterns = EXCLUDED_FILES.split(',')
159
173
  next if excluded_patterns.any? { |pattern| f.include?(pattern) }
@@ -213,9 +227,10 @@ class GithubRepoStatistics
213
227
  team.match(/#{TEAM_REGEX}/)[0].upcase
214
228
  end.reject { |e| EXCLUSIONS&.include?(e) }
215
229
 
230
+ teams = calculate_percentile(teams, 90)
231
+
216
232
  total_changes += commit_count
217
233
  all_teams << teams
218
- teams = teams.uniq
219
234
 
220
235
  if teams.count > 1
221
236
  files_changed_by_many_teams += 1
@@ -231,6 +246,30 @@ class GithubRepoStatistics
231
246
  file_team_map]
232
247
  end
233
248
 
249
+ def calculate_percentile(arr, percentile)
250
+ # Count occurrences of each unique element
251
+ counts = arr.each_with_object(Hash.new(0)) { |item, hash| hash[item] += 1 }
252
+
253
+ # Sort elements by their counts in descending order
254
+ sorted_counts = counts.sort_by { |k, v| -v }.to_h
255
+
256
+ # Calculate the cut-off for the percentile
257
+ total_count = arr.size
258
+ cutoff = total_count * (percentile / 100.0)
259
+
260
+ # Select elements that meet the percentile criteria
261
+ selected_elements = []
262
+ cumulative_count = 0
263
+
264
+ sorted_counts.each do |item, count|
265
+ cumulative_count += count
266
+ selected_elements << item
267
+ break if cumulative_count >= cutoff
268
+ end
269
+
270
+ selected_elements
271
+ end
272
+
234
273
  def filter_files(file_team_map:, size: BIG_FILE_SIZE)
235
274
  file_team_map.select do |file_path|
236
275
  next unless File.exist?(file_path)
@@ -245,10 +284,11 @@ class GithubRepoStatistics
245
284
  start_date = @begin_time.to_time.to_i - duration_in_days * 86_400
246
285
  end_date = @begin_time.to_time.to_i
247
286
  git_ls = git_files(directory_path: @directory_path)
248
- file_count = filter_existing_code_files(git_ls.split).count
287
+ file_count = filter_existing_code_files(git_ls.split, start_date, end_date).count
249
288
  all_files_with_changes = files_with_changes(directory_path: @directory_path, start_date:, end_date:).split.sort
250
- code_files_with_changes = filter_existing_code_files(all_files_with_changes)
289
+ code_files_with_changes = filter_existing_code_files(all_files_with_changes, start_date, end_date)
251
290
  uniq_code_files_with_changes = code_files_with_changes.uniq
291
+
252
292
  all_teams, cross_teams_count, single_ownership_teams_count, files_changed_by_many_teams, total_changes, file_team_map = analyze_changed_files(
253
293
  uniq_code_files_with_changes:, start_date:, end_date:
254
294
  )
@@ -20,7 +20,7 @@ class ReleaseMergeReport
20
20
  puts "#{branch}: #{count}"
21
21
  end
22
22
 
23
- # ENV['BQ_CREDENTIALS'] = `cat /Users/serghei.moret/.config/gcloud/application_default_credentials.json`
23
+ ENV['BQ_CREDENTIALS'] = `cat /Users/serghei.moret/.config/gcloud/application_default_credentials.json`
24
24
 
25
25
  export_to_bigquery(grouped_branch_counts) if ENV['BQ_CREDENTIALS']
26
26
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class GithubRepoStatistics
4
- VERSION = '2.3.5'
4
+ VERSION = '2.3.7'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: github_repo_statistics
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.5
4
+ version: 2.3.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Serghei Moret
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-05-22 00:00:00.000000000 Z
11
+ date: 2024-06-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: date