github_repo_statistics 2.3.5 → 2.3.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 538bcf5774623f5443c771c731035806d40bc7ecced742899a95168dca809094
4
- data.tar.gz: '063853f3714c6a18e70d9eb936e88d2ef7c592203936412f6f9981960ae5db7f'
3
+ metadata.gz: 722258cfcbda685136e97c5b844f38b4da2937dbf3693b5131c628aad5131672
4
+ data.tar.gz: e0377225ed1e088b3eef9ff283ebf254225b36546eae619378f28df4f64cd800
5
5
  SHA512:
6
- metadata.gz: 7073aca1430b973a1a4ef07be6436843d532539bcd0aa5e12caaa22cf6b9c2e6152538d366934e4436aae95d3a64a6fecbe408ad5c9770e1ca6dda3a3aadaafe
7
- data.tar.gz: 603ecfda33d86d85f8c41a4ea40860830c6e2311b37cb4e8c31d9d58b1a80fd1204c961a2b0326329df42298a9a7b95dea7a05d39da711792f2ccd4119ad450c
6
+ metadata.gz: 37cc51e5dd9c5c40ee4b816fcbfa39e0c288fe6fa17c298890e6f6dbc508923dd094f2bd75969bc906695c69a6e5bec6d36ccdb380b8c1d9a455247b86ce2af4
7
+ data.tar.gz: 355b86a47bb3f55ac5426256a840e3e4a77320a093524f77c93627d73b08c53df3762a5c9c8bab34992f182ba7a74575806442933855858a4f17d245d3986cf2
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- github_repo_statistics (2.3.5)
4
+ github_repo_statistics (2.3.7)
5
5
  date
6
6
  faraday-retry
7
7
  google-cloud-bigquery
@@ -78,6 +78,16 @@ OptionParser.new do |opts|
78
78
  options[:code_extension] = code_extension
79
79
  end
80
80
 
81
+ opts.on('--team-to-focus STRING',
82
+ 'The team identifier to focus for the metric collection [default: ""]') do |team_to_focus|
83
+ options[:team_to_focus] = team_to_focus
84
+ end
85
+
86
+ opts.on('--codeowner-to-focus STRING',
87
+ 'The codeowner identifier to focus for the metric collection [default: ""]') do |codeowner_to_focus|
88
+ options[:codeowner_to_focus] = codeowner_to_focus
89
+ end
90
+
81
91
  opts.on('--output-to-files',
82
92
  'Puts the output for hotspot and codeowners into files instead of the STDOUT (useful for CI and big amount of data) [default:false]') do
83
93
  options[:file_output] = true
@@ -112,6 +122,8 @@ FILE_OUTPUT = options[:file_output] || false
112
122
  CODE_EXTENSIONS = options[:code_extension] ? options[:code_extension].split(',') : ['.swift', '.kt']
113
123
  EXCLUDED_FILES = options[:excluded_files]
114
124
  EXCLUDED_PRS = options[:excluded_prs]
125
+ TEAM_TO_FOCUS = options[:team_to_focus]
126
+ CODEOWNER_TO_FOCUS = options[:codeowner_to_focus]
115
127
 
116
128
  unless CI
117
129
  puts "\nDirectory: #{REPO_PATH}\n"
@@ -150,10 +150,24 @@ class GithubRepoStatistics
150
150
  File.foreach(file).reject { |line| line.match(%r{^\s*(//|/\*.*\*/|\s*$)}) }.count
151
151
  end
152
152
 
153
- def filter_existing_code_files(files)
153
+ def filter_existing_code_files(files, start_date, end_date)
154
154
  files.select do |f|
155
155
  next unless File.exist?(f)
156
156
 
157
+ git_log = git_commit_info(file: f, start_date:, end_date:).split("\n")
158
+
159
+ teams = git_log.map do |team|
160
+ team.match(/#{TEAM_REGEX}/)[0].upcase
161
+ end.reject { |e| EXCLUSIONS&.include?(e) }
162
+
163
+ if TEAM_TO_FOCUS && CODEOWNER_TO_FOCUS
164
+ next if !teams.include?(TEAM_TO_FOCUS) && !find_owner(file: f).include?(CODEOWNER_TO_FOCUS)
165
+ elsif TEAM_TO_FOCUS
166
+ next unless teams.include?(TEAM_TO_FOCUS)
167
+ elsif CODEOWNER_TO_FOCUS
168
+ next unless find_owner(file: f).include?(CODEOWNER_TO_FOCUS)
169
+ end
170
+
157
171
  if EXCLUDED_FILES
158
172
  excluded_patterns = EXCLUDED_FILES.split(',')
159
173
  next if excluded_patterns.any? { |pattern| f.include?(pattern) }
@@ -213,9 +227,10 @@ class GithubRepoStatistics
213
227
  team.match(/#{TEAM_REGEX}/)[0].upcase
214
228
  end.reject { |e| EXCLUSIONS&.include?(e) }
215
229
 
230
+ teams = calculate_percentile(teams, 90)
231
+
216
232
  total_changes += commit_count
217
233
  all_teams << teams
218
- teams = teams.uniq
219
234
 
220
235
  if teams.count > 1
221
236
  files_changed_by_many_teams += 1
@@ -231,6 +246,30 @@ class GithubRepoStatistics
231
246
  file_team_map]
232
247
  end
233
248
 
249
# Returns the most frequent distinct elements of +arr+ that together account
# for at least +percentile+ percent of all entries.
#
# Despite the name this is not a statistical percentile: elements are ranked
# by how often they occur (most frequent first) and collected until their
# cumulative occurrence count reaches the cut-off. Rare elements in the long
# tail are therefore dropped from the result.
#
# @param arr [Array] elements to rank; duplicates are what is being counted
# @param percentile [Numeric] share of entries to cover, expressed as 0..100
# @return [Array] distinct elements, most frequent first; empty when +arr+
#   is empty
def calculate_percentile(arr, percentile)
  return [] if arr.empty?

  cutoff = arr.size * (percentile / 100.0)

  # sort_by is not stable, so the first-appearance index is used as an explicit
  # tie-breaker; equally frequent elements then always come out in the same
  # order (tally keeps keys in first-appearance order).
  ranked = arr.tally.sort_by.with_index { |(_item, count), index| [-count, index] }

  selected_elements = []
  cumulative_count = 0

  ranked.each do |(item, count)|
    cumulative_count += count
    selected_elements << item
    # The element that crosses the cut-off is itself part of the result.
    break if cumulative_count >= cutoff
  end

  selected_elements
end
272
+
234
273
  def filter_files(file_team_map:, size: BIG_FILE_SIZE)
235
274
  file_team_map.select do |file_path|
236
275
  next unless File.exist?(file_path)
@@ -245,10 +284,11 @@ class GithubRepoStatistics
245
284
  start_date = @begin_time.to_time.to_i - duration_in_days * 86_400
246
285
  end_date = @begin_time.to_time.to_i
247
286
  git_ls = git_files(directory_path: @directory_path)
248
- file_count = filter_existing_code_files(git_ls.split).count
287
+ file_count = filter_existing_code_files(git_ls.split, start_date, end_date).count
249
288
  all_files_with_changes = files_with_changes(directory_path: @directory_path, start_date:, end_date:).split.sort
250
- code_files_with_changes = filter_existing_code_files(all_files_with_changes)
289
+ code_files_with_changes = filter_existing_code_files(all_files_with_changes, start_date, end_date)
251
290
  uniq_code_files_with_changes = code_files_with_changes.uniq
291
+
252
292
  all_teams, cross_teams_count, single_ownership_teams_count, files_changed_by_many_teams, total_changes, file_team_map = analyze_changed_files(
253
293
  uniq_code_files_with_changes:, start_date:, end_date:
254
294
  )
@@ -20,7 +20,7 @@ class ReleaseMergeReport
20
20
  puts "#{branch}: #{count}"
21
21
  end
22
22
 
23
- # ENV['BQ_CREDENTIALS'] = `cat /Users/serghei.moret/.config/gcloud/application_default_credentials.json`
23
+ ENV['BQ_CREDENTIALS'] = `cat /Users/serghei.moret/.config/gcloud/application_default_credentials.json`
24
24
 
25
25
  export_to_bigquery(grouped_branch_counts) if ENV['BQ_CREDENTIALS']
26
26
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class GithubRepoStatistics
4
- VERSION = '2.3.5'
4
+ VERSION = '2.3.7'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: github_repo_statistics
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.5
4
+ version: 2.3.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Serghei Moret
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-05-22 00:00:00.000000000 Z
11
+ date: 2024-06-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: date