oss-stats 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1048 @@
+require 'base64'
+require 'date'
+require 'deep_merge'
+require 'octokit'
+require 'optparse'
+require 'set'
+require 'yaml'
+
+require_relative 'buildkite_client'
+require_relative 'buildkite_token'
+require_relative 'config/repo_stats'
+require_relative 'github_token'
+require_relative 'log'
+
+module OssStats
+  module RepoStats
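+    # Sleeps between GitHub API calls when limit_gh_ops_per_minute is
+    # configured; e.g. a limit of 120 ops/minute yields a 0.5s sleep per
+    # call. No-op if the limit is unset or zero.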
+    def rate_limited_sleep
+      limit_gh_ops_per_minute =
+        OssStats::Config::RepoStats.limit_gh_ops_per_minute
+      if limit_gh_ops_per_minute&.positive?
+        sleep_time = 60.0 / limit_gh_ops_per_minute
+        log.debug("Sleeping for #{sleep_time.round(2)}s to honor rate-limit")
+        sleep(sleep_time)
+      end
+    end
+
+    # Fetches and processes Pull Request and Issue statistics for a given
+    # repository from GitHub within a specified number of days.
+    #
+    # @param gh_client [Octokit::Client] The Octokit client for GitHub API
+    #   interaction
+    # @param options [Hash] A hash containing options like :org, :repo, and
+    #   :days
+    # @return [Hash] A hash containing processed PR and issue statistics and
+    #   lists
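+    #
+    # @example Minimal usage sketch, assuming a configured Octokit client
+    #   and a hypothetical some-org/some-repo
+    #   gh_client = Octokit::Client.new(access_token: ENV['GITHUB_TOKEN'])
+    #   stats = get_pr_and_issue_stats(
+    #     gh_client, { org: 'some-org', repo: 'some-repo', days: 30 }
+    #   )
+    #   stats[:pr][:open]      #=> count of open PRs
+    #   stats[:issue][:closed] #=> issues closed within the window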
+    def get_pr_and_issue_stats(gh_client, options)
+      repo = "#{options[:org]}/#{options[:repo]}"
+      cutoff_date = Date.today - options[:days]
+      pr_stats = {
+        open: 0,
+        closed: 0,
+        total_close_time: 0.0,
+        oldest_open: nil,
+        oldest_open_days: 0,
+        oldest_open_last_activity: 0,
+        stale_count: 0,
+        opened_this_period: 0,
+      }
+      issue_stats = {
+        open: 0,
+        closed: 0,
+        total_close_time: 0.0,
+        oldest_open: nil,
+        oldest_open_days: 0,
+        oldest_open_last_activity: 0,
+        stale_count: 0,
+        opened_this_period: 0,
+      }
+      prs = { open: [], closed: [] }
+      issues = { open: [], closed: [] }
+      stale_cutoff = Date.today - 30
+      page = 1
+
+      loop do
+        items = gh_client.issues(repo, state: 'all', per_page: 100, page:)
+        break if items.empty?
+
+        all_items_before_cutoff = true
+
+        items.each do |item|
+          created_date = item.created_at.to_date
+          closed_date = item.closed_at&.to_date
+          is_pr = !item.pull_request.nil?
+          last_comment_date = item.updated_at.to_date
+          labels = item.labels.map(&:name)
+          days_open = (Date.today - created_date).to_i
+          days_since_last_activity = (Date.today - last_comment_date).to_i
+
+          log.debug(
+            "Checking item: #{is_pr ? 'PR' : 'Issue'}, " +
+            "Created at #{created_date}, Closed at #{closed_date || 'N/A'}",
+          )
+
+          stats = is_pr ? pr_stats : issue_stats
+          list = is_pr ? prs : issues
+
+          # we count open as open and not waiting on contributor
+          if closed_date.nil? &&
+             !labels.include?('Status: Waiting on Contributor')
+            if stats[:oldest_open].nil? || created_date < stats[:oldest_open]
+              stats[:oldest_open] = created_date
+              stats[:oldest_open_days] = days_open
+              stats[:oldest_open_last_activity] = days_since_last_activity
+            end
+
+            stats[:stale_count] += 1 if last_comment_date < stale_cutoff
+            stats[:open] += 1
+            # Count those opened recently separately
+            if created_date >= cutoff_date
+              stats[:opened_this_period] += 1
+              list[:open] << item
+              all_items_before_cutoff = false
+            end
+          end
+
+          # Only count as closed if it was actually closed within the cutoff
+          # window
+          next unless closed_date && closed_date >= cutoff_date
+
+          # if it's a PR, make sure it was closed by merging
+          next unless !is_pr || item.pull_request.merged_at
+
+          # anything closed in the window counts as closed regardless of
+          # when it was opened
+          list[:closed] << item
+          stats[:closed] += 1
+          stats[:total_close_time] +=
+            (item.closed_at - item.created_at) / 3600.0
+          all_items_before_cutoff = false
+        end
+
+        page += 1
+        break if all_items_before_cutoff
+      end
+      pr_stats[:avg_time_to_close_hours] =
+        if pr_stats[:closed].zero?
+          0
+        else
+          pr_stats[:total_close_time] / pr_stats[:closed]
+        end
+      issue_stats[:avg_time_to_close_hours] =
+        if issue_stats[:closed].zero?
+          0
+        else
+          issue_stats[:total_close_time] / issue_stats[:closed]
+        end
+      { pr: pr_stats, issue: issue_stats, pr_list: prs, issue_list: issues }
+    end
+
+    def pipelines_from_readme(readme, bk_client)
+      pipelines = []
+      # Regex to find Buildkite badge markdown and capture the pipeline slug
+      # from the link URL. Example:
+      # [![Build Status](badge.svg)](https://buildkite.com/org/pipeline)
+      # Captures:
+      # 1: Full URL (https://buildkite.com/org/pipeline)
+      # 2: Org slug (org)
+      # 3: Pipeline slug (pipeline)
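+      # For instance, a README containing the badge above would yield:
+      #   readme.scan(buildkite_badge_regex)
+      #   #=> [["https://buildkite.com/org/pipeline", "org", "pipeline"]]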
+      buildkite_badge_regex =
+        %r{\)\]\((https://buildkite\.com/([^/]+)/([^/)]+))\)}
+      matches = readme.scan(buildkite_badge_regex)
+      if matches.empty?
+        log.debug('no BK pipelines found in readme')
+        return pipelines
+      end
+
+      matches.each do |match|
+        buildkite_org = match[1]
+        pipeline = match[2]
+        pk = bk_client.get_pipeline(buildkite_org, pipeline)
+        next unless pk
+
+        pipelines << {
+          pipeline:,
+          org: buildkite_org,
+          url: pk['url'],
+        }
+
+        log.debug(
+          "Found Buildkite pipeline: #{buildkite_org}/#{pipeline} in README",
+        )
+      end
+
+      pipelines
+    end
+
+    def get_bk_failed_tests(
+      gh_client, bk_client, repo, bk_pipelines_by_repo, settings, branches
+    )
+      failed_tests = {}
+      pipelines_to_check = Set.new
+      pipelines_to_check.merge(
+        bk_pipelines_by_repo.fetch("https://github.com/#{repo}", []).map do |x|
+          {
+            org: OssStats::Config::RepoStats.buildkite_org,
+            pipeline: x[:slug],
+            url: x[:url],
+          }
+        end,
+      )
+
+      begin
+        readme = Base64.decode64(gh_client.readme(repo).content)
+        rate_limited_sleep
+
+        pipelines_to_check.merge(pipelines_from_readme(readme, bk_client))
+      rescue Octokit::NotFound
+        log.warn(
+          "README.md not found for repo #{repo}. Skipping Buildkite check.",
+        )
+      end
+
+      from_date = Date.today - settings[:days]
+      today = Date.today
+      pipelines_to_check.each do |pl|
+        branches.each do |branch|
+          log.debug(
+            "Fetching Buildkite builds for #{pl}, branch: #{branch}",
+          )
+          api_builds = bk_client.get_pipeline_builds(
+            pl[:org], pl[:pipeline], from_date, today, branch
+          )
+          if api_builds.empty?
+            log.debug("No builds for #{pl} on #{branch}")
+            next
+          end
+
+          failed_tests[branch] ||= {}
+
+          # Sort builds by createdAt timestamp to process chronologically
+          # rubocop:disable Style/MultilineBlockChain
+          sorted_builds = api_builds.select do |b_edge|
+            b_edge&.dig('node', 'createdAt')
+          end.sort_by { |b_edge| DateTime.parse(b_edge['node']['createdAt']) }
+          # rubocop:enable Style/MultilineBlockChain
+
+          last_failure_date_bk = {}
+
+          sorted_builds.each do |build_edge|
+            build = build_edge['node']
+            id = build['id']
+            log.debug("Build #{id} for #{pl}")
+            begin
+              build_date = DateTime.parse(build['createdAt']).to_date
+            rescue ArgumentError, TypeError
+              log.warn(
+                "Invalid createdAt date for build in #{pl}: " +
+                "'#{build['createdAt']}'. Skipping this build.",
+              )
+              next
+            end
+
+            # Ensure build is within the processing date range
+            if build_date < from_date
+              log.debug('Build before time we care about, skipping')
+              next
+            end
+
+            unless build['state']
+              log.debug('no build state, skipping')
+              next
+            end
+
+            job_key = "[BK] #{pl[:org]}/#{pl[:pipeline]}"
+
+            if build['state'] == 'FAILED'
+              # we link to the pipeline, not the specific build
+              failed_tests[branch][job_key] ||= {
+                url: pl[:url], dates: Set.new
+              }
+              failed_tests[branch][job_key][:dates] << build_date
+              log.debug("Marking #{job_key} as failed (#{id} on #{build_date})")
+              last_failure_date_bk[job_key] = build_date
+            elsif build['state'] == 'PASSED'
+              # If a job passes, and it had a recorded failure on or before
+              # this build's date, clear it from ongoing failures.
+              if last_failure_date_bk[job_key] &&
+                 last_failure_date_bk[job_key] <= build_date
+                log.debug(
+                  "Unmarking #{job_key} as failed (#{id} on #{build_date})",
+                )
+                last_failure_date_bk.delete(job_key)
+              else
+                log.debug(
+                  "Ignoring #{job_key} success earlier than last failure" +
+                  " (#{id} on #{build_date})",
+                )
+              end
+            else
+              log.debug("State is #{build['state']}, ignoring")
+            end
+          end
+
+          # Propagate ongoing failures: if a job failed and didn't pass later,
+          # mark all subsequent days until today as failed.
+          last_failure_date_bk.each do |job_key, last_fail_date|
+            (last_fail_date + 1..today).each do |date|
+              failed_tests[branch][job_key][:dates] << date
+            end
+          end
+        end
+      end
+
+      failed_tests
+    rescue StandardError => e
+      log.error("Error during Buildkite integration for #{repo}: #{e.message}")
+      log.debug(e.backtrace.join("\n"))
+      # we may have captured some, return what we got
+      failed_tests
+    end
+
+    def get_gh_failed_tests(gh_client, repo, settings, branches)
+      failed_tests = {}
+      cutoff_date = Date.today - settings[:days]
+      today = Date.today
+      branches.each do |branch|
+        log.debug("Checking GHA workflow runs for #{repo}, branch: #{branch}")
+        failed_tests[branch] ||= {}
+        workflows = gh_client.workflows(repo).workflows
+        rate_limited_sleep
+        workflows.each do |workflow|
+          log.debug("Workflow: #{workflow.name}")
+          workflow_runs = []
+          page = 1
+          loop do
+            log.debug("  Acquiring page #{page}")
+            runs = gh_client.workflow_runs(
+              repo, workflow.id, branch:, status: 'completed', per_page: 100,
+              page:
+            )
+            rate_limited_sleep
+
+            break if runs.workflow_runs.empty?
+
+            workflow_runs.concat(runs.workflow_runs)
+
+            break if workflow_runs.last.created_at.to_date < cutoff_date
+
+            page += 1
+          end
+
+          # Process runs chronologically (oldest first) so that a later
+          # success can clear an earlier recorded failure, matching the
+          # Buildkite logic above.
+          workflow_runs.sort_by!(&:created_at)
+          last_failure_date = {}
+          workflow_runs.each do |run|
+            log.debug("  Looking at workflow run #{run.id}")
+            run_date = run.created_at.to_date
+            next if run_date < cutoff_date
+
+            jobs = gh_client.workflow_run_jobs(repo, run.id, per_page: 100).jobs
+            rate_limited_sleep
+
+            jobs.each do |job|
+              log.debug("    Looking at job #{job.name} [#{job.conclusion}]")
+              job_name_key = "#{workflow.name} / #{job.name}"
+              if job.conclusion == 'failure'
+                log.debug("Marking #{job_name_key} as failed (#{run_date})")
+                # we want to link to the _workflow_ on the relevant branch.
+                # If we link to a job, it's only on that given run, which
+                # isn't relevant to our reports, we want people to go see
+                # the current status and all the passes and failures.
+                #
+                # However, the link to the workflow is to the file that
+                # defines it, which is not what we want, but it's easy to
+                # munge.
+                url = workflow.html_url.gsub("blob/#{branch}", 'actions')
+                url << "?query=branch%3A#{branch}"
+                failed_tests[branch][job_name_key] ||= {
+                  # link to the workflow, not this specific run
+                  url:,
+                  dates: Set.new,
+                }
+                failed_tests[branch][job_name_key][:dates] << run_date
+                last_failure_date[job_name_key] = run_date
+              elsif job.conclusion == 'success'
+                if last_failure_date[job_name_key] &&
+                   last_failure_date[job_name_key] <= run_date
+                  log.debug("Unmarking #{job_name_key} as failed (#{run_date})")
+                  last_failure_date.delete(job_name_key)
+                else
+                  log.debug(
+                    "Ignoring #{job_name_key} success earlier than last" +
+                    " failure (#{run_date})",
+                  )
+                end
+              end
+            end
+          end
+          last_failure_date.each do |job_key, last_fail_date|
+            (last_fail_date + 1..today).each do |date|
+              failed_tests[branch][job_key][:dates] << date
+            end
+          end
+        end
+      end
+
+      failed_tests
+    rescue Octokit::NotFound => e
+      log.warn("Workflow API returned 404 for #{repo}: #{e.message}.")
+    rescue Octokit::Error, StandardError => e
+      log.error("Error processing repo #{repo}: #{e.message}")
+      log.debug(e.backtrace.join("\n"))
+    end
+
+    # Fetches failed test results from CI systems (GitHub Actions and
+    # Buildkite) for a given repository and branches.
+    #
+    # For GitHub Actions, it queries workflow runs and their associated jobs.
+    # For Buildkite, it parses the README for a Buildkite badge, then queries
+    # the Buildkite API for pipeline builds and jobs.
+    #
+    # It implements logic to track ongoing failures: if a job fails and is
+    # not subsequently fixed by a successful run on the same branch, it's
+    # considered to be continuously failing.
+    #
+    # @param gh_client [Octokit::Client] The Octokit client for GitHub API
+    #   interaction.
+    # @param bk_client [BuildkiteClient] A Buildkite client
+    # @param settings [Hash] A hash containing settings like :org, :repo,
+    #   :days, and :branches.
+    # @param bk_pipelines_by_repo [Hash] A hash of repo -> list of BK
+    #   pipelines
+    # @return [Hash] A hash where keys are branch names, and values are hashes
+    #   of job names to a Set of dates the job failed.
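+    #
+    # @example Shape of the returned hash (illustrative sketch; the job name
+    #   is hypothetical)
+    #   failures = get_failed_tests_from_ci(
+    #     gh_client, bk_client, settings, bk_pipelines_by_repo
+    #   )
+    #   failures['main']
+    #   #=> { 'unit / test (3.2)' => { url: '...', dates: #<Set: {...}> } }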
+    def get_failed_tests_from_ci(
+      gh_client, bk_client, settings, bk_pipelines_by_repo
+    )
+      repo = "#{settings[:org]}/#{settings[:repo]}"
+      branches_to_check = settings[:branches]
+      processed_branches = if branches_to_check.is_a?(String)
+                             branches_to_check.split(',').map(&:strip)
+                           else
+                             Array(branches_to_check).map(&:strip)
+                           end
+
+      failed_tests = get_gh_failed_tests(
+        gh_client, repo, settings, processed_branches
+      )
+
+      if bk_client
+        failed_tests.deep_merge!(
+          get_bk_failed_tests(
+            gh_client,
+            bk_client,
+            repo,
+            bk_pipelines_by_repo,
+            settings,
+            processed_branches,
+          ),
+        )
+      end
+
+      failed_tests
+    end
+
+    # Prints formatted Pull Request or Issue statistics.
+    #
+    # @param data [Hash] The hash containing PR/Issue stats and lists from
+    #   `get_pr_and_issue_stats`.
+    # @param type [String] The type of item to print ("PR" or "Issue").
+    # @param include_list [Boolean] Whether to include lists of individual
+    #   PRs/Issues.
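+    #
+    # @example Sketch, with `data` as returned by get_pr_and_issue_stats
+    #   print_pr_or_issue_stats(data, 'PR', false)
+    #   # logs "* PR Stats:" followed by closed/open counts, staleness,
+    #   # and average time to close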
+    def print_pr_or_issue_stats(data, type, include_list)
+      stats = data[type.downcase.to_sym]
+      list = data["#{type.downcase}_list".to_sym]
+      type_plural = type + 's'
+      log.info("\n* #{type} Stats:")
+      log.info("  * Closed #{type_plural}: #{stats[:closed]}")
+      if include_list
+        list[:closed].each do |item|
+          if OssStats::Config::RepoStats.no_links
+            log.info(
+              "    * #{item.title} (##{item.number}) - @#{item.user.login}",
+            )
+          else
+            log.info(
+              "    * [#{item.title} (##{item.number})](#{item.html_url}) " +
+              "- @#{item.user.login}",
+            )
+          end
+        end
+      end
+      log.info(
+        "  * Open #{type_plural}: #{stats[:open]} " +
+        "(#{include_list ? 'listing ' : ''}#{stats[:opened_this_period]} " +
+        'opened this period)',
+      )
+      if include_list && stats[:opened_this_period].positive?
+        list[:open].each do |item|
+          if OssStats::Config::RepoStats.no_links
+            log.info(
+              "    * #{item.title} (##{item.number}) - @#{item.user.login}",
+            )
+          else
+            log.info(
+              "    * [#{item.title} (##{item.number})](#{item.html_url}) " +
+              "- @#{item.user.login}",
+            )
+          end
+        end
+      end
+      if stats[:oldest_open]
+        log.info(
+          "  * Oldest Open #{type}: #{stats[:oldest_open]} " +
+          "(#{stats[:oldest_open_days]} days open, " +
+          "last activity #{stats[:oldest_open_last_activity]} days ago)",
+        )
+      end
+      log.info(
+        "  * Stale #{type} (>30 days without comment): " +
+        "#{stats[:stale_count]}",
+      )
+      avg_time = stats[:avg_time_to_close_hours]
+      avg_time_str =
+        if avg_time > 24
+          "#{(avg_time / 24).round(2)} days"
+        else
+          "#{avg_time.round(2)} hours"
+        end
+      log.info("  * Avg Time to Close #{type_plural}: #{avg_time_str}")
+    end
+
+    # Prints formatted CI status (failed tests).
+    #
+    # @param test_failures [Hash] The hash of test failures from
+    #   `get_failed_tests_from_ci`.
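+    #
+    # @example Sketch with a single hypothetical failing job
+    #   print_ci_status(
+    #     'main' => {
+    #       'unit / test' => { url: 'https://...', dates: Set[Date.today] },
+    #     },
+    #   )
+    #   # logs the branch header and "* [unit / test](https://...): 1 days"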
+    def print_ci_status(test_failures)
+      log.info("\n* CI Stats:")
+      test_failures.each do |branch, jobs|
+        line = "  * Branch: `#{branch}`"
+        if jobs.empty?
+          log.info(line + ': No job failures found! :tada:')
+        else
+          log.info(line + ' has the following failures:')
+          jobs.sort.each do |job_name, job_data|
+            if OssStats::Config::RepoStats.no_links
+              log.info("    * #{job_name}: #{job_data[:dates].size} days")
+            else
+              log.info(
+                "    * [#{job_name}](#{job_data[:url]}):" +
+                " #{job_data[:dates].size} days",
+              )
+            end
+          end
+        end
+      end
+    end
+
+    # Helper function to parse a value that can be an integer or percentage
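+    #
+    # @example
+    #   parse_value_or_percentage('5%') #=> 0.05
+    #   parse_value_or_percentage('5')  #=> 5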
+    def parse_value_or_percentage(value_str)
+      if value_str.end_with?('%')
+        # Convert percentage to a float representation (e.g., "5%" -> 0.05)
+        value_str.chomp('%').to_f / 100.0
+      else
+        value_str.to_i
+      end
+    end
+
+    def parse_options
+      options = {}
+      valid_modes = %w{ci pr issue all}
+      OptionParser.new do |opts|
+        opts.banner = "Usage: #{File.basename($PROGRAM_NAME)} [options]"
+
+        opts.on(
+          '-b BRANCHES',
+          '--branches BRANCHES',
+          Array,
+          'Comma-separated list of branches. Overrides specific org or repo' +
+          ' configs',
+        ) { |v| options[:branches] = v }
+
+        opts.on(
+          '-B BRANCHES',
+          '--default-branches BRANCHES',
+          Array,
+          'Comma-separated list of branches. Will be overridden by specific' +
+          ' org or repo configs',
+        ) { |v| options[:default_branches] = v }
+
+        opts.on(
+          '--buildkite-token TOKEN',
+          String,
+          'Buildkite API token',
+        ) { |v| options[:buildkite_token] = v }
+
+        opts.on(
+          '--buildkite-org ORG',
+          String,
+          'Buildkite org to find pipelines in. If specified, any pipeline' +
+          ' in that org associated with any repos we report on will be' +
+          ' included.',
+        ) { |v| options[:buildkite_org] = v }
+
+        opts.on(
+          '-c FILE',
+          '--config FILE_PATH',
+          String,
+          'Config file to load. [default: will look for' +
+          ' `repo_stats_config.rb` in `./`, `~/.config/oss_stats`, and' +
+          ' `/etc`]',
+        ) { |c| options[:config] = c }
+
+        opts.on(
+          '-d DAYS',
+          '--days DAYS',
+          Integer,
+          'Number of days to analyze. Overrides specific org or repo configs',
+        ) { |v| options[:days] = v }
+
+        opts.on(
+          '-D DAYS',
+          '--default-days DAYS',
+          Integer,
+          'Default number of days to analyze. Will be overridden by specific' +
+          ' org or repo configs',
+        ) { |v| options[:default_days] = v }
+
+        opts.on(
+          '--ci-timeout TIMEOUT',
+          Integer,
+          'Timeout for CI processing in seconds',
+        ) { |v| options[:ci_timeout] = v }
+
+        opts.on(
+          '--github-api-endpoint ENDPOINT',
+          String,
+          'GitHub API endpoint',
+        ) { |v| options[:github_api_endpoint] = v }
+
+        opts.on(
+          '--github-org ORG_NAME',
+          String,
+          'GitHub organization name',
+        ) { |v| options[:github_org] = v }
+
+        opts.on(
+          '--github-repo REPO_NAME',
+          String,
+          'GitHub repository name',
+        ) { |v| options[:github_repo] = v }
+
+        opts.on(
+          '--github-token TOKEN',
+          'GitHub personal access token',
+        ) { |v| options[:gh_token] = v }
+
+        opts.on(
+          '--include-list',
+          'Include list of relevant PRs/Issues (default: false)',
+        ) { options[:include_list] = true }
+
+        opts.on(
+          '--limit-gh-ops-per-minute RATE',
+          Float,
+          'Rate limit GitHub API operations to this number per minute',
+        ) { |v| options[:limit_gh_ops_per_minute] = v }
+
+        opts.on(
+          '-l LEVEL',
+          '--log-level LEVEL',
+          %i{trace debug info warn error fatal},
+          'Set logging level to LEVEL. [default: info]',
+        ) { |level| options[:log_level] = level }
+
+        opts.on(
+          '--no-links',
+          'Disable Markdown links in the output (default: false)',
+        ) { options[:no_links] = true }
+
+        opts.on(
+          '--mode MODE',
+          Array,
+          'Comma-separated list of modes: ci,issue,pr, or all (default: all)',
+        ) do |v|
+          invalid_modes = v.map(&:downcase) - valid_modes
+          unless invalid_modes.empty?
+            raise OptionParser::InvalidArgument,
+                  "Invalid mode(s): #{invalid_modes.join(', ')}. " +
+                  "Valid modes are: #{valid_modes.join(', ')}"
+          end
+          options[:mode] = v.map(&:downcase)
+        end
+
+        opts.on(
+          '--top-n-stale N',
+          String,
+          'Top N or N% stale PRs/Issues',
+        ) { |v| options[:top_n_stale] = parse_value_or_percentage(v) }
+
+        opts.on(
+          '--top-n-oldest N',
+          String,
+          'Top N or N% oldest PRs/Issues',
+        ) { |v| options[:top_n_oldest] = parse_value_or_percentage(v) }
+
+        opts.on(
+          '--top-n-time-to-close N',
+          String,
+          'Top N or N% PRs/Issues by time to close',
+        ) { |v| options[:top_n_time_to_close] = parse_value_or_percentage(v) }
+
+        opts.on(
+          '--top-n-most-broken-ci-days N',
+          String,
+          'Top N or N% CI jobs by broken days',
+        ) do |v|
+          options[:top_n_most_broken_ci_days] = parse_value_or_percentage(v)
+        end
+
+        opts.on(
+          '--top-n-most-broken-ci-jobs N',
+          String,
+          'Top N or N% CI jobs by number of failures',
+        ) do |v|
+          options[:top_n_most_broken_ci_jobs] = parse_value_or_percentage(v)
+        end
+
+        opts.on(
+          '--top-n-stale-pr N',
+          String,
+          'Top N or N% stale PRs (PR-specific)',
+        ) { |v| options[:top_n_stale_pr] = parse_value_or_percentage(v) }
+
+        opts.on(
+          '--top-n-stale-issue N',
+          String,
+          'Top N or N% stale Issues (Issue-specific)',
+        ) { |v| options[:top_n_stale_issue] = parse_value_or_percentage(v) }
+
+        opts.on(
+          '--top-n-oldest-pr N',
+          String,
+          'Top N or N% oldest PRs (PR-specific)',
+        ) { |v| options[:top_n_oldest_pr] = parse_value_or_percentage(v) }
+
+        opts.on(
+          '--top-n-oldest-issue N',
+          String,
+          'Top N or N% oldest Issues (Issue-specific)',
+        ) { |v| options[:top_n_oldest_issue] = parse_value_or_percentage(v) }
+
+        opts.on(
+          '--top-n-time-to-close-pr N',
+          String,
+          'Top N or N% PRs by time to close (PR-specific)',
+        ) do |v|
+          options[:top_n_time_to_close_pr] = parse_value_or_percentage(v)
+        end
+
+        opts.on(
+          '--top-n-time-to-close-issue N',
+          String,
+          'Top N or N% Issues by time to close (Issue-specific)',
+        ) do |v|
+          options[:top_n_time_to_close_issue] = parse_value_or_percentage(v)
+        end
+      end.parse!
+
+      # Set log level from CLI options first if provided
+      log.level = options[:log_level] if options[:log_level]
+      config = OssStats::Config::RepoStats
+
+      # Determine config file path.
+      config_file_to_load = options[:config] || config.config_file
+
+      # Load config from file if found
+      if config_file_to_load && File.exist?(config_file_to_load)
+        expanded_config_path = File.expand_path(config_file_to_load)
+        log.info("Loaded configuration from: #{expanded_config_path}")
+        config.from_file(expanded_config_path)
+      elsif options[:config] # Config file specified via CLI but not found
+        log.fatal("Specified config file '#{options[:config]}' not found.")
+        exit 1
+      end
+
+      # Merge CLI options into the configuration. CLI options take precedence.
+      config.merge!(options)
+
+      # Set final log level from potentially merged config
+      log.level = config.log_level
+
+      if config.github_repo && !config.github_org
+        raise ArgumentError, '--github-repo requires --github-org'
+      end
+    end
+
+    # Construct effective settings for a repository by merging global,
+    # org-level, and repo-level configurations.
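+    #
+    # @example Repo-level config beating org-level defaults (sketch; assumes
+    #   no run-wide days/branches override is set)
+    #   get_effective_repo_settings(
+    #     'some-org', 'some-repo',
+    #     { 'days' => 30 }, { 'days' => 7, 'branches' => 'main,release' }
+    #   )
+    #   #=> { org: 'some-org', repo: 'some-repo', days: 7,
+    #   #     branches: ['main', 'release'] }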
+    def get_effective_repo_settings(org, repo, org_conf = {}, repo_conf = {})
+      effective = { org:, repo: }
+      config = OssStats::Config::RepoStats
+
+      # we allow someone to override days or branches for the entire run,
+      # which is different from the fallback (default_{days,branches})
+      effective[:days] = config.days || repo_conf['days'] ||
+                         org_conf['days'] || config.default_days
+      branches_setting = config.branches || repo_conf['branches'] ||
+                         org_conf['branches'] || config.default_branches
+      effective[:branches] =
+        if branches_setting.is_a?(String)
+          branches_setting.split(',').map(&:strip)
+        else
+          Array(branches_setting).map(&:strip)
+        end
+      effective
+    end
+
+    def determine_orgs_to_process
+      config = OssStats::Config::RepoStats
+      relevant_orgs = {}
+      # Handle org/repo specified via CLI: overrides any config file
+      # orgs/repos.
+      if config.github_org || config.github_repo
+        # we already validated that if repo is set, so is org, so we can
+        # assume org is set...
+        if config.organizations[config.github_org]
+          log.debug("Limiting config to #{config.github_org} org")
+          relevant_orgs[config.github_org] =
+            config.organizations[config.github_org].dup
+        else
+          log.debug(
+            "Initializing config structure for #{config.github_org} org" +
+            ' requested on the command line, but not in config.',
+          )
+          relevant_orgs[config.github_org] = { 'repositories' => {} }
+        end
+
+        if config.github_repo
+          repo_config =
+            relevant_orgs[config.github_org]['repositories'][config.github_repo]
+          if repo_config
+            log.debug("Limiting config to #{config.github_repo} repo")
+            relevant_orgs[config.github_org]['repositories'] = {
+              config.github_repo => repo_config,
+            }
+          else
+            log.debug(
+              "Initializing config structure for #{config.github_repo} repo" +
+              ' requested on the command line, but not in config',
+            )
+            relevant_orgs[config.github_org]['repositories'] = {
+              config.github_repo => {},
+            }
+          end
+        end
+      else
+        relevant_orgs = config.organizations
+      end
+      relevant_orgs
+    end
+
+    def filter_repositories(all_repo_stats, config)
+      # If no filter options are set, return all stats
+      active_filters = %i{
+        top_n_stale top_n_oldest top_n_time_to_close
+        top_n_most_broken_ci_days top_n_most_broken_ci_jobs
+        top_n_stale_pr top_n_stale_issue
+        top_n_oldest_pr top_n_oldest_issue
+        top_n_time_to_close_pr top_n_time_to_close_issue
+      }.select { |opt| config[opt] }
+
+      return all_repo_stats if active_filters.empty?
+
+      log.debug(
+        "Filtering repositories based on filters: #{active_filters}",
+      )
+
+      selected_repos = Set.new
+      total_repos = all_repo_stats.size
+
+      # Helper to calculate N based on integer or percentage
+      calculate_n = lambda do |value|
+        if value.is_a?(Float)
+          # Percentage
+          (value * total_repos).ceil
+        else
+          # Integer
+          value
+        end
+      end
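+      # For instance, with 10 repos a "--top-n-stale 25%" was parsed to 0.25
+      # by parse_value_or_percentage, so calculate_n.call(0.25) #=> 3; an
+      # integer value such as 3 passes through unchanged.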
+
+      if %w{issue pr}.any? { |m| config.mode.include?(m) }
+        # Filter by top_n_stale (max of issue or PR)
+        if config.top_n_stale
+          n = calculate_n.call(config.top_n_stale)
+          get_stale_count = lambda do |data|
+            pr_stale = data.dig(:pr_issue_stats, :pr, :stale_count) || 0
+            issue_stale = data.dig(:pr_issue_stats, :issue, :stale_count) || 0
+            [pr_stale, issue_stale].max
+          end
+          all_repo_stats
+            .sort_by { |data| -get_stale_count.call(data) }.first(n)
+            .each do |r|
+              selected_repos.add(r) if get_stale_count.call(r).positive?
+            end
+        end
+
+        # Filter by top_n_oldest (max of issue or PR)
+        if config.top_n_oldest
+          n = calculate_n.call(config.top_n_oldest)
+          get_oldest_days = lambda do |data|
+            pr_days = data.dig(:pr_issue_stats, :pr, :oldest_open_days) || 0
+            issue_days =
+              data.dig(:pr_issue_stats, :issue, :oldest_open_days) || 0
+            [pr_days, issue_days].max
+          end
+          all_repo_stats
+            .sort_by { |data| -get_oldest_days.call(data) }.first(n)
+            .each do |r|
+              selected_repos.add(r) if get_oldest_days.call(r).positive?
+            end
+        end
+
+        # Filter by top_n_time_to_close (max of issue or PR)
+        if config.top_n_time_to_close
+          n = calculate_n.call(config.top_n_time_to_close)
+          get_avg_close_time = lambda do |data|
+            pr_avg =
+              data.dig(:pr_issue_stats, :pr, :avg_time_to_close_hours) || 0
+            issue_avg = data.dig(
+              :pr_issue_stats, :issue, :avg_time_to_close_hours
+            ) || 0
+            [pr_avg, issue_avg].max
+          end
+          all_repo_stats
+            .sort_by { |data| -get_avg_close_time.call(data) }.first(n)
+            .each do |r|
+              selected_repos.add(r) if get_avg_close_time.call(r).positive?
+            end
+        end
+      end
+
+      if config.mode.include?('pr')
+        # Filter by top_n_stale_pr
+        if config.top_n_stale_pr
+          n = calculate_n.call(config.top_n_stale_pr)
+          get_stale_pr_count = lambda do |data|
+            data.dig(:pr_issue_stats, :pr, :stale_count) || 0
+          end
+          all_repo_stats
+            .sort_by { |data| -get_stale_pr_count.call(data) }.first(n)
+            .each do |r|
+              selected_repos.add(r) if get_stale_pr_count.call(r).positive?
+            end
+        end
+
+        # Filter by top_n_oldest_pr
+        if config.top_n_oldest_pr
+          n = calculate_n.call(config.top_n_oldest_pr)
+          get_oldest_pr_days = lambda do |data|
+            data.dig(:pr_issue_stats, :pr, :oldest_open_days) || 0
+          end
+          all_repo_stats
+            .sort_by { |data| -get_oldest_pr_days.call(data) }.first(n)
+            .each do |r|
+              selected_repos.add(r) if get_oldest_pr_days.call(r).positive?
+            end
+        end
+
+        # Filter by top_n_time_to_close_pr
+        if config.top_n_time_to_close_pr
+          n = calculate_n.call(config.top_n_time_to_close_pr)
+          get_avg_close_time_pr = lambda do |data|
+            data.dig(:pr_issue_stats, :pr, :avg_time_to_close_hours) || 0
+          end
+          all_repo_stats
+            .sort_by { |data| -get_avg_close_time_pr.call(data) }
+            .first(n)
+            .each do |r|
+              selected_repos.add(r) if get_avg_close_time_pr.call(r).positive?
+            end
+        end
+      end
+
+      if config.mode.include?('issue')
+        # Filter by top_n_stale_issue
+        if config.top_n_stale_issue
+          n = calculate_n.call(config.top_n_stale_issue)
+          get_stale_issue_count = lambda do |data|
+            data.dig(:pr_issue_stats, :issue, :stale_count) || 0
+          end
+          all_repo_stats
+            .sort_by { |data| -get_stale_issue_count.call(data) }
+            .first(n)
+            .each do |r|
+              selected_repos.add(r) if get_stale_issue_count.call(r).positive?
+            end
+        end
+
+        # Filter by top_n_oldest_issue
+        if config.top_n_oldest_issue
+          n = calculate_n.call(config.top_n_oldest_issue)
+          get_oldest_issue_days = lambda do |data|
+            data.dig(:pr_issue_stats, :issue, :oldest_open_days) || 0
+          end
+          all_repo_stats
+            .sort_by { |data| -get_oldest_issue_days.call(data) }
+            .first(n)
+            .each do |r|
+              selected_repos.add(r) if get_oldest_issue_days.call(r).positive?
+            end
+        end
+
+        # Filter by top_n_time_to_close_issue
+        if config.top_n_time_to_close_issue
+          n = calculate_n.call(config.top_n_time_to_close_issue)
+          get_avg_close_time_issue = lambda do |data|
+            data.dig(:pr_issue_stats, :issue, :avg_time_to_close_hours) || 0
+          end
+          all_repo_stats
+            .sort_by { |data| -get_avg_close_time_issue.call(data) }
+            .first(n)
+            .each do |r|
+              if get_avg_close_time_issue.call(r).positive?
+                selected_repos.add(r)
+              end
+            end
+        end
+      end
+
+      if config.mode.include?('ci')
+        # Filter by top_n_most_broken_ci_days
+        if config.top_n_most_broken_ci_days
+          n = calculate_n.call(config.top_n_most_broken_ci_days)
+          get_broken_days = lambda do |data|
+            data[:ci_failures]&.values&.sum do |branches|
+              branches&.values&.sum { |job| job[:dates]&.size || 0 } || 0
+            end || 0
+          end
+          all_repo_stats
+            .sort_by { |data| -get_broken_days.call(data) }.first(n)
+            .each do |r|
+              selected_repos.add(r) if get_broken_days.call(r).positive?
+            end
+        end
+
+        # Filter by top_n_most_broken_ci_jobs
+        if config.top_n_most_broken_ci_jobs
+          n = calculate_n.call(config.top_n_most_broken_ci_jobs)
+          get_broken_jobs_count = lambda do |data|
+            data[:ci_failures]&.values&.sum do |branches|
+              branches&.keys&.size || 0
+            end || 0
+          end
+          all_repo_stats
+            .sort_by { |data| -get_broken_jobs_count.call(data) }
+            .first(n)
+            .each do |r|
+              selected_repos.add(r) if get_broken_jobs_count.call(r).positive?
+            end
+        end
+      end
+
+      log.debug("Selected #{selected_repos.size} repos after filtering.")
+      selected_repos.to_a # Convert set to array before returning
+    end
+  end
+end