github-daily-digest 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,364 @@
1
+ # github_daily_digest/lib/github_service.rb
2
+ require 'octokit'
3
+ require 'time' # Ensure Time is loaded for ISO8601
4
+ require 'ostruct'
5
+
6
+ module GithubDailyDigest
7
+ class GithubService
8
+ MAX_RETRIES = 3 # Local retry specifically for rate limits within a method call
9
+
10
# Builds the service around an authenticated Octokit client and immediately
# checks that the token works.
#
# @param token [String] GitHub access token handed to Octokit
# @param logger [#info, #debug, #warn, #error, #fatal] sink for log messages
# @param config [Object] settings object; other methods in this class read
#   max_api_retries and rate_limit_sleep_base from it
# @raise [Octokit::Unauthorized] when GitHub rejects the token (logged, then re-raised)
# @raise [StandardError] any other setup failure is logged and re-raised
def initialize(token:, logger:, config:)
  @logger = logger
  @config = config
  # auto_paginate makes list endpoints (members, repos) return every page.
  @client = Octokit::Client.new(access_token: token).tap do |client|
    client.auto_paginate = true
  end
  verify_authentication
rescue Octokit::Unauthorized => auth_error
  @logger.fatal("GitHub authentication failed. Check GITHUB_TOKEN. Error: #{auth_error.message}")
  raise # propagate so the caller stops
rescue => error
  @logger.fatal("Failed to initialize GitHub client: #{error.message}")
  raise
end
23
+
24
# Lists the login names of the members of an organization.
#
# @param org_name [String] organization login
# @return [Array<String>] member logins; an empty array when the lookup
#   fails or Octokit::NotFound is raised
def fetch_members(org_name)
  @logger.info("Fetching members for organization: #{org_name}")
  members = handle_api_errors { @client.organization_members(org_name) }

  unless members
    # handle_api_errors hands back nil once it has given up on the request.
    @logger.error("Could not fetch members for #{org_name}.")
    return []
  end

  members.map(&:login).tap do |logins|
    @logger.info("Found #{logins.count} members.")
  end
rescue Octokit::NotFound => e
  @logger.error("Organization '#{org_name}' not found or token lacks permission. Error: #{e.message}")
  []
end
39
+
40
# Describes the account that owns the configured token.
#
# @return [Hash, nil] :login, :name, :email, :avatar_url and :scopes on
#   success; nil when the wrapped API call gave up or an error was raised
def get_current_user
  handle_api_errors do
    account = @client.user
    profile = %i[login name email avatar_url].each_with_object({}) do |field, info|
      info[field] = account.public_send(field)
    end
    profile[:scopes] = @client.scopes
    profile
  end
rescue => error
  @logger.error("Failed to get current user information: #{error.message}")
  nil
end
56
+
57
# Lists the repositories of an organization (requested with type 'all').
#
# @param org_name [String] organization login
# @return [Array] repository objects as returned by the client; an empty
#   array when the request could not be completed
def fetch_org_repos(org_name)
  @logger.info("Fetching repositories for organization: #{org_name}")

  repos = handle_api_errors do
    @client.organization_repositories(org_name, { type: 'all', per_page: 100 })
  end

  unless repos
    @logger.error("Could not fetch repositories for #{org_name}.")
    return []
  end

  @logger.info("Found #{repos.count} repositories.")

  # Log up to three repositories as a debugging aid.
  repos.first(3).each do |repo|
    @logger.info("Sample repo: #{repo.full_name}, Private: #{repo.private}, Fork: #{repo.fork}")
  end

  repos
end
82
+
83
# Fetches the commits one author made in one repository since a cutoff.
#
# @param repo_full_name [String] repository in "owner/name" form
# @param username [String] login passed to the API as the author filter
# @param since_time [Time, String] cutoff handed to the client unchanged
# @return [Array] commits from the client; an empty array when the request
#   fails or Octokit::Conflict / Octokit::NotFound is raised
def fetch_user_commits_in_repo(repo_full_name, username, since_time)
  @logger.debug("Fetching commits by #{username} in #{repo_full_name} since #{since_time}")
  filters = { author: username, since: since_time }

  # If the server-side author filter proves unreliable, an alternative is to
  # fetch all commits since the cutoff and select on c.author&.login locally.
  found = handle_api_errors(catch_conflicts: true) do
    @client.commits_since(repo_full_name, since_time, filters)
  end

  found || []
rescue Octokit::Conflict, Octokit::NotFound => e
  # The repository may be empty or inaccessible; skip it.
  @logger.warn("Skipping repo #{repo_full_name} for user #{username}. Reason: #{e.message}")
  []
end
98
+
99
# Counts the pull requests in an organization that a user has reviewed and
# that were updated after a cutoff.
#
# @param username [String] login used in the reviewed-by: qualifier
# @param org_name [String] organization used in the org: qualifier
# @param since_time [Time, String] cutoff; values responding to #iso8601 are
#   rendered in ISO 8601 form, anything else is interpolated via #to_s
# @return [Integer] total_count reported by the search, or 0 when the
#   search could not be completed
def search_user_reviews(username, org_name, since_time)
  @logger.debug("Searching PR reviews for user: #{username} since #{since_time}")

  # A Time interpolates as "2024-05-01 12:00:00 UTC"; the embedded spaces
  # would split the updated: qualifier into separate search terms.
  cutoff = since_time.respond_to?(:iso8601) ? since_time.iso8601 : since_time.to_s
  query = "is:pr reviewed-by:#{username} org:#{org_name} updated:>#{cutoff}"

  results = handle_api_errors do
    # Only total_count is needed, so one item per page is requested. The
    # client is created with auto_paginate enabled, which would make it follow
    # every "next" page; switch it off for this call only and restore it after.
    paginate_setting = @client.auto_paginate
    @client.auto_paginate = false
    begin
      @client.search_issues(query, per_page: 1)
    ensure
      @client.auto_paginate = paginate_setting
    end
  end

  count = results ? results.total_count : 0
  @logger.debug("Found #{count} PRs reviewed by #{username} via search.")
  count
end
108
+
109
# Finds the repositories of an organization that have commits on any branch
# since a cutoff, and collects those commits.
#
# Branches are examined one after another (sequentially). A failure while
# examining one repository is logged and that repository is skipped.
#
# @param org_name [String] organization login
# @param since_time [Time, String] cutoff; compared via Time.parse(since_time.to_s)
#   and also passed unchanged to the client's commits call
# @return [Hash{String => Array}] repository full name => unique commits.
#   Each commit has `branch` (first branch it was seen on) and `branches`
#   (every active branch containing it) assigned on it.
def fetch_active_repos(org_name, since_time)
  @logger.info("Fetching active repositories for organization: #{org_name} since #{since_time}")
  repos = fetch_org_repos(org_name)

  @logger.info("Checking #{repos.size} repositories for activity")
  active_repos = {}

  repos.each_with_index do |repo, index|
    repo_full_name = repo.full_name
    @logger.info("Checking for activity in #{repo_full_name} since #{since_time} [#{index + 1}/#{repos.size}]")

    begin
      commits = recent_commits_for_repo(repo_full_name, since_time)
      if commits.any?
        @logger.info("Found #{commits.count} unique commits across active branches in #{repo_full_name}")
        active_repos[repo_full_name] = commits
      end
    rescue => e
      @logger.error("Error checking repo #{repo_full_name}: #{e.message}")
      @logger.error(Array(e.backtrace).join("\n"))
    end

    # Brief pause after every tenth repository to ease off the API. Applied
    # uniformly, including after repositories that were skipped or failed.
    sleep(0.1) if index > 0 && index % 10 == 0
  end

  if active_repos.empty?
    @logger.warn("No active repositories found with commits since #{since_time}")
  else
    @logger.info("Found #{active_repos.size} active repositories with commits out of #{repos.size} total repos")
    active_repos.each do |repo_name, commits|
      @logger.info("Active repo: #{repo_name} with #{commits.size} commits")
    end
  end

  active_repos
end

# Returns the de-duplicated commits made since the cutoff on the active
# branches of one repository (empty when there are no branches or none is active).
private def recent_commits_for_repo(repo_full_name, since_time)
  branches = handle_api_errors(catch_conflicts: true) do
    @client.branches(repo_full_name)
  end

  if branches.nil? || branches.empty?
    @logger.debug("No branches found in #{repo_full_name}")
    return []
  end

  @logger.info("Found #{branches.count} branches in #{repo_full_name}")

  active_branches = active_branch_names(repo_full_name, branches, since_time)
  if active_branches.empty?
    @logger.debug("No active branches found in #{repo_full_name} since #{since_time}")
    return []
  end

  @logger.info("Found #{active_branches.size} active branches in #{repo_full_name}: #{active_branches.join(', ')}")
  merge_commits_by_sha(commits_on_branches(repo_full_name, active_branches, since_time))
end

# Names of the branches whose tip commit is dated at or after the cutoff.
private def active_branch_names(repo_full_name, branches, since_time)
  cutoff = nil # parsed lazily, at most once per repository

  branches.each_with_object([]) do |branch, names|
    tip = branch.commit
    next unless tip

    # The tip's author date comes from the full commit record.
    details = handle_api_errors(catch_conflicts: true) do
      @client.commit(repo_full_name, tip.sha)
    end
    commit_date = details&.commit&.author&.date
    next unless commit_date

    cutoff ||= Time.parse(since_time.to_s)
    names << branch.name if Time.parse(commit_date.to_s) >= cutoff
  end
end

# Commits since the cutoff on each named branch, each tagged with the branch
# it was fetched from. Commits shared by several branches appear repeatedly.
private def commits_on_branches(repo_full_name, branch_names, since_time)
  branch_names.flat_map do |branch_name|
    branch_commits = handle_api_errors(catch_conflicts: true) do
      @client.commits(repo_full_name, { sha: branch_name, since: since_time })
    end
    next [] unless branch_commits && branch_commits.any?

    @logger.debug("Found #{branch_commits.count} commits in branch #{branch_name} of #{repo_full_name}")
    branch_commits.each { |commit| commit.branch = branch_name }
  end
end

# Collapses commits with the same SHA into the first occurrence, recording
# every branch the SHA was seen on in that commit's `branches` list.
private def merge_commits_by_sha(commits)
  merged = commits.each_with_object({}) do |commit, by_sha|
    known = by_sha[commit.sha]
    if known
      known.branches ||= []
      known.branches << commit.branch unless known.branches.include?(commit.branch)
    else
      commit.branches = [commit.branch]
      by_sha[commit.sha] = commit
    end
  end
  merged.values
end
229
+
230
# Groups the commits of the active repositories by author login.
#
# For every commit that has an author login, the full commit record is
# requested so that format_commit can include line-change statistics; if
# that request yields nothing the commit is formatted from the basic data.
#
# @param active_repos [Hash{String => Array}] repository full name => commits
# @return [Hash{String => Array}] author login => formatted commits
def map_commits_to_users(active_repos)
  @logger.info("Mapping commits to users")

  user_commits = active_repos.each_with_object({}) do |(repo_full_name, commits), by_author|
    commits.each do |commit|
      login = commit.author&.login
      next unless login # no GitHub account is linked to this commit

      details = handle_api_errors { @client.commit(repo_full_name, commit.sha) }

      entry = if details
                format_commit(commit, repo_full_name, details)
              else
                format_commit(commit, repo_full_name)
              end
      (by_author[login] ||= []) << entry
    end
  end

  @logger.info("Found commits from #{user_commits.size} users")
  user_commits
end
260
+
261
+ private
262
+
263
# Confirms the token works by fetching the authenticated user, then logs the
# token's scopes and warns when repo or organization scopes appear to be missing.
#
# @return [Object] the user object returned by the client
def verify_authentication
  @logger.info("Verifying GitHub authentication...")
  user = @client.user
  @logger.info("Authenticated to GitHub as user: #{user.login}")

  scopes = @client.scopes
  @logger.info("Token scopes: #{scopes.join(', ')}")

  # 'repo' itself or any 'repo:*' sub-scope counts as repository access.
  repo_access = scopes.any? { |scope| scope == 'repo' || scope.start_with?('repo:') }
  org_access = scopes.include?('read:org') || scopes.include?('admin:org')

  @logger.info("Token has repo scope: #{repo_access}")
  @logger.info("Token has org read scope: #{org_access}")

  unless repo_access
    @logger.warn("WARNING: Token may not have sufficient permissions to access private repositories")
  end
  unless org_access
    @logger.warn("WARNING: Token may not have sufficient permissions to access all organization data")
  end

  user
end
289
+
290
# Runs the given block (an Octokit API call) and turns the common failure
# modes into a nil result, retrying with backoff where that can help.
#
# - Octokit::Conflict: logged, nil returned. With catch_conflicts the
#   "Git Repository is empty" case is logged as a warning rather than an error.
# - Octokit::TooManyRequests and transient server/network errors: retried up
#   to `retries` times, sleeping calculate_backoff(attempt) seconds in between.
# - Any other StandardError: logged, nil returned.
#
# @param retries [Integer] retries allowed after the first attempt
#   (defaults to @config.max_api_retries)
# @param catch_conflicts [Boolean] downgrade the empty-repository conflict to a warning
# @yield the API call to execute
# @return [Object, nil] the block's value, or nil when the call failed
def handle_api_errors(retries = @config.max_api_retries, catch_conflicts: false)
  attempts = 0
  begin
    attempts += 1
    yield
  rescue Octokit::Conflict => e
    if catch_conflicts && e.message.include?('Git Repository is empty')
      @logger.warn("Repository is empty: #{e.message}")
    else
      @logger.error("GitHub API conflict error: #{e.message}")
    end
    nil
  rescue Octokit::TooManyRequests => e
    if attempts <= retries
      sleep_time = calculate_backoff(attempts)
      # The error may carry no response headers at all; an exception raised
      # here would escape this wrapper, so look the header up nil-safely.
      reset_epoch = (e.response_headers || {})['x-ratelimit-reset']
      reset_at = reset_epoch ? Time.at(reset_epoch.to_i) : 'N/A'
      @logger.warn("GitHub rate limit hit (Attempt #{attempts}/#{retries}). Sleeping for #{sleep_time}s. Limit resets at: #{reset_at}")
      sleep sleep_time
      retry
    else
      @logger.error("GitHub rate limit exceeded after #{attempts} attempts. Error: #{e.message}")
      nil
    end
  rescue Octokit::ServerError, Octokit::BadGateway, Net::ReadTimeout, Faraday::ConnectionFailed => e
    # Temporary server or network trouble: worth another attempt.
    if attempts <= retries
      sleep_time = calculate_backoff(attempts)
      @logger.warn("GitHub temporary error (Attempt #{attempts}/#{retries}): #{e.class}. Retrying in #{sleep_time}s.")
      sleep sleep_time
      retry
    else
      @logger.error("GitHub API error after #{attempts} attempts: #{e.class} - #{e.message}")
      nil
    end
  rescue => e
    # Anything else (other Octokit errors, unexpected failures) is not retried.
    @logger.error("Unexpected GitHub API error: #{e.class} - #{e.message}")
    nil
  end
end
330
+
331
# Builds the hash representation of a commit used throughout the digest.
#
# @param commit_data [Object] commit responding to #sha and #commit (the
#   nested git data with #author and #message); may also respond to #branches
# @param repo_full_name [String] repository the commit belongs to
# @param commit_details [Object, nil] full commit record; when it has #stats
#   the additions/deletions/total counts are included
# @return [Hash] :sha, :repo, :date, :message, plus :branches and :stats when
#   available. :date is an ISO 8601 string, or nil when the commit has no
#   author date.
def format_commit(commit_data, repo_full_name, commit_details = nil)
  git_commit = commit_data.commit
  # The nested git author (or its date) can be missing; one such commit
  # should not abort formatting of everything else.
  authored_at = git_commit.author&.date
  date = authored_at.respond_to?(:iso8601) ? authored_at.iso8601 : authored_at&.to_s

  formatted = {
    sha: commit_data.sha,
    repo: repo_full_name,
    date: date,
    message: git_commit.message
  }

  # Branch list is only present on commits that were tagged with one upstream.
  if commit_data.respond_to?(:branches) && commit_data.branches
    formatted[:branches] = commit_data.branches
  end

  stats = commit_details && commit_details.stats
  if stats
    formatted[:stats] = {
      additions: stats.additions,
      deletions: stats.deletions,
      total_changes: stats.total
    }
  end

  formatted
end
357
+
358
# Computes how long to wait before retry number `attempt`.
#
# @param attempt [Integer] 1-based attempt counter
# @return [Float] @config.rate_limit_sleep_base raised to `attempt`, plus a
#   random jitter between 0.0 and 1.0
def calculate_backoff(attempt)
  exponential = @config.rate_limit_sleep_base ** attempt
  jitter = rand(0.0..1.0)
  exponential + jitter
end
362
+
363
+ end
364
+ end