github-daily-digest 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1191 @@
1
+ # github_daily_digest/lib/github_graphql_service.rb
2
+ require 'net/http'
3
+ require 'uri'
4
+ require 'json'
5
+ require 'time'
6
+ require 'ostruct'
7
+
8
+ module GithubDailyDigest
9
+ class GithubGraphQLService
10
# GitHub GraphQL API endpoint (every GraphQL request in this class targets this URL).
GITHUB_API_URL = 'https://api.github.com/graphql'

# GraphQL document that walks organization -> repositories -> branch refs
# ("refs/heads/") -> commit history and returns, for each branch head, the
# commits newer than $since together with author and change-size fields.
#
# Variables: $orgName and $since (GitTimestamp) are required; $repoCursor,
# $refCursor and $commitCursor are optional cursors for the three nested
# connections. Page sizes are fixed in the document: 10 repositories,
# 50 refs and 100 commits per request.
#
# NOTE(review): $refCursor and $commitCursor are arguments of connections
# nested inside `repositories.nodes`, so a single value is applied to every
# repository (and every ref) returned in the page, not to one specific
# repository or branch.
#
# The heredoc is single-quoted, so Ruby performs no interpolation and the
# `#` lines inside it are GraphQL comments that are sent with the query.
ALL_BRANCH_COMMITS_QUERY = <<-'GRAPHQL'
  query OrgAllBranchesChanges($orgName: String!, $since: GitTimestamp!, $repoCursor: String, $refCursor: String, $commitCursor: String) {
    organization(login: $orgName) {
      repositories(first: 10, after: $repoCursor) { # Adjust repo page size as needed
        pageInfo {
          hasNextPage
          endCursor
        }
        nodes {
          name
          refs(refPrefix: "refs/heads/", first: 50, after: $refCursor) { # Adjust branch page size
            pageInfo {
              hasNextPage
              endCursor
            }
            nodes {
              name
              target {
                ... on Commit {
                  history(since: $since, first: 100, after: $commitCursor) { # Adjust commit page size
                    pageInfo {
                      hasNextPage
                      endCursor
                    }
                    nodes {
                      oid
                      message
                      committedDate
                      author {
                        name
                        email
                        user {
                          login
                        }
                      }
                      additions
                      deletions
                      changedFiles
                      # Fetch associated pull requests (optional, can be heavy)
                      # associatedPullRequests(first: 1) {
                      #   nodes {
                      #     number
                      #     title
                      #   }
                      # }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
GRAPHQL
70
+
71
# Builds the service: stores the collaborators, prepares an HTTPS client and
# the request headers for the GraphQL endpoint, then calls
# verify_authentication.
#
# @param token [String] GitHub token, sent as a Bearer credential
# @param logger [#fatal] logger used for diagnostics
# @param config [#github_org_name] configuration object; the first
#   comma-separated organization name becomes the current organization
# @raise [StandardError] re-raises any error from setup or verification after
#   logging it at fatal level
def initialize(token:, logger:, config:)
  @token = token
  @logger = logger
  @config = config
  @current_org_name = config.github_org_name.to_s.split(',').first

  # HTTPS client pointed at the GraphQL endpoint
  @uri = URI.parse(GITHUB_API_URL)
  @http = Net::HTTP.new(@uri.host, @uri.port).tap { |client| client.use_ssl = true }

  # Headers attached to GraphQL requests
  @headers = {
    'Authorization' => "Bearer #{token}",
    'Content-Type' => 'application/json',
    'User-Agent' => 'GitHub-Daily-Digest/1.0'
  }

  verify_authentication
rescue => e
  @logger.fatal("GitHub GraphQL initialization failed: #{e.message}")
  raise
end
94
+
95
# Lists the logins of every member of an organization.
#
# The membersWithRole connection is followed page by page (100 members per
# request), so organizations with more than 100 members are returned in full
# instead of being silently truncated to the first page.
#
# @param org_name [String] organization login
# @return [Array<String>] member logins. When a page comes back without the
#   expected structure the logins gathered so far are returned (an empty
#   array if that was the first page); when an exception is raised the result
#   is an empty array.
def fetch_members(org_name)
  @logger.info("Fetching members for organization: #{org_name} via GraphQL")

  query_string = <<-GRAPHQL
    query($org_name: String!, $cursor: String) {
      organization(login: $org_name) {
        membersWithRole(first: 100, after: $cursor) {
          pageInfo {
            hasNextPage
            endCursor
          }
          nodes {
            login
            name
          }
        }
      }
    }
  GRAPHQL

  members = []
  cursor = nil

  loop do
    response = execute_query(query_string, variables: { org_name: org_name, cursor: cursor })
    connection = response && response.dig("data", "organization", "membersWithRole")

    unless connection
      @logger.error("Could not fetch members for #{org_name} via GraphQL.")
      return members
    end

    members.concat(Array(connection["nodes"]).compact.map { |node| node["login"] })

    page_info = connection["pageInfo"] || {}
    next_cursor = page_info["endCursor"]
    # Stop on the last page, and also when the cursor does not advance, so a
    # malformed response can never make this loop spin forever.
    break unless page_info["hasNextPage"] && next_cursor && next_cursor != cursor

    cursor = next_cursor
  end

  @logger.info("Found #{members.count} members via GraphQL.")
  members
rescue => e
  @logger.error("Failed to fetch organization members: #{e.message}")
  []
end
125
+
126
# Returns statistics for the organization's repositories (first 50) whose
# `updatedAt` is at or after +since_time+.
#
# Fixes over the previous version:
# * the query no longer declares a `$since_date` variable it never uses
#   (GraphQL validation rejects operations with unused variables);
# * `totalCount` is actually requested, so the log line can report it;
# * +since_time+ may be a Time or anything whose `to_s` Time.parse accepts,
#   and it is parsed once instead of once per repository.
#
# @param org_name [String] organization login
# @param since_time [Time, String] lower bound for `updatedAt`
# @return [Hash{String => Hash}] repository stats keyed by "owner/name";
#   empty on any failure (the error is logged)
def fetch_active_repos(org_name, since_time)
  @logger.info("Fetching active repositories for organization: #{org_name} since #{since_time} via GraphQL")

  since_threshold = since_time.is_a?(Time) ? since_time : Time.parse(since_time.to_s)

  query_string = <<-GRAPHQL
    query($org_name: String!) {
      organization(login: $org_name) {
        repositories(first: 50) {
          totalCount
          nodes {
            name
            nameWithOwner
            isPrivate
            isFork
            createdAt
            updatedAt
            stargazerCount
            forkCount
            diskUsage
            languages(first: 10, orderBy: {field: SIZE, direction: DESC}) {
              edges {
                size
                node {
                  name
                  color
                }
              }
            }
            defaultBranchRef {
              name
            }
          }
        }
      }
    }
  GRAPHQL

  response = execute_query(query_string, variables: { org_name: org_name })

  active_repos = {}
  repositories = response && response.dig("data", "organization", "repositories")

  if repositories
    repos = Array(repositories["nodes"]).compact
    total_count = repositories["totalCount"]

    @logger.info("Found #{repos.size} repositories (out of #{total_count} total) in #{org_name}")

    repos.each do |repo|
      # Only repositories touched since the threshold count as active.
      updated_at = repo["updatedAt"]
      next unless updated_at && Time.parse(updated_at) >= since_threshold

      # Languages are requested ordered by size, so the first edge is the primary one.
      lang_edge = repo.dig("languages", "edges")&.first
      primary_language = lang_edge && {
        name: lang_edge["node"]["name"],
        color: lang_edge["node"]["color"],
        size: lang_edge["size"]
      }

      repo_full_name = repo["nameWithOwner"]
      active_repos[repo_full_name] = {
        name: repo["name"],
        full_name: repo_full_name,
        private: repo["isPrivate"],
        fork: repo["isFork"],
        created_at: repo["createdAt"],
        updated_at: updated_at,
        stars: repo["stargazerCount"],
        forks: repo["forkCount"],
        size: repo["diskUsage"],
        default_branch: repo["defaultBranchRef"] ? repo["defaultBranchRef"]["name"] : nil,
        primary_language: primary_language
      }
    end
  end

  if active_repos.empty?
    @logger.warn("No active repositories found with commits since #{since_time}")
  else
    @logger.info("Found #{active_repos.size} active repositories with commits via GraphQL")
  end

  active_repos
rescue => e
  @logger.error("Failed to fetch active repos via GraphQL: #{e.message}")
  {}
end
223
+
224
# Collects pull-request reviews submitted at or after +since_time+, grouped
# by reviewer login.
#
# One request is made for the first 25 repositories of the organization, the
# 20 most recently updated pull requests of each, and up to 20 reviews per
# pull request; the time filtering happens locally on the returned data.
#
# @param org_name [String] organization login
# @param since_time [Time, String, Object] lower bound; any other type falls
#   back to seven days ago (with a warning)
# @return [Hash{String => Array<Hash>}] reviewer login => review entries
#   (:repo, :pr_number, :pr_title, :pr_url, :submitted_at, :state); empty
#   hash when an exception is raised
def fetch_pull_request_reviews(org_name, since_time)
  @logger.info("Fetching pull request reviews for organization: #{org_name} since #{since_time} via GraphQL")

  # Normalize the lower bound to an ISO8601 string
  since_iso8601 =
    case since_time
    when Time
      since_time.iso8601
    when String
      # Taken as-is; expected to already be parseable
      since_time
    else
      @logger.warn("Invalid since_time format: #{since_time.inspect}, using default (7 days ago)")
      (Time.now - 7*24*60*60).iso8601
    end

  # Threshold used for the local filtering; unparseable input means "7 days ago"
  cutoff = begin
    Time.parse(since_iso8601)
  rescue StandardError
    Time.now - 7*24*60*60
  end

  # Reads node[key] as a Time; any failure (nil node, nil or bad value) gives nil
  timestamp_of = lambda do |node, key|
    begin
      Time.parse(node[key])
    rescue StandardError
      nil
    end
  end

  # Page sizes are kept small to stay under the API node limit
  query_string = <<-GRAPHQL
    query FetchPRReviews($orgName: String!) {
      organization(login: $orgName) {
        repositories(first: 25) {
          nodes {
            name
            nameWithOwner
            pullRequests(first: 20, orderBy: {field: UPDATED_AT, direction: DESC}) {
              nodes {
                number
                title
                url
                createdAt
                updatedAt
                author {
                  login
                }
                reviews(first: 20) {
                  nodes {
                    author {
                      login
                    }
                    submittedAt
                    state
                  }
                }
              }
            }
          }
        }
      }
    }
  GRAPHQL

  response = execute_query(query_string, variables: { orgName: org_name })

  user_reviews = {}
  organization = response && response["data"] && response["data"]["organization"]

  if organization
    organization["repositories"]["nodes"].each do |repo|
      repo_name = repo["nameWithOwner"]
      pull_requests = repo["pullRequests"] && repo["pullRequests"]["nodes"]
      next unless pull_requests

      pull_requests.each do |pr|
        # Pull requests untouched since the cutoff cannot hold new reviews
        pr_updated_at = timestamp_of.call(pr, "updatedAt")
        next unless pr_updated_at && pr_updated_at >= cutoff

        reviews = pr["reviews"] && pr["reviews"]["nodes"]
        next unless reviews

        reviews.each do |review|
          submitted_at = timestamp_of.call(review, "submittedAt")
          next unless submitted_at && submitted_at >= cutoff
          next unless review["author"] && review["author"]["login"]

          (user_reviews[review["author"]["login"]] ||= []) << {
            repo: repo_name,
            pr_number: pr["number"],
            pr_title: pr["title"],
            pr_url: pr["url"],
            submitted_at: review["submittedAt"],
            state: review["state"]
          }
        end
      end
    end
  end

  @logger.info("Found #{user_reviews.keys.size} users with PR reviews")
  user_reviews.each do |username, reviews|
    @logger.info("User #{username} has #{reviews.size} PR reviews")
  end

  user_reviews
rescue => e
  @logger.error("Failed to fetch PR reviews via GraphQL: #{e.message}")
  {}
end
330
+
331
# Fetches commits from ALL branches across the organization's repositories
# since a given time.
#
# Repositories are read page by page with ALL_BRANCH_COMMITS_QUERY. For every
# repository the first page of branches comes with the repository page; any
# further branch pages are fetched by fetch_remaining_branch_pages.
#
# Fix over the previous version: branch pagination used to start from a nil
# ref cursor and always sent `repoCursor: nil`. That re-fetched (and counted
# twice) the first branch page, and could never find repositories that were
# not on the first repository page. The cursor of the current repository page
# and the end cursor of the already processed branch page are now used.
#
# @param org_name [String] organization login
# @param since_time [Time, String, Object] lower bound for commits; other
#   types fall back to seven days ago (with a warning)
# @return [Array<Hash>] commit payloads appended by process_branches_for_repo
def fetch_all_branch_commits(org_name, since_time)
  @logger.info("Fetching commits from all branches for organization: #{org_name} since #{since_time} via GraphQL")
  all_commits = []
  repo_cursor = nil
  repo_has_next_page = true
  repo_count = 0
  max_repos_to_process = 100 # Upper bound on repository PAGES requested

  # Convert since_time to an ISO8601 string; strings are passed through as-is
  since_iso8601 = if since_time.is_a?(Time)
                    since_time.iso8601
                  elsif since_time.is_a?(String)
                    since_time
                  else
                    @logger.warn("Invalid since_time format: #{since_time.inspect}, using default (7 days ago)")
                    (Time.now - 7*24*60*60).iso8601
                  end

  @logger.info("Starting to fetch repositories for organization: #{org_name}")

  while repo_has_next_page && repo_count < max_repos_to_process
    repo_count += 1
    @logger.info("Fetching repository page #{repo_count} for organization: #{org_name}")

    # Remember which cursor produced this page: branch pagination has to
    # replay it to get the same repositories back.
    page_cursor = repo_cursor
    repo_page_variables = { orgName: org_name, since: since_iso8601, repoCursor: page_cursor }
    repo_response = execute_query(ALL_BRANCH_COMMITS_QUERY, variables: repo_page_variables)

    repos_data = repo_response && repo_response.dig('data', 'organization', 'repositories')
    unless repos_data
      @logger.error("Failed to fetch repositories or invalid response structure: #{repo_response.inspect}")
      break
    end

    repo_page_info = repos_data['pageInfo']
    repo_has_next_page = repo_page_info['hasNextPage']
    repo_cursor = repo_page_info['endCursor']

    repo_nodes = repos_data['nodes']
    @logger.info("Processing #{repo_nodes.size} repositories from page #{repo_count}")

    repo_nodes.each_with_index do |repo_node, repo_index|
      repo_name = repo_node['name']
      @logger.info("Processing repository #{repo_index + 1}/#{repo_nodes.size}: #{repo_name}")

      refs = repo_node['refs']
      unless refs && refs['nodes']
        @logger.warn("No refs found for repo: #{repo_name}")
        next
      end

      # First page of branches arrived together with the repository page
      process_branches_for_repo(repo_name, refs['nodes'], all_commits)

      # Remaining branch pages, if any
      fetch_remaining_branch_pages(org_name, since_iso8601, page_cursor, repo_name, refs['pageInfo'] || {}, all_commits)
    end

    @logger.info("Processed repository page #{repo_count}, found #{all_commits.size} commits so far. Next page: #{repo_has_next_page}")
  end

  @logger.info("Completed fetching commits. Found #{all_commits.size} commits across all branches since #{since_time}")
  all_commits
end

# Helper for fetch_all_branch_commits: walks the branch pages of one
# repository that follow the page already processed.
#
# @param org_name [String] organization login
# @param since_iso8601 [String] lower bound passed to the query
# @param page_cursor [String, nil] cursor that produced the repository page
#   containing +repo_name+
# @param repo_name [String] repository to look for in each response
# @param refs_page_info [Hash] pageInfo of the branch page already processed
# @param all_commits [Array<Hash>] accumulator, appended to in place
# @return [void]
def fetch_remaining_branch_pages(org_name, since_iso8601, page_cursor, repo_name, refs_page_info, all_commits)
  branch_has_next_page = refs_page_info['hasNextPage']
  branch_cursor = refs_page_info['endCursor']

  while branch_has_next_page && branch_cursor
    @logger.info("Fetching additional branches for repo: #{repo_name}")
    branch_variables = {
      orgName: org_name,
      since: since_iso8601,
      repoCursor: page_cursor,
      refCursor: branch_cursor
    }

    branch_response = execute_query(ALL_BRANCH_COMMITS_QUERY, variables: branch_variables)

    page_repos = branch_response && branch_response.dig('data', 'organization', 'repositories', 'nodes')
    unless page_repos
      @logger.error("Failed to fetch additional branches or invalid response")
      break
    end

    target_repo = page_repos.find { |repo| repo && repo['name'] == repo_name }
    unless target_repo && target_repo['refs']
      @logger.warn("Couldn't find #{repo_name} in the branch pagination response")
      break
    end

    target_refs = target_repo['refs']
    process_branches_for_repo(repo_name, target_refs['nodes'], all_commits)

    next_page_info = target_refs['pageInfo'] || {}
    next_cursor = next_page_info['endCursor']
    # A cursor that does not move would repeat the same page forever
    break if next_cursor == branch_cursor

    branch_has_next_page = next_page_info['hasNextPage']
    branch_cursor = next_cursor
  end
end
448
+
449
# Helper method to process one page of branches for a repository.
#
# Each branch that carries commit history is handed to
# process_commits_for_branch, which appends to +all_commits+.
#
# Fix over the previous version: when a branch reported more than one page of
# commits, the method built an unused variables hash that referenced
# `since_time`, a name that does not exist in this method, and therefore
# raised NameError. Fetching further commit pages is not implemented here, so
# the situation is now only logged (the `while` that always ran exactly once
# is gone as well).
#
# @param repo_name [String] repository name, used for logging and payloads
# @param branch_nodes [Array<Hash>, nil] ref nodes as returned by the query
# @param all_commits [Array<Hash>] accumulator, appended to in place
# @return [void]
def process_branches_for_repo(repo_name, branch_nodes, all_commits)
  branch_count = 0

  Array(branch_nodes).each do |ref_node|
    branch_count += 1
    branch_name = ref_node['name']
    @logger.debug("Processing branch #{branch_count}: #{branch_name} in repo: #{repo_name}")

    history = (ref_node['target'] || {})['history']
    next unless history && history['nodes']

    # First page of commits for this branch
    process_commits_for_branch(repo_name, branch_name, history, all_commits)

    # Later commit pages are not fetched; make the truncation visible in the log.
    page_info = history['pageInfo'] || {}
    next unless page_info['hasNextPage']

    @logger.info("Would fetch additional commits for #{repo_name}/#{branch_name} after cursor #{page_info['endCursor']}")
  end

  @logger.info("Processed #{branch_count} branches in #{repo_name}") if branch_count > 0
end
495
+
496
# Helper method that turns one page of commit history into commit payloads.
#
# Every node in history_data['nodes'] becomes a hash of the form
# { repo:, branch:, commit: { oid:, message:, committedDate:, author:,
# additions:, deletions:, changedFiles: } } appended to +all_commits+.
# The author is identified by the linked GitHub login when there is one
# (email left nil), otherwise by the raw commit name and email.
#
# @param repo_name [String] repository the commits belong to
# @param branch_name [String] branch the history was read from
# @param history_data [Hash] history connection containing a 'nodes' array
# @param all_commits [Array<Hash>] accumulator, appended to in place
def process_commits_for_branch(repo_name, branch_name, history_data, all_commits)
  nodes = history_data['nodes']

  nodes.each do |node|
    login = node.dig('author', 'user', 'login')

    # Prefer the GitHub login; fall back to what the commit itself recorded
    author = if login
               { name: login, email: nil }
             else
               { name: node.dig('author', 'name'), email: node.dig('author', 'email') }
             end

    @logger.debug("Commit in #{repo_name}/#{branch_name} by author: #{author[:name] || 'unknown'}")

    all_commits << {
      repo: repo_name,
      branch: branch_name,
      commit: {
        oid: node['oid'],
        message: node['message'],
        committedDate: node['committedDate'],
        author: author,
        additions: node['additions'],
        deletions: node['deletions'],
        changedFiles: node['changedFiles'] || 0
      }
    }
  end

  @logger.info("Found #{nodes.size} commits in #{repo_name}/#{branch_name}") unless nodes.empty?
end
547
+
548
# Groups structured commits (as produced by fetch_all_branch_commits) by the
# identity of their author.
#
# The grouping key is the author name, or the email when the name is blank.
# Generic names ("GitHub", "GitHub Action", "github-actions[bot]") are
# replaced by the email when one is present. Commits with neither a usable
# name nor email are dropped.
#
# @param commits [Array<Hash>] entries with :repo, :branch and :commit keys
# @return [Hash{String => Array<Hash>}] identity => commit summaries with
#   :repo, :branch, :sha, :message, :date (Time), :stats and :files
def map_commits_to_users(commits)
  @logger.info("Mapping commits to users")

  generic_names = ['GitHub', 'GitHub Action', 'github-actions[bot]']
  filled = ->(text) { text && !text.strip.empty? }

  user_commits = commits.each_with_object(Hash.new { |hash, key| hash[key] = [] }) do |entry, grouped|
    details = entry[:commit]
    next unless details && details[:author] # Needs both commit and author data

    author_name = details[:author][:name]
    author_email = details[:author][:email]

    # Name first, email as the fallback identifier
    identity = filled.call(author_name) ? author_name : author_email
    next unless filled.call(identity)

    # Generic committer names say nothing about who it was; the email does
    identity = author_email if generic_names.include?(identity) && author_email && !author_email.empty?

    # Bot no-reply addresses without a specific name are only reported, not dropped
    if identity.end_with?('[bot]@users.noreply.github.com') && !filled.call(author_name)
      @logger.debug("Skipping potential bot commit without specific author name: #{author_email}")
    end

    grouped[identity] << {
      repo: entry[:repo],
      branch: entry[:branch],
      sha: details[:oid],
      message: details[:message],
      date: Time.parse(details[:committedDate]),
      stats: { additions: details[:additions], deletions: details[:deletions] },
      files: details[:changedFiles] # changedFiles is a count
    }
  end

  @logger.info("Mapped commits from #{user_commits.keys.size} unique users/emails")
  user_commits
end
593
+
594
# Fetch repository statistics for a specific organization (first 100
# repositories), optionally restricted to those updated at or after
# +since_time+.
#
# Fixes over the previous version:
# * a +since_time+ that was neither nil, Time nor String left the converted
#   value nil, so the comparison called Time.parse(nil), raised, and the
#   whole call returned {}; any value whose `to_s` Time.parse accepts
#   (e.g. a Date) now works, in line with the sibling fetch methods;
# * the threshold is parsed once instead of once per repository;
# * the summary is logged once instead of twice with different counts.
#
# @param org_name [String] organization login
# @param since_time [Time, String, #to_s, nil] lower bound for `updatedAt`;
#   nil disables the filter
# @return [Hash{String => Hash}] repository stats keyed by "owner/name";
#   empty on any failure (the error is logged)
def fetch_repository_stats(org_name, since_time = nil)
  @logger.info("Fetching repository statistics for organization: #{org_name} via GraphQL")

  since_threshold =
    if since_time.nil?
      nil
    elsif since_time.is_a?(Time)
      since_time
    else
      Time.parse(since_time.to_s)
    end

  query_string = <<-GRAPHQL
    query FetchRepoStats($orgName: String!) {
      organization(login: $orgName) {
        repositories(first: 100) {
          totalCount
          nodes {
            name
            nameWithOwner
            isPrivate
            isFork
            createdAt
            updatedAt
            stargazerCount
            forkCount
            diskUsage
            languages(first: 10, orderBy: {field: SIZE, direction: DESC}) {
              edges {
                size
                node {
                  name
                  color
                }
              }
            }
            defaultBranchRef {
              name
            }
          }
        }
      }
    }
  GRAPHQL

  response = execute_query(query_string, variables: { orgName: org_name })

  active_repos = {}
  total_count = nil
  repositories = response && response.dig("data", "organization", "repositories")

  if repositories
    total_count = repositories["totalCount"]

    Array(repositories["nodes"]).compact.each do |repo|
      # With a threshold, keep only repositories updated at or after it
      if since_threshold
        updated_at = repo["updatedAt"]
        next unless updated_at && Time.parse(updated_at) >= since_threshold
      end

      # Languages are requested ordered by size, so the first edge is the primary one
      lang_edge = repo.dig("languages", "edges")&.first
      primary_language = lang_edge && {
        name: lang_edge["node"]["name"],
        color: lang_edge["node"]["color"],
        size: lang_edge["size"]
      }

      repo_full_name = repo["nameWithOwner"]
      active_repos[repo_full_name] = {
        name: repo["name"],
        full_name: repo_full_name,
        private: repo["isPrivate"],
        fork: repo["isFork"],
        created_at: repo["createdAt"],
        updated_at: repo["updatedAt"],
        stars: repo["stargazerCount"],
        forks: repo["forkCount"],
        size: repo["diskUsage"],
        default_branch: repo["defaultBranchRef"] ? repo["defaultBranchRef"]["name"] : nil,
        primary_language: primary_language
      }
    end
  end

  @logger.info("Found #{active_repos.size} repositories (out of #{total_count} total) in #{org_name}")
  active_repos
rescue => e
  @logger.error("Failed to fetch repository stats via GraphQL: #{e.message}")
  {}
end
698
+
699
# Fetch detailed user profile information for a list of usernames.
#
# Usernames are processed in batches of 10 (to stay under GraphQL complexity
# limits) with a short pause BETWEEN batches to go easy on rate limits.
#
# Fixes over the previous version: no pause after the final batch (it only
# delayed the caller), and nil input is treated as an empty list instead of
# raising NoMethodError.
#
# @param usernames [Array<String>, nil] logins to look up
# @return [Hash{String => Hash}] profile data keyed by login, merged from
#   fetch_user_profiles_batch
def fetch_user_profiles(usernames)
  usernames = Array(usernames)
  @logger.info("Fetching user profiles for #{usernames.size} users via GraphQL")

  user_profiles = {}
  batches = usernames.each_slice(10).to_a

  batches.each_with_index do |batch, index|
    batch_response = fetch_user_profiles_batch(batch)
    user_profiles.merge!(batch_response) if batch_response
    sleep(0.5) if index < batches.size - 1 # Small delay between batches only
  end

  @logger.info("Retrieved profile data for #{user_profiles.size} users")
  user_profiles
end
715
+
716
# Helper method to fetch the profiles of one batch of users in one request.
#
# The query is generated: each username gets an alias (user0, user1, ...) and
# a matching String! variable, so logins are always passed as variables and
# never spliced into the query text.
#
# Fixes over the previous version: an empty (or nil) batch returns {} without
# sending a request (it used to generate `query() { }`, which is not a valid
# GraphQL document), and the alias list is built once instead of twice.
#
# @param usernames [Array<String>, nil] logins for this batch
# @return [Hash{String => Hash}] login => { login:, name:, avatar_url:, bio:,
#   website:, created_at:, company:, location: }; users missing from the
#   response are omitted
def fetch_user_profiles_batch(usernames)
  usernames = Array(usernames)
  return {} if usernames.empty?

  aliases = usernames.each_index.map { |index| "user#{index}" }

  variables = {}
  aliases.each_with_index { |alias_name, index| variables[alias_name.to_sym] = usernames[index] }

  variable_definitions = aliases.map { |alias_name| "$#{alias_name}: String!" }.join(', ')
  selections = aliases.map do |alias_name|
    "#{alias_name}: user(login: $#{alias_name}) { login name avatarUrl bio websiteUrl createdAt company location }"
  end

  query_string = <<-GRAPHQL
    query(#{variable_definitions}) {
      #{selections.join("\n")}
    }
  GRAPHQL

  response = execute_query(query_string, variables: variables)
  data = response && response["data"]
  return {} unless data

  aliases.each_with_object({}) do |alias_name, user_profiles|
    user_data = data[alias_name]
    next unless user_data # Unknown logins come back as null

    user_profiles[user_data["login"]] = {
      login: user_data["login"],
      name: user_data["name"],
      avatar_url: user_data["avatarUrl"],
      bio: user_data["bio"],
      website: user_data["websiteUrl"],
      created_at: user_data["createdAt"],
      company: user_data["company"],
      location: user_data["location"]
    }
  end
end
761
+
762
# Fetch trending repositories within the organization for a time period.
#
# Looks at the 50 most recently updated repositories and scores each one as
# commits-since * 3 + open pull requests * 5 + open issues * 2, where commits
# are counted on the default branch. Repositories without a default-branch
# history, not updated since +since_time+, or with a score of zero are left
# out.
#
# @param org_name [String] organization login
# @param since_time [Time, String, #to_s] start of the period
# @return [Array<Hash>] entries with :name, :full_name, :updated_at,
#   :commits, :issues, :pull_requests, :stars, :forks, :watchers and
#   :activity_score, highest score first; empty when the response lacks the
#   expected structure
def fetch_trending_repositories(org_name, since_time)
  @logger.info("Fetching trending repositories for organization: #{org_name} since #{since_time} via GraphQL")

  since_time_formatted = Time.parse(since_time.to_s).iso8601
  cutoff = Time.parse(since_time_formatted)

  query_string = <<-GRAPHQL
    query($org_name: String!, $since_date: GitTimestamp!) {
      organization(login: $org_name) {
        repositories(first: 50, orderBy: {field: UPDATED_AT, direction: DESC}) {
          nodes {
            name
            nameWithOwner
            updatedAt
            stargazerCount
            forkCount
            watchers { totalCount }
            defaultBranchRef {
              target {
                ... on Commit {
                  history(since: $since_date) {
                    totalCount
                  }
                }
              }
            }
            issues(states: OPEN) {
              totalCount
            }
            pullRequests(states: OPEN) {
              totalCount
            }
          }
        }
      }
    }
  GRAPHQL

  response = execute_query(query_string, variables: {
    org_name: org_name,
    since_date: since_time_formatted
  })

  repositories = response && response["data"] && response["data"]["organization"] &&
                 response["data"]["organization"]["repositories"]
  return [] unless repositories

  trending_repos = repositories["nodes"].each_with_object([]) do |repo, ranked|
    default_ref = repo["defaultBranchRef"]
    target = default_ref && default_ref["target"]
    history = target && target["history"]
    next unless history

    commit_count = history["totalCount"]
    issue_count = repo["issues"]["totalCount"]
    pr_count = repo["pullRequests"]["totalCount"]
    updated_at = begin
      Time.parse(repo["updatedAt"])
    rescue StandardError
      nil
    end

    # Ignore repositories that were not touched in the period
    next unless updated_at && updated_at >= cutoff

    # Weighted activity: pull requests count most, then commits, then issues
    activity_score = (commit_count * 3) + (pr_count * 5) + (issue_count * 2)
    next unless activity_score > 0

    ranked << {
      name: repo["name"],
      full_name: repo["nameWithOwner"],
      updated_at: repo["updatedAt"],
      commits: commit_count,
      issues: issue_count,
      pull_requests: pr_count,
      stars: repo["stargazerCount"],
      forks: repo["forkCount"],
      watchers: repo["watchers"]["totalCount"],
      activity_score: activity_score
    }
  end

  # Most active first
  trending_repos.sort_by! { |entry| -entry[:activity_score] }
  trending_repos
end
848
+
849
# Fetch code changes for a specific commit.
#
# The commit totals (additions, deletions, changed-file count) come from
# GraphQL; the per-file list is obtained from fetch_commit_files_via_rest,
# because the GraphQL commit object used here carries no file details.
#
# @param repo_name [String] repository in "owner/repo" form
# @param commit_oid [String] commit SHA
# @return [Hash] { oid:, additions:, deletions:, changed_files:, files: },
#   or an empty hash when the name is malformed, the commit cannot be
#   fetched, or an exception is raised
def fetch_commit_changes(repo_name, commit_oid)
  @logger.debug("Fetching code changes for commit: #{commit_oid} in repo: #{repo_name}")

  # The query needs owner and repository name separately
  owner, repo = repo_name.split('/')
  unless owner && repo
    @logger.error("Invalid repository name format: #{repo_name}. Expected format: 'owner/repo'")
    return {}
  end

  query_string = <<-GRAPHQL
    query($owner: String!, $repo: String!, $oid: GitObjectID!) {
      repository(owner: $owner, name: $repo) {
        object(oid: $oid) {
          ... on Commit {
            oid
            additions
            deletions
            changedFiles
            # Use commitResourcePath to get the URL that can be used for REST API fallback
            commitResourcePath
          }
        }
      }
    }
  GRAPHQL

  response = execute_query(query_string, variables: { owner: owner, repo: repo, oid: commit_oid })

  commit_object = response && response["data"] && response["data"]["repository"] &&
                  response["data"]["repository"]["object"]

  unless commit_object
    @logger.warn("Could not fetch changes for commit: #{commit_oid}")
    return {}
  end

  # File-level details are only available through the REST endpoint
  files_data = fetch_commit_files_via_rest(owner, repo, commit_oid)

  {
    oid: commit_object["oid"],
    additions: commit_object["additions"],
    deletions: commit_object["deletions"],
    changed_files: commit_object["changedFiles"],
    files: files_data
  }
rescue => e
  @logger.error("Error fetching commit changes: #{e.message}")
  {}
end
908
+
909
# Fetch the files changed by a commit through the REST API (used because the
# GraphQL commit query does not return file details).
#
# Issues GET https://api.github.com/repos/<owner>/<repo>/commits/<sha> with
# the service token. HTML bodies and non-200 statuses are treated as errors.
#
# @param owner [String] repository owner
# @param repo [String] repository name
# @param commit_oid [String] commit SHA
# @return [Array<Hash>] one { path:, additions:, deletions:, patch: } hash per
#   file; empty when the response has no "files" entry or any error occurs
#   (errors are logged, not raised)
def fetch_commit_files_via_rest(owner, repo, commit_oid)
  @logger.debug("Fetching commit files via REST API for #{owner}/#{repo} commit: #{commit_oid}")

  # Dedicated client for the REST host
  endpoint = URI.parse("https://api.github.com/repos/#{owner}/#{repo}/commits/#{commit_oid}")
  client = Net::HTTP.new(endpoint.host, endpoint.port)
  client.use_ssl = true

  reply = client.get(endpoint.path, {
    "Authorization" => "Bearer #{@token}",
    "User-Agent" => "GitHub-Daily-Digest",
    "Content-Type" => "application/json"
  })

  # An HTML page instead of JSON usually means rate limiting or an auth problem
  if reply.body.strip.start_with?('<!DOCTYPE', '<html')
    raise "Received HTML response instead of JSON. This usually indicates rate limiting or authentication issues. Status: #{reply.code}"
  end

  unless reply.code.to_i == 200
    raise "GitHub API returned non-200 status code: #{reply.code}, body: #{reply.body[0..100]}"
  end

  payload = JSON.parse(reply.body)
  files = payload && payload["files"]
  return [] unless files

  @logger.debug("Successfully fetched #{files.count} changed files via REST API")

  files.map do |file|
    {
      path: file["filename"],
      additions: file["additions"],
      deletions: file["deletions"],
      patch: file["patch"]
    }
  end
rescue => e
  @logger.error("Error fetching commit files via REST API: #{e.message}")
  []
end
960
+
961
# Fetch changes for a batch of commits.
#
# Commits lacking :repo or :sha are ignored. The remaining ones are ordered
# newest first by :date (an unparseable date counts as "now"), cut down to
# +max_commits+, and each is returned merged with a :code_changes entry
# obtained from fetch_commit_changes. Bare repository names are prefixed with
# the current organization.
#
# @param commits [Array<Hash>, nil] commit summaries with :repo, :sha, :date
# @param max_commits [Integer] maximum number of commits to look up
# @return [Array<Hash>] the selected commits plus :code_changes ({} when the
#   lookup for that commit raised)
def fetch_commits_changes(commits, max_commits = 100)
  return [] if commits.nil? || commits.empty?

  @logger.info("Fetching code changes for #{[commits.size, max_commits].min} of #{commits.size} commits")

  # Show what an incoming commit looks like
  @logger.debug("Sample commit structure: #{commits.first.inspect}") if commits.first

  # Only commits that name both a repository and a hash can be looked up
  usable = commits.select { |entry| entry && entry[:repo] && entry[:sha] }

  if usable.empty?
    @logger.warn("No valid commits found with required data (commit hash and repo)")
    return []
  end

  @logger.debug("Valid commit sample: #{usable.first.inspect}") if usable.first

  # Newest first, capped to avoid overloading the API
  oldest_first = usable.sort_by do |entry|
    begin
      Time.parse(entry[:date].to_s)
    rescue StandardError
      Time.now
    end
  end
  selected = oldest_first.reverse.first(max_commits)

  commits_with_changes = selected.map do |commit|
    begin
      repo_name = commit[:repo]
      commit_oid = commit[:sha]

      # The lookup needs "owner/name"; bare names get the current organization
      repo_full_name =
        if repo_name.include?('/')
          repo_name
        else
          org_name = @current_org_name || @config.github_org_name.to_s.split(',').first
          "#{org_name}/#{repo_name}"
        end

      @logger.debug("Fetching changes for commit #{commit_oid} in repo #{repo_full_name}")

      commit.merge(code_changes: fetch_commit_changes(repo_full_name, commit_oid))
    rescue => e
      @logger.error("Error processing commit changes: #{e.message}")
      # Keep the commit, just without change details
      commit.merge(code_changes: {})
    end
  end

  @logger.info("Successfully fetched changes for #{commits_with_changes.count} commits")
  commits_with_changes
end
1025
+
1026
+ # Verify GraphQL API authentication and check permissions
1027
# Confirms the token can talk to the GraphQL API by asking for the viewer login.
#
# On success the login is logged, check_token_scopes is invoked (GraphQL has no
# field exposing scopes) and true is returned.
#
# @return [true] when the viewer could be resolved
# @raise [RuntimeError] when no viewer is returned; the message carries the
#   joined GraphQL error messages, or "Unknown error" when none are available
def verify_authentication
  @logger.info("Verifying GitHub GraphQL authentication...")

  # Minimal query: only the login of the authenticated user is requested.
  viewer_query = <<-GRAPHQL
    query {
      viewer {
        login
      }
    }
  GRAPHQL

  result = execute_query(viewer_query, variables: {})
  viewer = result && result['data'] && result['data']['viewer']

  unless viewer
    failure =
      if result && result['errors']
        result['errors'].map { |err| err['message'] }.join(', ')
      else
        "Unknown error"
      end
    @logger.error("Failed to authenticate to GitHub GraphQL API: #{failure}")
    raise "GraphQL authentication failed: #{failure}"
  end

  @logger.info("Authenticated to GitHub GraphQL API as user: #{viewer['login']}")

  # Scopes are not available through GraphQL, so they are checked separately.
  check_token_scopes

  true
end
1059
+
1060
+ # Check token scopes using REST API since GraphQL doesn't provide this information
1061
# Logs the OAuth scopes attached to the token, as reported by the
# X-OAuth-Scopes header of a GET to the REST /user endpoint.
#
# This is best-effort: any exception is logged as a warning and turned into a
# false return value rather than propagated.
#
# @return [Boolean] true when the endpoint answered 200, false otherwise
def check_token_scopes
  endpoint = URI.parse("https://api.github.com/user")
  client = Net::HTTP.new(endpoint.host, endpoint.port)
  client.use_ssl = true

  get = Net::HTTP::Get.new(endpoint.request_uri)
  get["Authorization"] = "Bearer #{@token}"
  get["User-Agent"] = "GitHub-Daily-Digest/1.0"

  reply = client.request(get)

  unless reply.code == "200"
    @logger.warn("Could not verify token scopes: HTTP #{reply.code}")
    return false
  end

  # A missing header yields an empty scope list.
  granted = reply["X-OAuth-Scopes"]&.split(", ") || []
  @logger.info("Token scopes: #{granted.join(', ')}")
  @logger.info("Token has repo scope: #{granted.include?('repo')}")
  @logger.info("Token has org read scope: #{granted.include?('read:org')}")

  true
rescue => e
  @logger.warn("Error checking token scopes: #{e.message}")
  false
end
1089
+
1090
# POSTs a GraphQL query to GITHUB_API_URL and returns the decoded response.
#
# The whole request runs inside handle_api_errors, so the failures raised below
# (HTML body, non-200 status, GraphQL "errors" entry) are handled there instead
# of reaching the caller.
#
# @param query_string [String] the GraphQL document
# @param variables [Hash] variables sent alongside the query
# @return [Hash, nil] the parsed JSON response, or whatever handle_api_errors
#   yields when the request fails
def execute_query(query_string, variables: {})
  handle_api_errors do
    endpoint = URI.parse(GITHUB_API_URL)
    client = Net::HTTP.new(endpoint.host, endpoint.port)
    client.use_ssl = true

    request_headers = {
      "Authorization" => "Bearer #{@token}",
      "User-Agent" => "GitHub-Daily-Digest",
      "Content-Type" => "application/json"
    }
    payload = { query: query_string, variables: variables }.to_json

    reply = client.post(endpoint.path, payload, request_headers)

    # An HTML page instead of JSON is common when rate limited or on auth issues.
    if reply.body.strip.start_with?('<!DOCTYPE', '<html')
      raise "Received HTML response instead of JSON. This usually indicates rate limiting or authentication issues. Status: #{reply.code}"
    end

    # Anything other than 200 is an error; include a short excerpt of the body.
    if reply.code.to_i != 200
      raise "GitHub API returned non-200 status code: #{reply.code}, body: #{reply.body[0..100]}"
    end

    decoded = JSON.parse(reply.body)

    # GraphQL reports query-level failures in an "errors" array.
    graphql_errors = decoded['errors']
    if graphql_errors
      raise "GraphQL errors: #{graphql_errors.map { |err| err['message'] }.join(', ')}"
    end

    decoded
  end
end
1131
+
1132
# Runs the given block, retrying transient GitHub API failures with backoff.
#
# A failure is retried when it is a Timeout::Error (the parent class of
# Net::OpenTimeout and Net::ReadTimeout, whose messages do not contain the
# lowercase word "timeout"), or when its message mentions a rate limit, a
# timeout, an HTML response, or a 500/502/503/504 status. The message match is
# case-insensitive, and status codes must stand alone so that digits embedded in
# other numbers do not trigger a retry.
#
# @param retries [Integer] retry budget used when the config does not provide
#   max_api_retries
# @yield the API call to execute
# @return [Object, nil] the block's value, or nil when the call failed with a
#   non-retryable error or the retry budget was exhausted
def handle_api_errors(retries = 3) # Default to 3 retries if not configured
  max_retries = @config&.max_api_retries || retries
  attempts = 0
  begin
    attempts += 1
    yield # Execute the GraphQL query block
  rescue => e
    # Check for various error types that might benefit from retrying
    should_retry = e.is_a?(Timeout::Error) ||
                   e.message =~ /rate limit|time(?:d )?out|Received HTML response|\b50[0234]\b/i

    if should_retry && attempts <= max_retries
      sleep_time = calculate_backoff(attempts)
      @logger.warn("GitHub GraphQL API error (Attempt #{attempts}/#{max_retries}): #{e.message}")
      @logger.warn("Retrying in #{sleep_time} seconds...")
      sleep sleep_time
      retry
    else
      @logger.error("GitHub GraphQL API error: #{e.message}")
      @logger.error("Exceeded maximum retry attempts (#{max_retries})") if attempts > max_retries
      nil # Indicate failure
    end
  end
end
1161
+
1162
+ # Calculate exponential backoff with jitter for retries
1163
# Delay, in seconds, to wait before retry number +attempt+.
#
# The delay doubles with each attempt starting from 2 seconds, is scaled by a
# random jitter factor in [0.5, 1.0), capped at 60 seconds and rounded to one
# decimal place.
#
# @param attempt [Integer] 1-based attempt counter
# @return [Numeric] seconds to sleep
def calculate_backoff(attempt)
  ceiling = 60
  doubled = 2 * (2 ** (attempt - 1))
  jittered = doubled * (0.5 + rand * 0.5)
  [jittered, ceiling].min.round(1)
end
1170
+
1171
# Converts a GraphQL commit node into the commit hash used by the rest of the app.
#
# @param commit_data [Hash] GraphQL commit node with string keys ("oid",
#   "message", "committedDate", "author", "additions", "deletions")
# @param repo_full_name [String] repository name stored under :repo
# @return [Hash] symbol-keyed commit; author fields are nil when the node has
#   no author, and :total_changes treats missing counts as 0
def format_commit(commit_data, repo_full_name)
  # A missing author (or author without a linked user) collapses to an empty
  # hash so every lookup below simply yields nil.
  author = commit_data["author"] || {}
  linked_user = author["user"] || {}

  added = commit_data["additions"]
  removed = commit_data["deletions"]

  {
    sha: commit_data["oid"],
    repo: repo_full_name,
    date: commit_data["committedDate"],
    message: commit_data["message"],
    author_login: linked_user["login"],
    author_name: author["name"],
    author_email: author["email"],
    stats: {
      additions: added,
      deletions: removed,
      total_changes: added.to_i + removed.to_i
    }
  }
end
1190
+ end
1191
+ end