github-daily-digest 0.1.0

@@ -0,0 +1,932 @@
+ # github_daily_digest/lib/daily_digest_runner.rb
+ require 'json'
+ require 'time'
+ require 'set'
+
+ # Require dependent classes (adjust paths if structure differs)
+ require_relative 'configuration'
+ require_relative 'github_service'
+ require_relative 'github_graphql_service'
+ require_relative 'activity_analyzer'
+ require_relative 'gemini_service'
+ require_relative 'output_formatter'
+ require_relative 'html_formatter'
+
+ module GithubDailyDigest
+   class DailyDigestRunner
+     # Delay between processing users
+     USER_PROCESSING_DELAY = 1 # second
+
+     def initialize(config:, logger:, use_graphql: nil)
+       @config = config
+       @logger = logger
+
+       # Default to using GraphQL API unless explicitly disabled
+       @use_graphql = use_graphql.nil? ? !config.no_graphql : use_graphql
+
+       # Initialize REST API service (always needed for some operations)
+       @github_service = GithubService.new(token: config.github_token, logger: @logger, config: config)
+
+       # Initialize GraphQL service if enabled
+       if @use_graphql
+         @logger.info("Initializing GitHub GraphQL service")
+         begin
+           @github_graphql_service = GithubGraphQLService.new(token: config.github_token, logger: @logger, config: config)
+           # Verify GraphQL authentication
+           @github_graphql_service.verify_authentication
+           @logger.info("GitHub GraphQL service successfully initialized")
+         rescue => e
+           @logger.warn("Failed to initialize GraphQL service: #{e.message}. Falling back to REST API.")
+           @use_graphql = false
+           @github_graphql_service = nil
+         end
+       end
+
+       @gemini_service = GeminiService.new(
+         api_key: config.gemini_api_key,
+         logger: @logger,
+         config: config,
+         github_graphql_service: @github_graphql_service
+       )
+       @analyzer = ActivityAnalyzer.new(gemini_service: @gemini_service, github_graphql_service: @github_graphql_service, logger: @logger)
+     end
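+
+     # A minimal usage sketch (hypothetical; assumes Configuration can be
+     # built without arguments and that a standard Logger is acceptable):
+     #
+     #   config = GithubDailyDigest::Configuration.new
+     #   logger = Logger.new($stdout)
+     #   runner = GithubDailyDigest::DailyDigestRunner.new(config: config, logger: logger)
+     #   runner.run  # => true on success, false on failure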
+
+     def run
+       @logger.info("Starting GitHub Daily Digest")
+       @logger.debug("Debug mode enabled with log level: #{@logger.level}")
+
+       # Verify GitHub authentication via the client
+       @logger.info("Verifying GitHub authentication...")
+       begin
+         user = @github_service.get_current_user
+         if user
+           @logger.info("Authenticated to GitHub as user: #{user[:login]}")
+         else
+           @logger.fatal("GitHub authentication failed: Unable to get user information")
+           return false
+         end
+       rescue => e
+         @logger.fatal("GitHub authentication failed: #{e.message}")
+         @logger.debug("Authentication error backtrace: #{e.backtrace.join("\n")}")
+         return false
+       end
+
+       # Log where results will be output
+       if @config.output_to_stdout
+         @logger.info("Results will be output directly")
+       else
+         @logger.info("Results will be saved to file")
+       end
+
+       # Process all organization data
+       @logger.info("Starting daily digest process for organization(s): #{@config.github_org_name}")
+       @logger.info("Fetching data since: #{@config.time_since}")
+       @logger.info("Using GraphQL API: #{@use_graphql ? 'Yes' : 'No'}")
+       @logger.info("Output format: #{@config.output_formats}")
+
+       begin
+         results_by_org = process_organizations
+
+         # Process results into the desired format (JSON, Markdown, or HTML)
+         @logger.info("Processing results into #{@config.output_formats} format")
+         process_results(results_by_org, @config.specific_users)
+
+         @logger.info("Execution finished successfully.")
+         return true
+       rescue => e
+         @logger.fatal("Error during execution: #{e.message}")
+         @logger.error("Error backtrace: #{e.backtrace.join("\n")}")
+         return false
+       end
+     rescue => e
+       @logger.fatal("Unhandled error: #{e.message}")
+       @logger.error("Unhandled error backtrace: #{e.backtrace.join("\n")}")
+       return false
+     end
+
+     # Process a single organization
+     def process_organization(org_name)
+       @logger.info("Processing organization #{org_name}")
+
+       # Set current organization in config and GraphQL service
+       original_org_name = @config.github_org_name
+       @config.instance_variable_set(:@github_org_name, org_name)
+       @github_graphql_service.instance_variable_set(:@current_org_name, org_name) if @github_graphql_service
+
+       begin
+         # Process based on API type and user selection
+         if @config.specific_users && !@config.specific_users.empty?
+           # If specific users were provided, process just those
+           if @use_graphql
+             org_results = process_specific_users_for_organization(org_name)
+           else
+             @logger.warn("Specific user processing is currently only supported with GraphQL API")
+             org_results = process_organization_with_rest(org_name)
+           end
+         else
+           # Process all users in the organization
+           if @use_graphql
+             org_results = process_organization_with_graphql(org_name)
+           else
+             org_results = process_organization_with_rest(org_name)
+           end
+         end
+
+         return org_results
+       ensure
+         # Restore original org name in config
+         @config.instance_variable_set(:@github_org_name, original_org_name)
+       end
+     end
+
+     # Process all organizations and return combined results
+     def process_organizations
+       org_names = @config.github_org_names
+
+       # Initialize results hash
+       all_org_results = {}
+
+       # Process each organization separately
+       org_names.each_with_index do |org_name, index|
+         @logger.info("===== Processing organization: #{org_name} (#{index + 1}/#{org_names.size}) =====")
+
+         org_results = process_organization(org_name)
+
+         # Add results to the combined hash, using org name as namespace
+         all_org_results[org_name] = org_results
+       end
+
+       @logger.info("Daily digest process completed successfully for all organizations.")
+
+       # Save results to file
+       save_results(all_org_results)
+
+       return all_org_results
+     end
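+
+     # Illustrative shape of the combined results (org and user names are
+     # placeholders):
+     #
+     #   {
+     #     "acme-org" => {
+     #       "alice" => { ... per-user analysis from ActivityAnalyzer ... },
+     #       :_meta  => { api_type: "...", repo_stats: [...], trending_repos: [...], generated_at: Time }
+     #     }
+     #   }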
+
+     # Process a single user's activity data and analyze it
+     def process_user_activity(username, activity_data, time_window_days)
+       @logger.info("--------------------------------------------------")
+       @logger.info("Processing user: #{username}")
+
+       # Analyze the activity
+       analysis = @analyzer.analyze(
+         username: username,
+         activity_data: activity_data,
+         time_window_days: time_window_days
+       )
+       return analysis
+     end
+
+     # Common time window calculation used in multiple places
+     def calculate_time_window_days
+       ((Time.now - Time.parse(@config.time_since)) / 86400).round
+     end
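+
+     # Worked example: with time_since set to an ISO8601 timestamp from seven
+     # days ago, Time.now - Time.parse(time_since) is about 604800 seconds,
+     # and 604800 / 86400 rounds to 7.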
+
+     def process_organization_with_graphql(org_name)
+       @logger.info("Processing organization #{org_name} using GraphQL API")
+
+       # 1. Fetch commits from all branches across all active repos via GraphQL
+       all_commits_data = @github_graphql_service.fetch_all_branch_commits(org_name, @config.time_since)
+
+       # 2. Map all commits to their respective users
+       user_commits_map = @github_graphql_service.map_commits_to_users(all_commits_data)
+
+       # 3. Get all PR review data
+       user_reviews_map = @github_graphql_service.fetch_pull_request_reviews(org_name, @config.time_since)
+
+       # 4. Get repository statistics
+       repo_stats = @github_graphql_service.fetch_repository_stats(org_name)
+
+       # 5. Get trending repositories
+       trending_repos = @github_graphql_service.fetch_trending_repositories(org_name, @config.time_since)
+
+       # 6. Process all relevant users
+       all_user_analysis = {}
+       time_window_days = calculate_time_window_days
+
+       # Get all active users (those with commits or reviews)
+       active_users = (user_commits_map.keys + user_reviews_map.keys).uniq
+
+       # Filter by specific users if provided
+       if @config.specific_users && !@config.specific_users.empty?
+         original_count = active_users.size
+         active_users = active_users.select do |user|
+           @config.specific_users.any? { |specific_user| specific_user.downcase == user.downcase }
+         end
+         @logger.info("Filtered active users from #{original_count} to #{active_users.size} based on specified users")
+       end
+
+       # Process users with activity
+       active_users.each do |username|
+         activity_data = {
+           commits: user_commits_map[username] || [],
+           review_count: user_reviews_map[username]&.size || 0
+         }
+
+         all_user_analysis[username] = process_user_activity(username, activity_data, time_window_days)
+       end
+
+       # Add metadata to be used by the formatter
+       all_user_analysis[:_meta] = {
+         api_type: "GitHub GraphQL API",
+         repo_stats: repo_stats,
+         trending_repos: trending_repos,
+         generated_at: Time.now
+       }
+
+       @logger.info("==================================================")
+       @logger.info("Finished processing all users for organization: #{org_name}")
+
+       # Return the results for this organization
+       return all_user_analysis
+     end
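+
+     # Illustrative shape of the activity_data handed to the analyzer:
+     #
+     #   { commits: [...commit data from user_commits_map...], review_count: 3 }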
+
+     def process_organization_with_rest(org_name)
+       @logger.info("Processing organization #{org_name} using REST API")
+
+       # 1. Fetch all organization members
+       member_logins = @github_service.fetch_members(org_name)
+
+       if member_logins.empty?
+         @logger.warn("No members found or error occurred fetching members for #{org_name}.")
+         return {}
+       end
+
+       # Filter members if specific users were provided
+       if @config.specific_users && !@config.specific_users.empty?
+         original_count = member_logins.size
+         # Filter case-insensitively
+         member_logins = member_logins.select do |member|
+           @config.specific_users.any? { |user| user.downcase == member.downcase }
+         end
+         @logger.info("Filtered members from #{original_count} to #{member_logins.size} based on specified users")
+       end
+
+       # 2. Get all active repositories during the time window with their commits
+       active_repos = @github_service.fetch_active_repos(org_name, @config.time_since)
+
+       # 3. Map all commits to their respective users (much more efficient)
+       user_commits_map = @github_service.map_commits_to_users(active_repos)
+
+       # Process all relevant users (members who had commits + members without commits)
+       all_user_analysis = {}
+       time_window_days = calculate_time_window_days
+
+       # First process users with commits
+       user_commits_map.each do |username, commits|
+         # Skip users who aren't members of the organization
+         next unless member_logins.include?(username)
+
+         activity_data = {
+           commits: commits,
+           review_count: @github_service.search_user_reviews(username, org_name, @config.time_since)
+         }
+
+         all_user_analysis[username] = process_user_activity(username, activity_data, time_window_days)
+       end
+
+       # Now process remaining members (those without commits)
+       member_logins.each do |username|
+         next if all_user_analysis[username]
+
+         activity_data = {
+           commits: [],
+           review_count: @github_service.search_user_reviews(username, org_name, @config.time_since)
+         }
+
+         all_user_analysis[username] = process_user_activity(username, activity_data, time_window_days)
+       end
+
+       # Try to get some basic repo stats for consistent output format with GraphQL
+       repo_stats = active_repos.map do |repo, commits|
+         {
+           name: repo.split('/').last,
+           path: repo,
+           total_commits: commits.size,
+           open_prs: 0 # We don't have this info in REST mode without extra API calls
+         }
+       end
+
+       # Add metadata to be used by the formatter
+       all_user_analysis[:_meta] = {
+         api_type: "GitHub REST API",
+         repo_stats: repo_stats,
+         trending_repos: [], # Not available in REST mode
+         generated_at: Time.now
+       }
+
+       @logger.info("==================================================")
+       @logger.info("Finished processing all users for organization: #{org_name}")
+
+       # Return the results for this organization
+       return all_user_analysis
+     end
+
+     # Process specific users for an organization using the same approach as process_organization_with_graphql
+     def process_specific_users_for_organization(org_name)
+       @logger.info("Processing specific users for organization #{org_name}")
+
+       # Save current org name in config
+       original_org_name = @config.github_org_name
+       @config.instance_variable_set(:@github_org_name, org_name)
+
+       # Use the same logic as process_organization_with_graphql but with specific users
+       # 1. Fetch commits from all branches across all active repos via GraphQL
+       all_commits_data = @github_graphql_service.fetch_all_branch_commits(org_name, @config.time_since)
+
+       # 2. Map all commits to their respective users
+       user_commits_map = @github_graphql_service.map_commits_to_users(all_commits_data)
+
+       # 3. Get all PR review data
+       user_reviews_map = @github_graphql_service.fetch_pull_request_reviews(org_name, @config.time_since)
+
+       # 4. Get repository statistics
+       repo_stats = @github_graphql_service.fetch_repository_stats(org_name)
+
+       # 5. Get trending repositories
+       trending_repos = @github_graphql_service.fetch_trending_repositories(org_name, @config.time_since)
+
+       # Process all relevant users
+       all_user_analysis = {}
+       time_window_days = calculate_time_window_days
+
+       # Process only the specified users
+       @config.specific_users.each do |username|
+         @logger.info("Processing specific user: #{username}")
+
+         # Get activity data for this specific user
+         activity_data = {
+           commits: user_commits_map[username.downcase] || user_commits_map[username] || [],
+           review_count: user_reviews_map[username.downcase]&.size || user_reviews_map[username]&.size || 0
+         }
+
+         # Process user activity
+         begin
+           all_user_analysis[username] = process_user_activity(username, activity_data, time_window_days)
+         rescue => e
+           @logger.error("Error processing user #{username}: #{e.message}")
+
+           # Add empty data for this user to avoid breaking the report
+           all_user_analysis[username] = {
+             projects: [],
+             changes: 0,
+             spent_time: "0 hours",
+             pr_count: 0,
+             summary: "Error processing activity data: #{e.message}",
+             lines_changed: 0,
+             _generated_by: "error_handler"
+           }
+         end
+       end
+
+       # Add metadata to be used by the formatter
+       all_user_analysis[:_meta] = {
+         api_type: "GitHub GraphQL API",
+         repo_stats: repo_stats,
+         trending_repos: trending_repos,
+         generated_at: Time.now
+       }
+
+       # Restore original org name in config
+       @config.instance_variable_set(:@github_org_name, original_org_name)
+
+       @logger.info("==================================================")
+       @logger.info("Finished processing specific users for organization: #{org_name}")
+
+       # Return the results for this organization
+       return all_user_analysis
+     end
+
+     # Main function to process results from GitHub API and format them for output
+     def process_results(results, specific_users = [])
+       @logger.info("Processing results...")
+
+       # Debug the structure of the results hash
+       @logger.info("Results structure: #{results.keys.join(', ')}")
+
+       # Dump the first organization's structure to better understand the data
+       if results.keys.first && results[results.keys.first].is_a?(Hash)
+         org_data = results[results.keys.first]
+         @logger.info("First org data keys: #{org_data.keys.join(', ')}")
+
+         # Count total commits in all repositories
+         total_repo_commits = 0
+
+         # Check for repository data in _meta
+         if org_data["_meta"] && org_data["_meta"]["repos"].is_a?(Hash)
+           org_data["_meta"]["repos"].each do |repo_name, repo_data|
+             if repo_data["commit_count"].to_i > 0
+               @logger.info("Repository #{repo_name} has #{repo_data["commit_count"]} commits")
+               total_repo_commits += repo_data["commit_count"].to_i
+             end
+           end
+           @logger.info("Total commits in all repositories: #{total_repo_commits}")
+         end
+
+         # Check a few users
+         org_data.keys.select { |k| k.is_a?(String) && !k.start_with?("_") }.take(3).each do |username|
+           user_data = org_data[username]
+           if user_data.is_a?(Hash)
+             @logger.info("User #{username} data keys: #{user_data.keys.join(', ')}")
+
+             # Check for commits data structure
+             if user_data["commits"].is_a?(Array)
+               @logger.info("User #{username} has #{user_data["commits"].size} commits as an array")
+             end
+
+             if user_data["commit_count"]
+               @logger.info("User #{username} has 'commit_count': #{user_data["commit_count"]}")
+             end
+
+             if user_data["commits_count"]
+               @logger.info("User #{username} has 'commits_count': #{user_data["commits_count"]}")
+             end
+           end
+         end
+       end
+
+       # Generate summary statistics and AI description
+       results = generate_summary_statistics(results) if @config.gemini_api_key
+
+       # Initialize the appropriate output formatter
+       output_formatter = OutputFormatter.new(
+         config: @config,
+         logger: @logger
+       )
+
+       output_results = {}
+
+       # Process each requested output format
+       @config.output_formats.each do |output_format|
+         @logger.info("Processing results into #{output_format} format")
+
+         case output_format
+         when 'json'
+           json_output = output_formatter.format(results, 'json')
+           if @config.output_to_stdout
+             @logger.info("Writing JSON to stdout")
+             puts json_output
+           else
+             output_file = "github_daily_digest_#{Time.now.strftime('%Y-%m-%d')}.json"
+             @logger.info("Writing JSON to file: #{output_file}")
+             File.write(output_file, json_output)
+           end
+           output_results['json'] = json_output
+
+         when 'markdown'
+           markdown_output = output_formatter.format(results, 'markdown')
+           if @config.output_to_stdout
+             @logger.info("Writing Markdown to stdout")
+             puts markdown_output
+           else
+             output_file = "github_daily_digest_#{Time.now.strftime('%Y-%m-%d')}.md"
+             @logger.info("Writing Markdown to file: #{output_file}")
+             File.write(output_file, markdown_output)
+           end
+           output_results['markdown'] = markdown_output
+
+         when 'html'
+           # For HTML output, use the standalone HTML formatter
+           if @config.output_to_stdout
+             # Generate HTML and output to stdout
+             html_formatter = HtmlFormatter.new(
+               data: results,
+               theme: @config.html_theme,
+               title: @config.html_title || "Team Activity Report - #{Time.now.strftime('%Y-%m-%d')}",
+               show_charts: true
+             )
+             html_output = html_formatter.generate
+
+             # Output the HTML
+             puts html_output
+
+             output_results['html'] = html_output
+           else
+             # Generate the HTML to a file (named consistently with the other formats)
+             output_file = "github_daily_digest_#{Time.now.strftime('%Y-%m-%d')}.html"
+             html_formatter = HtmlFormatter.new(
+               data: results,
+               output_file: output_file,
+               theme: @config.html_theme,
+               title: @config.html_title || "Team Activity Report - #{Time.now.strftime('%Y-%m-%d')}",
+               show_charts: true
+             )
+             html_formatter.generate
+             @logger.info("HTML output generated to: #{output_file}")
+             output_results['html'] = output_file
+           end
+         else
+           @logger.warn("Unknown output format: #{output_format}, skipping")
+         end
+       end
+
+       # Return all generated outputs
+       output_results
+     end
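+
+     # Illustrative return value: each requested format maps to its output,
+     # e.g. { 'json' => "{...}", 'markdown' => "# ...", 'html' => markup
+     # (stdout mode) or the generated file's path }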
+
+     # Format time window into a human-readable string
+     def format_time_period(time_window_days)
+       # Note: nil.to_i returns 0 (it does not raise), so guard explicitly
+       # instead of relying on a rescue modifier
+       time_window = time_window_days.to_i
+       time_window = 7 if time_window <= 0
+
+       case time_window
+       when 1 then "Last 24 hours"
+       when 7 then "Last week"
+       when 30, 31 then "Last month"
+       else "Last #{time_window} days"
+       end
+     end
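+
+     # Examples:
+     #   format_time_period(1)  # => "Last 24 hours"
+     #   format_time_period(14) # => "Last 14 days"
+     #   format_time_period(30) # => "Last month"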
+
+     # Generate summary statistics and AI-generated summary description
+     def generate_summary_statistics(results)
+       @logger.info("Generating summary statistics and AI description...")
+
+       # Calculate aggregate statistics
+       total_commits = 0
+       total_prs = 0
+       total_reviews = 0
+       total_lines_changed = 0
+       all_weights = {
+         "lines_of_code" => [],
+         "complexity" => [],
+         "technical_depth" => [],
+         "scope" => [],
+         "pr_reviews" => []
+       }
+       active_users_count = 0
+       active_repos_count = 0
+
+       # Gather all language stats
+       all_languages = {}
+
+       # Process each organization's data
+       results.each do |org_name, org_data|
+         next if org_name == :_meta || org_name == "_meta" || !org_data.is_a?(Hash)
+
+         # Users are direct children of the org hash - we need to find all users
+         users_in_org = org_data.keys.reject { |key| key == "_meta" || key == :_meta }
+
+         # Process each user's data; each user is counted as active at most
+         # once, based on whether any activity was found below
+         users_in_org.each do |username|
+           user_data = org_data[username]
+           next unless user_data.is_a?(Hash)
+
+           # Track if this user has any activity
+           has_activity = false
+
+           # Aggregate user statistics
+           commits = user_data["commits"] || []
+
+           # Commits may arrive as an array or as a pre-computed count;
+           # count each user's commits only once
+           if commits.is_a?(Array) && !commits.empty?
+             total_commits += commits.size
+             has_activity = true
+           elsif user_data["commits_count"].to_i > 0
+             total_commits += user_data["commits_count"].to_i
+             has_activity = true
+           elsif user_data["commit_count"].to_i > 0
+             total_commits += user_data["commit_count"].to_i
+             has_activity = true
+           end
+
+           # Count PRs (either key, but not both)
+           if user_data["prs_count"].to_i > 0
+             total_prs += user_data["prs_count"].to_i
+             has_activity = true
+           elsif user_data["pr_count"].to_i > 0
+             total_prs += user_data["pr_count"].to_i
+             has_activity = true
+           end
+
+           # Count reviews (either key, but not both)
+           if user_data["reviews_count"].to_i > 0
+             total_reviews += user_data["reviews_count"].to_i
+             has_activity = true
+           elsif user_data["review_count"].to_i > 0
+             total_reviews += user_data["review_count"].to_i
+             has_activity = true
+           end
+
+           # Count lines changed
+           if user_data["lines_changed"].to_i > 0
+             total_lines_changed += user_data["lines_changed"].to_i
+             has_activity = true
+           end
+
+           # Collect language stats
+           if user_data["language_distribution"].is_a?(Hash)
+             user_data["language_distribution"].each do |lang, percentage|
+               all_languages[lang] ||= 0
+               all_languages[lang] += percentage.to_f
+             end
+           end
+
+           # Process contribution weights
+           if user_data["contribution_weights"].is_a?(Hash)
+             weights = user_data["contribution_weights"]
+             all_weights.keys.each do |key|
+               weight_value = weights[key].to_i rescue 0
+               all_weights[key] << weight_value if weight_value > 0
+             end
+           end
+
+           # If this user had any activity, increment active users count
+           active_users_count += 1 if has_activity
+         end
+
+         # Count active repositories from _meta.repos
+         if org_data["_meta"] && org_data["_meta"]["repos"].is_a?(Hash)
+           active_repos = org_data["_meta"]["repos"].values.select do |repo|
+             repo.is_a?(Hash) && repo["commit_count"].to_i > 0
+           end
+           active_repos_count += active_repos.size
+         end
+       end
+
+       # Calculate average contribution weights
+       average_weights = {}
+       all_weights.each do |key, values|
+         average_weights[key] = values.empty? ? 0 : (values.sum.to_f / values.size).round(1)
+       end
+
+       # Normalize language percentages
+       language_distribution = {}
+       if all_languages.any?
+         total_percentage = all_languages.values.sum
+         all_languages.each do |lang, percentage|
+           normalized = (percentage.to_f / total_percentage * 100).round(1)
+           language_distribution[lang] = normalized if normalized > 0
+         end
+       end
+
+       # Create the formatted time period text for the summary
+       time_period = format_time_period(@config.time_window_days)
+
+       # Generate an AI summary if Gemini is configured
+       ai_summary = nil
+       if @config.gemini_api_key
+         ai_prompt = create_summary_prompt(
+           results: results,
+           period: time_period,
+           total_commits: total_commits,
+           total_prs: total_prs,
+           total_lines_changed: total_lines_changed,
+           active_users_count: active_users_count,
+           active_repos_count: active_repos_count,
+           language_distribution: language_distribution
+         )
+         ai_summary = generate_ai_summary(ai_prompt)
+       end
+
+       # Build the final summary statistics
+       summary_statistics = {
+         "total_commits" => total_commits,
+         "total_prs" => total_prs,
+         "total_reviews" => total_reviews,
+         "total_lines_changed" => total_lines_changed,
+         "active_users_count" => active_users_count,
+         "active_repos_count" => active_repos_count,
+         "average_weights" => average_weights,
+         "team_language_distribution" => language_distribution,
+         "period" => time_period,
+         "ai_summary" => ai_summary
+       }
+
+       # Add the summary statistics to the results hash
+       results["summary_statistics"] = summary_statistics
+
+       results
+     end
+
+     # Create a prompt for the AI to generate a summary of team activity
+     def create_summary_prompt(results:, period:, total_commits:, total_prs:, total_lines_changed:, active_users_count:, active_repos_count:, language_distribution:)
+       prompt = "Create a comprehensive yet concise professional summary of team activity for the following period: #{period}.\n\n"
+       prompt += "Key metrics:\n"
+       prompt += "- Total commits: #{total_commits}\n"
+       prompt += "- Total pull requests: #{total_prs}\n"
+       prompt += "- Total lines of code changed: #{total_lines_changed}\n"
+       prompt += "- Active developers: #{active_users_count}\n"
+       prompt += "- Active repositories: #{active_repos_count}\n"
+
+       # Add team language distribution
+       if language_distribution && !language_distribution.empty?
+         top_languages = language_distribution.sort_by { |_, percentage| -percentage }.take(5)
+         prompt += "\nTop programming languages used by the team:\n"
+         top_languages.each do |lang, percentage|
+           prompt += "- #{lang}: #{percentage.round(1)}%\n"
+         end
+       end
+
+       # Collect information about individual developers and their work
+       if results.is_a?(Hash)
+         user_summaries = []
+         repositories_worked_on = []
+
+         results.each do |org_name, org_data|
+           next if org_name == :_meta || org_name == "_meta" || org_name == "summary_statistics" || !org_data.is_a?(Hash)
+
+           org_data.each do |username, user_data|
+             next if username == "_meta" || username == :_meta || !user_data.is_a?(Hash)
+             next unless user_data["total_score"].to_i > 0 || user_data["lines_changed"].to_i > 0
+
+             # Gather user summary
+             if user_data["summary"].is_a?(String) && !user_data["summary"].empty?
+               user_summaries << "#{username}: #{user_data["summary"]}"
+             end
+
+             # Gather repositories
+             if user_data["projects"].is_a?(Array)
+               @logger.info(" Found #{user_data["projects"].length} projects for user #{username}") if @logger
+
+               user_data["projects"].each do |project|
+                 begin
+                   if project.is_a?(Hash)
+                     repo_name = nil
+
+                     # Try to extract the name safely
+                     if project.key?("name")
+                       repo_name = project["name"].to_s
+                     elsif project.key?(:name)
+                       repo_name = project[:name].to_s
+                     end
+
+                     if repo_name && !repo_name.empty?
+                       repositories_worked_on << repo_name
+                     end
+                   else
+                     @logger.warn(" Skipping non-hash project for user #{username}: #{project.inspect}") if @logger
+                   end
+                 rescue => e
+                   @logger.warn(" Error processing project for user #{username}: #{e.message}") if @logger
+                 end
+               end
+             end
+           end
+         end
+
+         # Add individual developer summaries
+         if user_summaries.any?
+           prompt += "\nIndividual developer summaries:\n"
+           user_summaries.take(5).each do |summary|
+             prompt += "- #{summary}\n"
+           end
+         end
+
+         # Add repositories being worked on
+         if repositories_worked_on.any?
+           unique_repos = repositories_worked_on.uniq
+           prompt += "\nRepositories being worked on:\n"
+           unique_repos.take(10).each do |repo|
+             prompt += "- #{repo}\n"
+           end
+         end
+       end
+
+       prompt += "\nBased on this information, provide a professional summary of the team's activity "
+       prompt += "that highlights the main focus areas, types of work being done, and overall productivity trends. "
+       prompt += "Keep it concise (3-4 sentences) and data-focused. Emphasize what the team accomplished collectively."
+
+       return prompt
+     end
+
+     # Build a shorter summary prompt from the precomputed summary statistics
+     def build_summary_prompt(stats, results)
+       prompt = "Generate a concise 2-3 sentence summary of the following GitHub team activity:\n\n"
+       prompt += "Time period: #{stats['period']}\n"
+       prompt += "#{stats['active_users_count']} developers made #{stats['total_commits']} commits "
+       prompt += "across #{stats['active_repos_count']} repositories.\n"
+       prompt += "Total lines of code changed: #{stats['total_lines_changed']}\n"
+
+       # Add language distribution
+       if stats["team_language_distribution"] && !stats["team_language_distribution"].empty?
+         top_languages = stats["team_language_distribution"].sort_by { |_, v| -v }.take(3)
+         prompt += "\nTop programming languages used by the team:\n"
+         top_languages.each do |lang, percentage|
+           prompt += "- #{lang}: #{percentage.round(1)}%\n"
+         end
+       end
+
+       # Add information about most active repos if available
+       active_repos = results["organizations"]&.flat_map do |_, org|
+         org["repositories"]&.map do |name, repo|
+           { name: name, commits: repo["commit_count"] || 0 }
+         end
+       end&.compact
+
+       if active_repos && !active_repos.empty?
+         top_repos = active_repos.sort_by { |r| -r[:commits] }.take(3)
+         prompt += "Most active repositories: #{top_repos.map { |r| r[:name] }.join(', ')}\n"
+       end
+
+       prompt += "\nBased on this information, provide a professional summary of the team's activity "
+       prompt += "that highlights the main focus areas and overall productivity. Keep it brief and data-focused. Emphasize what the team accomplished collectively."
+
+       return prompt
+     end
+
+     def generate_ai_summary(prompt)
+       begin
+         @logger.info("Generating AI summary of team activity")
+         response = @gemini_service.client.generate_content({
+           contents: { role: 'user', parts: { text: prompt } },
+           generation_config: { temperature: 0.2 }
+         })
+
+         # Extract text from the response
+         if response && response.respond_to?(:text) && response.text
+           @logger.info("Successfully generated AI summary")
+           return response.text.strip
+         elsif response.is_a?(Hash) && response['candidates'] && response['candidates'][0] &&
+               response['candidates'][0]['content'] && response['candidates'][0]['content']['parts'] &&
+               response['candidates'][0]['content']['parts'][0]
+           # Direct hash structure
+           @logger.info("Successfully generated AI summary (hash structure)")
+           return response['candidates'][0]['content']['parts'][0]['text'].to_s.strip
+         else
+           @logger.warn("Failed to generate AI summary: Empty response")
+           return "Team showed varied activity levels across multiple repositories, demonstrating collaborative development efforts."
+         end
+       rescue => e
+         @logger.error("Error generating AI summary: #{e.message}")
+         return "Team showed varied activity levels across multiple repositories, demonstrating collaborative development efforts."
+       end
+     end
+
+     private
+
+     def save_results(analysis_data)
+       # Only save results to file if not outputting to stdout
+       unless @config.output_to_stdout
+         timestamp = Time.now.strftime("%Y%m%d_%H%M%S")
+
+         # Create results directory if it doesn't exist
+         results_dir = File.join(Dir.pwd, 'results')
+         Dir.mkdir(results_dir) unless Dir.exist?(results_dir)
+
+         output_files = []
+
+         @config.output_formats.each do |format|
+           case format
+           when 'json'
+             output_file = File.join(results_dir, "daily_digest_#{timestamp}.json")
+             output_formatter = OutputFormatter.new(config: @config, logger: @logger)
+             File.write(output_file, output_formatter.format(analysis_data, 'json'))
+             output_files << output_file
+
+           when 'markdown'
+             output_file = File.join(results_dir, "daily_digest_#{timestamp}.md")
+             output_formatter = OutputFormatter.new(config: @config, logger: @logger)
+             File.write(output_file, output_formatter.format(analysis_data, 'markdown'))
+             output_files << output_file
+
+           when 'html'
+             output_file = File.join(results_dir, "daily_digest_#{timestamp}.html")
+             html_formatter = HtmlFormatter.new(
+               data: analysis_data,
+               output_file: output_file,
+               theme: @config.html_theme,
+               title: @config.html_title || "Team Activity Report - #{Time.now.strftime('%Y-%m-%d')}",
+               show_charts: true
+             )
+             html_formatter.generate
+             @logger.info("HTML output generated to: #{output_file}")
+             output_files << output_file
+           else
+             # Default to JSON
+             output_file = File.join(results_dir, "daily_digest_#{timestamp}.json")
+             output_formatter = OutputFormatter.new(config: @config, logger: @logger)
+             File.write(output_file, output_formatter.format(analysis_data, 'json'))
+             output_files << output_file
+           end
+         end
+
+         @logger.info("Analysis saved to #{output_files.join(', ')}")
+         output_files
+       end
+     end
+   end
+ end