gitflow-analytics: 1.0.3-py3-none-any.whl → 1.3.6-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (116)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4108 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +904 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +441 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1193 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
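The largest new module is gitflow_analytics/reports/json_exporter.py (+2770 lines), whose diff follows below. As a rough usage sketch only — assuming the ComprehensiveJSONExporter constructor and export_comprehensive_data signature shown in that hunk, and with placeholder inputs standing in for data produced by the analysis pipeline — the new exporter would be driven roughly like this:

    from pathlib import Path

    from gitflow_analytics.reports.json_exporter import ComprehensiveJSONExporter

    # Placeholder inputs; in practice these come from the analysis pipeline.
    commits: list = []
    prs: list = []
    developer_stats: list = []
    project_metrics: dict = {}
    dora_metrics: dict = {}

    exporter = ComprehensiveJSONExporter(anonymize=True)
    report_path = exporter.export_comprehensive_data(
        commits=commits,
        prs=prs,
        developer_stats=developer_stats,
        project_metrics=project_metrics,
        dora_metrics=dora_metrics,
        output_path=Path("comprehensive_export.json"),
        weeks=12,
    )
    print(f"JSON report written to {report_path}")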
gitflow_analytics/reports/json_exporter.py (new file)
@@ -0,0 +1,2770 @@
+ """Comprehensive JSON export system for GitFlow Analytics.
+
+ This module provides a comprehensive JSON export system that consolidates all report data
+ into a single structured JSON format optimized for web consumption and API integration.
+
+ WHY: Traditional CSV reports are excellent for analysis tools but lack the structure needed
+ for modern web applications and dashboards. This JSON exporter creates a self-contained,
+ hierarchical data structure that includes:
+ - Time series data for charts
+ - Cross-references between entities
+ - Anomaly detection and trend analysis
+ - Health scores and insights
+ - All existing report data in a unified format
+
+ DESIGN DECISIONS:
+ - Self-contained: All data needed for visualization is included
+ - Hierarchical: Supports drill-down from executive summary to detailed metrics
+ - Web-optimized: Compatible with common charting libraries (Chart.js, D3, etc.)
+ - Extensible: Easy to add new metrics and dimensions
+ - Consistent: Follows established patterns from existing report generators
+ """
+
+ import json
+ import logging
+ import statistics
+ from collections import defaultdict
+ from datetime import datetime, timedelta, timezone
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional, Set, Tuple, Union
+
+ import numpy as np
+ import pandas as pd
+
+ from .base import BaseReportGenerator, ReportData, ReportOutput
+ from .interfaces import ReportFormat
+
+ # Get logger for this module
+ logger = logging.getLogger(__name__)
+
+
+ class ComprehensiveJSONExporter(BaseReportGenerator):
+     """Generate comprehensive JSON exports with advanced analytics and insights.
+
+     This exporter consolidates all GitFlow Analytics data into a single, structured
+     JSON format that's optimized for web consumption and includes:
+
+     - Executive summary with key metrics and trends
+     - Project-level data with health scores
+     - Developer profiles with contribution patterns
+     - Time series data for visualization
+     - Anomaly detection and alerting
+     - Cross-references between entities
+     """
+
+     def __init__(self, anonymize: bool = False, **kwargs):
+         """Initialize the comprehensive JSON exporter.
+
+         Args:
+             anonymize: Whether to anonymize developer information
+             **kwargs: Additional arguments passed to base class
+         """
+         super().__init__(anonymize=anonymize, **kwargs)
+         # Note: anonymization map and counter are now in base class
+
+         # Anomaly detection thresholds
+         self.anomaly_thresholds = {
+             'spike_multiplier': 2.0, # 2x normal activity = spike
+             'drop_threshold': 0.3, # 30% of normal activity = drop
+             'volatility_threshold': 1.5, # Standard deviation threshold
+             'trend_threshold': 0.2 # 20% change = significant trend
+         }
+
+         # Health score weights
+         self.health_weights = {
+             'activity_consistency': 0.3,
+             'ticket_coverage': 0.25,
+             'collaboration': 0.2,
+             'code_quality': 0.15,
+             'velocity': 0.1
+         }
+
+     def export_comprehensive_data(
+         self,
+         commits: List[Dict[str, Any]],
+         prs: List[Dict[str, Any]],
+         developer_stats: List[Dict[str, Any]],
+         project_metrics: Dict[str, Any],
+         dora_metrics: Dict[str, Any],
+         output_path: Path,
+         weeks: int = 12,
+         pm_data: Optional[Dict[str, Any]] = None,
+         qualitative_data: Optional[List[Dict[str, Any]]] = None,
+         enhanced_qualitative_analysis: Optional[Dict[str, Any]] = None
+     ) -> Path:
+         """Export comprehensive analytics data to JSON format.
+
+         Args:
+             commits: List of commit data
+             prs: List of pull request data
+             developer_stats: Developer statistics
+             project_metrics: Project-level metrics
+             dora_metrics: DORA metrics data
+             output_path: Path to write JSON file
+             weeks: Number of weeks analyzed
+             pm_data: PM platform integration data
+             qualitative_data: Qualitative analysis results
+             enhanced_qualitative_analysis: Enhanced multi-dimensional qualitative analysis
+
+         Returns:
+             Path to the generated JSON file
+         """
+         logger.info(f"Starting comprehensive JSON export with {len(commits)} commits")
+
+         # Calculate analysis period
+         end_date = datetime.now(timezone.utc)
+         start_date = end_date - timedelta(weeks=weeks)
+
+         # Build comprehensive data structure
+         export_data = {
+             "metadata": self._build_metadata(commits, prs, developer_stats, start_date, end_date),
+             "executive_summary": self._build_executive_summary(commits, prs, developer_stats, project_metrics, dora_metrics),
+             "projects": self._build_project_data(commits, prs, developer_stats, project_metrics),
+             "developers": self._build_developer_profiles(commits, developer_stats),
+             "workflow_analysis": self._build_workflow_analysis(commits, prs, project_metrics, pm_data),
+             "time_series": self._build_time_series_data(commits, prs, weeks),
+             "insights": self._build_insights_data(commits, developer_stats, qualitative_data),
+             "untracked_analysis": self._build_untracked_analysis(commits, project_metrics),
+             "raw_data": self._build_raw_data_summary(commits, prs, developer_stats, dora_metrics)
+         }
+
+         # Add enhanced qualitative analysis if available
+         if enhanced_qualitative_analysis:
+             export_data["enhanced_qualitative_analysis"] = enhanced_qualitative_analysis
+
+         # Add PM platform data if available
+         if pm_data:
+             export_data["pm_integration"] = self._build_pm_integration_data(pm_data)
+
+         # Serialize and write JSON
+         serialized_data = self._serialize_for_json(export_data)
+
+         with open(output_path, 'w') as f:
+             json.dump(serialized_data, f, indent=2, ensure_ascii=False)
+
+         logger.info(f"Comprehensive JSON export written to {output_path}")
+         return output_path
+
+     def create_enhanced_qualitative_analysis(
+         self,
+         commits: List[Dict[str, Any]],
+         qualitative_data: Optional[List[Any]] = None,
+         developer_stats: Optional[List[Dict[str, Any]]] = None,
+         project_metrics: Optional[Dict[str, Any]] = None,
+         pm_data: Optional[Dict[str, Any]] = None,
+         weeks_analyzed: int = 12
+     ) -> Optional[Dict[str, Any]]:
+         """Create enhanced qualitative analysis using the EnhancedQualitativeAnalyzer.
+
+         This method integrates with the enhanced analyzer to generate comprehensive
+         qualitative insights across executive, project, developer, and workflow dimensions.
+
+         Args:
+             commits: List of commit data
+             qualitative_data: Optional qualitative commit analysis results
+             developer_stats: Optional developer statistics
+             project_metrics: Optional project-level metrics
+             pm_data: Optional PM platform integration data
+             weeks_analyzed: Number of weeks in analysis period
+
+         Returns:
+             Enhanced qualitative analysis results or None if analyzer unavailable
+         """
+         try:
+             # Import here to avoid circular dependencies
+             from ..qualitative.enhanced_analyzer import EnhancedQualitativeAnalyzer
+
+             # Initialize analyzer
+             analyzer = EnhancedQualitativeAnalyzer()
+
+             # Perform comprehensive analysis
+             enhanced_analysis = analyzer.analyze_comprehensive(
+                 commits=commits,
+                 qualitative_data=qualitative_data,
+                 developer_stats=developer_stats,
+                 project_metrics=project_metrics,
+                 pm_data=pm_data,
+                 weeks_analyzed=weeks_analyzed
+             )
+
+             logger.info("Enhanced qualitative analysis completed successfully")
+             return enhanced_analysis
+
+         except ImportError as e:
+             logger.warning(f"Enhanced qualitative analyzer not available: {e}")
+             return None
+         except Exception as e:
+             logger.error(f"Enhanced qualitative analysis failed: {e}")
+             return None
+
+     def _build_metadata(
+         self,
+         commits: List[Dict[str, Any]],
+         prs: List[Dict[str, Any]],
+         developer_stats: List[Dict[str, Any]],
+         start_date: datetime,
+         end_date: datetime
+     ) -> Dict[str, Any]:
+         """Build metadata section with generation info and data summary."""
+
+         # Get unique repositories and projects
+         repositories = set()
+         projects = set()
+
+         for commit in commits:
+             if commit.get('repository'):
+                 repositories.add(commit['repository'])
+             if commit.get('project_key'):
+                 projects.add(commit['project_key'])
+
+         return {
+             "generated_at": datetime.now(timezone.utc).isoformat(),
+             "format_version": "2.0.0",
+             "generator": "GitFlow Analytics Comprehensive JSON Exporter",
+             "analysis_period": {
+                 "start_date": start_date.isoformat(),
+                 "end_date": end_date.isoformat(),
+                 "weeks_analyzed": (end_date - start_date).days // 7,
+                 "total_days": (end_date - start_date).days
+             },
+             "data_summary": {
+                 "total_commits": len(commits),
+                 "total_prs": len(prs),
+                 "total_developers": len(developer_stats),
+                 "repositories_analyzed": len(repositories),
+                 "projects_identified": len(projects),
+                 "repositories": sorted(list(repositories)),
+                 "projects": sorted(list(projects))
+             },
+             "export_settings": {
+                 "anonymized": self.anonymize,
+                 "timezone": "UTC"
+             }
+         }
244
+
245
+ def _build_executive_summary(
246
+ self,
247
+ commits: List[Dict[str, Any]],
248
+ prs: List[Dict[str, Any]],
249
+ developer_stats: List[Dict[str, Any]],
250
+ project_metrics: Dict[str, Any],
251
+ dora_metrics: Dict[str, Any]
252
+ ) -> Dict[str, Any]:
253
+ """Build executive summary with key metrics, trends, and insights."""
254
+
255
+ # Core metrics
256
+ total_commits = len(commits)
257
+ total_prs = len(prs)
258
+ total_developers = len(developer_stats)
259
+
260
+ # Calculate lines changed
261
+ total_lines = sum(
262
+ commit.get('filtered_insertions', commit.get('insertions', 0)) +
263
+ commit.get('filtered_deletions', commit.get('deletions', 0))
264
+ for commit in commits
265
+ )
266
+
267
+ # Story points
268
+ total_story_points = sum(
269
+ commit.get('story_points', 0) or 0 for commit in commits
270
+ )
271
+
272
+ # Ticket coverage
273
+ ticket_analysis = project_metrics.get('ticket_analysis', {})
274
+ ticket_coverage = ticket_analysis.get('commit_coverage_pct', 0)
275
+
276
+ # Calculate trends (compare first half vs second half)
277
+ trends = self._calculate_executive_trends(commits, prs)
278
+
279
+ # Detect anomalies
280
+ anomalies = self._detect_executive_anomalies(commits, developer_stats)
281
+
282
+ # Identify wins and concerns
283
+ wins, concerns = self._identify_wins_and_concerns(
284
+ commits, developer_stats, project_metrics, dora_metrics
285
+ )
286
+
287
+ return {
288
+ "key_metrics": {
289
+ "commits": {
290
+ "total": total_commits,
291
+ "trend_percent": trends.get('commits_trend', 0),
292
+ "trend_direction": self._get_trend_direction(trends.get('commits_trend', 0))
293
+ },
294
+ "lines_changed": {
295
+ "total": total_lines,
296
+ "trend_percent": trends.get('lines_trend', 0),
297
+ "trend_direction": self._get_trend_direction(trends.get('lines_trend', 0))
298
+ },
299
+ "story_points": {
300
+ "total": total_story_points,
301
+ "trend_percent": trends.get('story_points_trend', 0),
302
+ "trend_direction": self._get_trend_direction(trends.get('story_points_trend', 0))
303
+ },
304
+ "developers": {
305
+ "total": total_developers,
306
+ "active_percentage": self._calculate_active_developer_percentage(developer_stats)
307
+ },
308
+ "pull_requests": {
309
+ "total": total_prs,
310
+ "trend_percent": trends.get('prs_trend', 0),
311
+ "trend_direction": self._get_trend_direction(trends.get('prs_trend', 0))
312
+ },
313
+ "ticket_coverage": {
314
+ "percentage": round(ticket_coverage, 1),
315
+ "quality_rating": self._get_coverage_quality_rating(ticket_coverage)
316
+ }
317
+ },
318
+ "performance_indicators": {
319
+ "velocity": {
320
+ "commits_per_week": round(total_commits / max((len(set(self._get_week_start(c['timestamp']) for c in commits))), 1), 1),
321
+ "story_points_per_week": round(total_story_points / max((len(set(self._get_week_start(c['timestamp']) for c in commits))), 1), 1)
322
+ },
323
+ "quality": {
324
+ "avg_commit_size": round(total_lines / max(total_commits, 1), 1),
325
+ "ticket_coverage_pct": round(ticket_coverage, 1)
326
+ },
327
+ "collaboration": {
328
+ "developers_per_project": self._calculate_avg_developers_per_project(commits),
329
+ "cross_project_contributors": self._count_cross_project_contributors(commits, developer_stats)
330
+ }
331
+ },
332
+ "trends": trends,
333
+ "anomalies": anomalies,
334
+ "wins": wins,
335
+ "concerns": concerns,
336
+ "health_score": self._calculate_overall_health_score(commits, developer_stats, project_metrics, dora_metrics)
337
+ }
338
+
339
+ def _build_project_data(
340
+ self,
341
+ commits: List[Dict[str, Any]],
342
+ prs: List[Dict[str, Any]],
343
+ developer_stats: List[Dict[str, Any]],
344
+ project_metrics: Dict[str, Any]
345
+ ) -> Dict[str, Any]:
346
+ """Build project-level data with health scores and contributor details."""
347
+
348
+ # Group data by project
349
+ project_data = defaultdict(lambda: {
350
+ 'commits': [],
351
+ 'prs': [],
352
+ 'contributors': set(),
353
+ 'lines_changed': 0,
354
+ 'story_points': 0,
355
+ 'files_changed': set()
356
+ })
357
+
358
+ # Process commits by project
359
+ for commit in commits:
360
+ project_key = commit.get('project_key', 'UNKNOWN')
361
+ project_data[project_key]['commits'].append(commit)
362
+ project_data[project_key]['contributors'].add(commit.get('canonical_id', commit.get('author_email')))
363
+
364
+ lines = (
365
+ commit.get('filtered_insertions', commit.get('insertions', 0)) +
366
+ commit.get('filtered_deletions', commit.get('deletions', 0))
367
+ )
368
+ project_data[project_key]['lines_changed'] += lines
369
+ project_data[project_key]['story_points'] += commit.get('story_points', 0) or 0
370
+
371
+ # Track files (simplified - just count)
372
+ files_changed = commit.get('filtered_files_changed', commit.get('files_changed', 0))
373
+ if files_changed:
374
+ # Add placeholder file references
375
+ for i in range(files_changed):
376
+ project_data[project_key]['files_changed'].add(f"file_{i}")
377
+
378
+ # Process PRs by project (if available)
379
+ for pr in prs:
380
+ # Try to determine project from PR data
381
+ project_key = pr.get('project_key', 'UNKNOWN')
382
+ project_data[project_key]['prs'].append(pr)
383
+
384
+ # Build structured project data
385
+ projects = {}
386
+
387
+ for project_key, data in project_data.items():
388
+ commits_list = data['commits']
389
+ contributors = data['contributors']
390
+
391
+ # Calculate project health score
392
+ health_score = self._calculate_project_health_score(commits_list, contributors)
393
+
394
+ # Get contributor details
395
+ contributor_details = self._get_project_contributor_details(commits_list, developer_stats)
396
+
397
+ # Calculate project trends
398
+ project_trends = self._calculate_project_trends(commits_list)
399
+
400
+ # Detect project anomalies
401
+ project_anomalies = self._detect_project_anomalies(commits_list)
402
+
403
+ projects[project_key] = {
404
+ "summary": {
405
+ "total_commits": len(commits_list),
406
+ "total_contributors": len(contributors),
407
+ "lines_changed": data['lines_changed'],
408
+ "story_points": data['story_points'],
409
+ "files_touched": len(data['files_changed']),
410
+ "pull_requests": len(data['prs'])
411
+ },
412
+ "health_score": health_score,
413
+ "contributors": contributor_details,
414
+ "activity_patterns": {
415
+ "commits_per_week": self._calculate_weekly_commits(commits_list),
416
+ "peak_activity_day": self._find_peak_activity_day(commits_list),
417
+ "commit_size_distribution": self._analyze_commit_size_distribution(commits_list)
418
+ },
419
+ "trends": project_trends,
420
+ "anomalies": project_anomalies,
421
+ "focus_metrics": {
422
+ "primary_contributors": self._identify_primary_contributors(commits_list, contributor_details),
423
+ "contribution_distribution": self._calculate_contribution_distribution(commits_list)
424
+ }
425
+ }
426
+
427
+ return projects
428
+
429
+ def _build_developer_profiles(
430
+ self,
431
+ commits: List[Dict[str, Any]],
432
+ developer_stats: List[Dict[str, Any]]
433
+ ) -> Dict[str, Any]:
434
+ """Build comprehensive developer profiles with contribution patterns."""
435
+
436
+ profiles = {}
437
+
438
+ for dev in developer_stats:
439
+ dev_id = dev['canonical_id']
440
+ dev_name = self._anonymize_value(dev['primary_name'], 'name')
441
+
442
+ # Get developer's commits
443
+ dev_commits = [c for c in commits if c.get('canonical_id') == dev_id]
444
+
445
+ # Calculate various metrics
446
+ projects_worked = self._get_developer_projects(dev_commits)
447
+ contribution_patterns = self._analyze_developer_contribution_patterns(dev_commits)
448
+ collaboration_metrics = self._calculate_developer_collaboration_metrics(dev_commits, developer_stats)
449
+
450
+ # Calculate developer health score
451
+ health_score = self._calculate_developer_health_score(dev_commits, dev)
452
+
453
+ # Identify achievements and areas for improvement
454
+ achievements = self._identify_developer_achievements(dev_commits, dev)
455
+ improvement_areas = self._identify_improvement_areas(dev_commits, dev)
456
+
457
+ profiles[dev_id] = {
458
+ "identity": {
459
+ "name": dev_name,
460
+ "canonical_id": dev_id,
461
+ "primary_email": self._anonymize_value(dev['primary_email'], 'email'),
462
+ "github_username": self._anonymize_value(dev.get('github_username', ''), 'username') if dev.get('github_username') else None,
463
+ "aliases_count": dev.get('alias_count', 1)
464
+ },
465
+ "summary": {
466
+ "total_commits": dev['total_commits'],
467
+ "total_story_points": dev['total_story_points'],
468
+ "projects_contributed": len(projects_worked),
469
+ "first_seen": dev.get('first_seen').isoformat() if dev.get('first_seen') else None,
470
+ "last_seen": dev.get('last_seen').isoformat() if dev.get('last_seen') else None,
471
+ "days_active": (dev.get('last_seen') - dev.get('first_seen')).days if dev.get('first_seen') and dev.get('last_seen') else 0
472
+ },
473
+ "health_score": health_score,
474
+ "projects": projects_worked,
475
+ "contribution_patterns": contribution_patterns,
476
+ "collaboration": collaboration_metrics,
477
+ "achievements": achievements,
478
+ "improvement_areas": improvement_areas,
479
+ "activity_timeline": self._build_developer_activity_timeline(dev_commits)
480
+ }
481
+
482
+ return profiles
483
+
484
+ def _build_workflow_analysis(
485
+ self,
486
+ commits: List[Dict[str, Any]],
487
+ prs: List[Dict[str, Any]],
488
+ project_metrics: Dict[str, Any],
489
+ pm_data: Optional[Dict[str, Any]]
490
+ ) -> Dict[str, Any]:
491
+ """Build workflow analysis including Git-PM correlation."""
492
+
493
+ # Analyze branching patterns
494
+ branching_analysis = self._analyze_branching_patterns(commits)
495
+
496
+ # Analyze commit patterns
497
+ commit_patterns = self._analyze_commit_timing_patterns(commits)
498
+
499
+ # Analyze PR workflow if available
500
+ pr_workflow = self._analyze_pr_workflow(prs) if prs else {}
501
+
502
+ # Git-PM correlation analysis
503
+ git_pm_correlation = {}
504
+ if pm_data:
505
+ git_pm_correlation = self._analyze_git_pm_correlation(commits, pm_data)
506
+
507
+ return {
508
+ "branching_strategy": branching_analysis,
509
+ "commit_patterns": commit_patterns,
510
+ "pr_workflow": pr_workflow,
511
+ "git_pm_correlation": git_pm_correlation,
512
+ "process_health": {
513
+ "ticket_linking_rate": project_metrics.get('ticket_analysis', {}).get('commit_coverage_pct', 0),
514
+ "merge_commit_rate": self._calculate_merge_commit_rate(commits),
515
+ "commit_message_quality": self._analyze_commit_message_quality(commits)
516
+ }
517
+ }
518
+
519
+ def _build_time_series_data(
520
+ self,
521
+ commits: List[Dict[str, Any]],
522
+ prs: List[Dict[str, Any]],
523
+ weeks: int
524
+ ) -> Dict[str, Any]:
525
+ """Build time series data optimized for charting libraries."""
526
+
527
+ # Calculate date range
528
+ end_date = datetime.now(timezone.utc)
529
+ start_date = end_date - timedelta(weeks=weeks)
530
+
531
+ # Generate weekly data points
532
+ weekly_data = self._generate_weekly_time_series(commits, prs, start_date, end_date)
533
+ daily_data = self._generate_daily_time_series(commits, prs, start_date, end_date)
534
+
535
+ return {
536
+ "weekly": {
537
+ "labels": [d["date"] for d in weekly_data],
538
+ "datasets": {
539
+ "commits": {
540
+ "label": "Commits",
541
+ "data": [d["commits"] for d in weekly_data],
542
+ "backgroundColor": "rgba(54, 162, 235, 0.2)",
543
+ "borderColor": "rgba(54, 162, 235, 1)"
544
+ },
545
+ "lines_changed": {
546
+ "label": "Lines Changed",
547
+ "data": [d["lines_changed"] for d in weekly_data],
548
+ "backgroundColor": "rgba(255, 99, 132, 0.2)",
549
+ "borderColor": "rgba(255, 99, 132, 1)"
550
+ },
551
+ "story_points": {
552
+ "label": "Story Points",
553
+ "data": [d["story_points"] for d in weekly_data],
554
+ "backgroundColor": "rgba(75, 192, 192, 0.2)",
555
+ "borderColor": "rgba(75, 192, 192, 1)"
556
+ },
557
+ "active_developers": {
558
+ "label": "Active Developers",
559
+ "data": [d["active_developers"] for d in weekly_data],
560
+ "backgroundColor": "rgba(153, 102, 255, 0.2)",
561
+ "borderColor": "rgba(153, 102, 255, 1)"
562
+ }
563
+ }
564
+ },
565
+ "daily": {
566
+ "labels": [d["date"] for d in daily_data],
567
+ "datasets": {
568
+ "commits": {
569
+ "label": "Daily Commits",
570
+ "data": [d["commits"] for d in daily_data],
571
+ "backgroundColor": "rgba(54, 162, 235, 0.1)",
572
+ "borderColor": "rgba(54, 162, 235, 1)"
573
+ }
574
+ }
575
+ }
576
+ }
577
+
578
+ def _build_insights_data(
579
+ self,
580
+ commits: List[Dict[str, Any]],
581
+ developer_stats: List[Dict[str, Any]],
582
+ qualitative_data: Optional[List[Dict[str, Any]]]
583
+ ) -> Dict[str, Any]:
584
+ """Build insights data with qualitative and quantitative analysis."""
585
+
586
+ # Generate quantitative insights
587
+ quantitative_insights = self._generate_quantitative_insights(commits, developer_stats)
588
+
589
+ # Process qualitative insights if available
590
+ qualitative_insights = []
591
+ if qualitative_data:
592
+ qualitative_insights = self._process_qualitative_insights(qualitative_data)
593
+
594
+ # Combine and prioritize insights
595
+ all_insights = quantitative_insights + qualitative_insights
596
+ prioritized_insights = self._prioritize_insights(all_insights)
597
+
598
+ return {
599
+ "quantitative": quantitative_insights,
600
+ "qualitative": qualitative_insights,
601
+ "prioritized": prioritized_insights,
602
+ "insight_categories": self._categorize_insights(all_insights),
603
+ "actionable_recommendations": self._generate_actionable_recommendations(all_insights)
604
+ }
605
+
606
+ def _build_raw_data_summary(
607
+ self,
608
+ commits: List[Dict[str, Any]],
609
+ prs: List[Dict[str, Any]],
610
+ developer_stats: List[Dict[str, Any]],
611
+ dora_metrics: Dict[str, Any]
612
+ ) -> Dict[str, Any]:
613
+ """Build summary of raw data for reference and validation."""
614
+
615
+ return {
616
+ "commits_sample": commits[:5] if commits else [], # First 5 commits as sample
617
+ "prs_sample": prs[:3] if prs else [], # First 3 PRs as sample
618
+ "developer_stats_schema": {
619
+ "fields": list(developer_stats[0].keys()) if developer_stats else [],
620
+ "sample_record": developer_stats[0] if developer_stats else {}
621
+ },
622
+ "dora_metrics": dora_metrics,
623
+ "data_quality": {
624
+ "commits_with_timestamps": sum(1 for c in commits if c.get('timestamp')),
625
+ "commits_with_projects": sum(1 for c in commits if c.get('project_key')),
626
+ "commits_with_tickets": sum(1 for c in commits if c.get('ticket_references')),
627
+ "developers_with_github": sum(1 for d in developer_stats if d.get('github_username'))
628
+ }
629
+ }
630
+
631
+ def _build_pm_integration_data(self, pm_data: Dict[str, Any]) -> Dict[str, Any]:
632
+ """Build PM platform integration data summary."""
633
+
634
+ metrics = pm_data.get('metrics', {})
635
+ correlations = pm_data.get('correlations', [])
636
+
637
+ return {
638
+ "platforms": list(metrics.get('platform_coverage', {}).keys()),
639
+ "total_issues": metrics.get('total_pm_issues', 0),
640
+ "story_point_coverage": metrics.get('story_point_analysis', {}).get('story_point_coverage_pct', 0),
641
+ "correlations_count": len(correlations),
642
+ "correlation_quality": metrics.get('correlation_quality', {}),
643
+ "issue_types": metrics.get('issue_type_distribution', {}),
644
+ "platform_summary": {
645
+ platform: {
646
+ "total_issues": data.get('total_issues', 0),
647
+ "linked_issues": data.get('linked_issues', 0),
648
+ "coverage_percentage": data.get('coverage_percentage', 0)
649
+ }
650
+ for platform, data in metrics.get('platform_coverage', {}).items()
651
+ }
652
+ }
653
+
654
+ # Helper methods for calculations and analysis
655
+
656
+ def _calculate_executive_trends(
657
+ self,
658
+ commits: List[Dict[str, Any]],
659
+ prs: List[Dict[str, Any]]
660
+ ) -> Dict[str, float]:
661
+ """Calculate trends by comparing first half vs second half of data."""
662
+
663
+ if not commits:
664
+ return {}
665
+
666
+ # Sort commits by timestamp
667
+ sorted_commits = sorted(commits, key=lambda x: x['timestamp'])
668
+ midpoint = len(sorted_commits) // 2
669
+
670
+ first_half = sorted_commits[:midpoint]
671
+ second_half = sorted_commits[midpoint:]
672
+
673
+ # Calculate metrics for each half
674
+ def get_half_metrics(commit_list):
675
+ return {
676
+ 'commits': len(commit_list),
677
+ 'lines': sum(
678
+ c.get('filtered_insertions', c.get('insertions', 0)) +
679
+ c.get('filtered_deletions', c.get('deletions', 0))
680
+ for c in commit_list
681
+ ),
682
+ 'story_points': sum(c.get('story_points', 0) or 0 for c in commit_list)
683
+ }
684
+
685
+ first_metrics = get_half_metrics(first_half)
686
+ second_metrics = get_half_metrics(second_half)
687
+
688
+ # Calculate percentage changes
689
+ trends = {}
690
+ for metric in ['commits', 'lines', 'story_points']:
691
+ if first_metrics[metric] > 0:
692
+ change = ((second_metrics[metric] - first_metrics[metric]) / first_metrics[metric]) * 100
693
+ trends[f'{metric}_trend'] = round(change, 1)
694
+ else:
695
+ trends[f'{metric}_trend'] = 0
696
+
697
+ # PR trends if available
698
+ if prs:
699
+ sorted_prs = sorted(prs, key=lambda x: x.get('merged_at', x.get('created_at', datetime.now())))
700
+ pr_midpoint = len(sorted_prs) // 2
701
+
702
+ first_pr_count = pr_midpoint
703
+ second_pr_count = len(sorted_prs) - pr_midpoint
704
+
705
+ if first_pr_count > 0:
706
+ pr_change = ((second_pr_count - first_pr_count) / first_pr_count) * 100
707
+ trends['prs_trend'] = round(pr_change, 1)
708
+ else:
709
+ trends['prs_trend'] = 0
710
+
711
+ return trends
712
+
713
+ def _detect_executive_anomalies(
714
+ self,
715
+ commits: List[Dict[str, Any]],
716
+ developer_stats: List[Dict[str, Any]]
717
+ ) -> List[Dict[str, Any]]:
718
+ """Detect anomalies in executive-level data."""
719
+
720
+ anomalies = []
721
+
722
+ # Check for commit spikes/drops by week
723
+ weekly_commits = self._get_weekly_commit_counts(commits)
724
+ if len(weekly_commits) >= 3:
725
+ mean_commits = statistics.mean(weekly_commits)
726
+ std_commits = statistics.pstdev(weekly_commits) if len(weekly_commits) > 1 else 0
727
+
728
+ for i, count in enumerate(weekly_commits):
729
+ if std_commits > 0:
730
+ if count > mean_commits + (std_commits * self.anomaly_thresholds['spike_multiplier']):
731
+ anomalies.append({
732
+ "type": "spike",
733
+ "metric": "weekly_commits",
734
+ "value": count,
735
+ "expected": round(mean_commits, 1),
736
+ "severity": "high" if count > mean_commits + (std_commits * 3) else "medium",
737
+ "week_index": i
738
+ })
739
+ elif count < mean_commits * self.anomaly_thresholds['drop_threshold']:
740
+ anomalies.append({
741
+ "type": "drop",
742
+ "metric": "weekly_commits",
743
+ "value": count,
744
+ "expected": round(mean_commits, 1),
745
+ "severity": "high" if count < mean_commits * 0.1 else "medium",
746
+ "week_index": i
747
+ })
748
+
749
+ # Check for contributor anomalies
750
+ commit_counts = [dev['total_commits'] for dev in developer_stats]
751
+ if len(commit_counts) > 1:
752
+ gini_coefficient = self._calculate_gini_coefficient(commit_counts)
753
+ if gini_coefficient > 0.8:
754
+ anomalies.append({
755
+ "type": "concentration",
756
+ "metric": "contribution_distribution",
757
+ "value": round(gini_coefficient, 2),
758
+ "threshold": 0.8,
759
+ "severity": "medium",
760
+ "description": "Highly concentrated contribution pattern"
761
+ })
762
+
763
+ return anomalies
764
+
765
+ def _identify_wins_and_concerns(
766
+ self,
767
+ commits: List[Dict[str, Any]],
768
+ developer_stats: List[Dict[str, Any]],
769
+ project_metrics: Dict[str, Any],
770
+ dora_metrics: Dict[str, Any]
771
+ ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
772
+ """Identify key wins and concerns from the data."""
773
+
774
+ wins = []
775
+ concerns = []
776
+
777
+ # Ticket coverage analysis
778
+ ticket_coverage = project_metrics.get('ticket_analysis', {}).get('commit_coverage_pct', 0)
779
+ if ticket_coverage > 80:
780
+ wins.append({
781
+ "category": "process",
782
+ "title": "Excellent Ticket Coverage",
783
+ "description": f"{ticket_coverage:.1f}% of commits linked to tickets",
784
+ "impact": "high"
785
+ })
786
+ elif ticket_coverage < 30:
787
+ concerns.append({
788
+ "category": "process",
789
+ "title": "Low Ticket Coverage",
790
+ "description": f"Only {ticket_coverage:.1f}% of commits linked to tickets",
791
+ "impact": "high",
792
+ "recommendation": "Improve ticket referencing in commit messages"
793
+ })
794
+
795
+ # Team activity analysis
796
+ if len(developer_stats) > 1:
797
+ commit_counts = [dev['total_commits'] for dev in developer_stats]
798
+ avg_commits = sum(commit_counts) / len(commit_counts)
799
+
800
+ if min(commit_counts) > avg_commits * 0.5:
801
+ wins.append({
802
+ "category": "team",
803
+ "title": "Balanced Team Contributions",
804
+ "description": "All team members are actively contributing",
805
+ "impact": "medium"
806
+ })
807
+ elif max(commit_counts) > avg_commits * 3:
808
+ concerns.append({
809
+ "category": "team",
810
+ "title": "Unbalanced Contributions",
811
+ "description": "Work is heavily concentrated among few developers",
812
+ "impact": "medium",
813
+ "recommendation": "Consider distributing work more evenly"
814
+ })
815
+
816
+ # Code quality indicators
817
+ total_lines = sum(
818
+ c.get('filtered_insertions', c.get('insertions', 0)) +
819
+ c.get('filtered_deletions', c.get('deletions', 0))
820
+ for c in commits
821
+ )
822
+ avg_commit_size = total_lines / max(len(commits), 1)
823
+
824
+ if 20 <= avg_commit_size <= 200:
825
+ wins.append({
826
+ "category": "quality",
827
+ "title": "Optimal Commit Size",
828
+ "description": f"Average commit size of {avg_commit_size:.0f} lines indicates good change management",
829
+ "impact": "low"
830
+ })
831
+ elif avg_commit_size > 500:
832
+ concerns.append({
833
+ "category": "quality",
834
+ "title": "Large Commit Sizes",
835
+ "description": f"Average commit size of {avg_commit_size:.0f} lines may indicate batched changes",
836
+ "impact": "low",
837
+ "recommendation": "Consider breaking down changes into smaller commits"
838
+ })
839
+
840
+ return wins, concerns
841
+
842
+ def _calculate_overall_health_score(
843
+ self,
844
+ commits: List[Dict[str, Any]],
845
+ developer_stats: List[Dict[str, Any]],
846
+ project_metrics: Dict[str, Any],
847
+ dora_metrics: Dict[str, Any]
848
+ ) -> Dict[str, Any]:
849
+ """Calculate overall project health score."""
850
+
851
+ scores = {}
852
+
853
+ # Activity consistency score (0-100)
854
+ weekly_commits = self._get_weekly_commit_counts(commits)
855
+ if weekly_commits:
856
+ consistency = max(0, 100 - (statistics.pstdev(weekly_commits) / max(statistics.mean(weekly_commits), 1) * 100))
857
+ scores['activity_consistency'] = min(100, consistency)
858
+ else:
859
+ scores['activity_consistency'] = 0
860
+
861
+ # Ticket coverage score
862
+ ticket_coverage = project_metrics.get('ticket_analysis', {}).get('commit_coverage_pct', 0)
863
+ scores['ticket_coverage'] = min(100, ticket_coverage)
864
+
865
+ # Collaboration score (based on multi-project work and team balance)
866
+ if len(developer_stats) > 1:
867
+ commit_counts = [dev['total_commits'] for dev in developer_stats]
868
+ gini = self._calculate_gini_coefficient(commit_counts)
869
+ collaboration_score = max(0, 100 - (gini * 100))
870
+ scores['collaboration'] = collaboration_score
871
+ else:
872
+ scores['collaboration'] = 50 # Neutral for single developer
873
+
874
+ # Code quality score (based on commit size and patterns)
875
+ total_lines = sum(
876
+ c.get('filtered_insertions', c.get('insertions', 0)) +
877
+ c.get('filtered_deletions', c.get('deletions', 0))
878
+ for c in commits
879
+ )
880
+ avg_commit_size = total_lines / max(len(commits), 1)
881
+
882
+ # Optimal range is 20-200 lines per commit
883
+ if 20 <= avg_commit_size <= 200:
884
+ quality_score = 100
885
+ elif avg_commit_size < 20:
886
+ quality_score = max(0, (avg_commit_size / 20) * 100)
887
+ else:
888
+ quality_score = max(0, 100 - ((avg_commit_size - 200) / 500 * 100))
889
+
890
+ scores['code_quality'] = min(100, quality_score)
891
+
892
+ # Velocity score (commits per week vs. baseline)
893
+ weeks_with_activity = len([w for w in weekly_commits if w > 0])
894
+ velocity_score = min(100, (weeks_with_activity / max(len(weekly_commits), 1)) * 100)
895
+ scores['velocity'] = velocity_score
896
+
897
+ # Calculate weighted overall score
898
+ overall_score = sum(
899
+ scores.get(metric, 0) * weight
900
+ for metric, weight in self.health_weights.items()
901
+ )
902
+
903
+ return {
904
+ "overall": round(overall_score, 1),
905
+ "components": {k: round(v, 1) for k, v in scores.items()},
906
+ "weights": self.health_weights,
907
+ "rating": self._get_health_rating(overall_score)
908
+ }
909
+
910
+ def _get_health_rating(self, score: float) -> str:
911
+ """Get health rating based on score."""
912
+ if score >= 80:
913
+ return "excellent"
914
+ elif score >= 60:
915
+ return "good"
916
+ elif score >= 40:
917
+ return "fair"
918
+ else:
919
+ return "needs_improvement"
920
+
921
+ def _get_trend_direction(self, trend_percent: float) -> str:
922
+ """Get trend direction from percentage change."""
923
+ if abs(trend_percent) < self.anomaly_thresholds['trend_threshold'] * 100:
924
+ return "stable"
925
+ elif trend_percent > 0:
926
+ return "increasing"
927
+ else:
928
+ return "decreasing"
929
+
930
+ def _get_coverage_quality_rating(self, coverage: float) -> str:
931
+ """Get quality rating for ticket coverage."""
932
+ if coverage >= 80:
933
+ return "excellent"
934
+ elif coverage >= 60:
935
+ return "good"
936
+ elif coverage >= 40:
937
+ return "fair"
938
+ else:
939
+ return "poor"
940
+
941
+ def _calculate_active_developer_percentage(self, developer_stats: List[Dict[str, Any]]) -> float:
942
+ """Calculate percentage of developers with meaningful activity."""
943
+ if not developer_stats:
944
+ return 0
945
+
946
+ total_commits = sum(dev['total_commits'] for dev in developer_stats)
947
+ avg_commits = total_commits / len(developer_stats)
948
+ threshold = max(1, avg_commits * 0.1) # 10% of average
949
+
950
+ active_developers = sum(1 for dev in developer_stats if dev['total_commits'] >= threshold)
951
+ return round((active_developers / len(developer_stats)) * 100, 1)
952
+
953
+ def _calculate_avg_developers_per_project(self, commits: List[Dict[str, Any]]) -> float:
954
+ """Calculate average number of developers per project."""
955
+ project_developers = defaultdict(set)
956
+
957
+ for commit in commits:
958
+ project_key = commit.get('project_key', 'UNKNOWN')
959
+ dev_id = commit.get('canonical_id', commit.get('author_email'))
960
+ project_developers[project_key].add(dev_id)
961
+
962
+ if not project_developers:
963
+ return 0
964
+
965
+ avg = sum(len(devs) for devs in project_developers.values()) / len(project_developers)
966
+ return round(avg, 1)
967
+
968
+ def _count_cross_project_contributors(
969
+ self,
970
+ commits: List[Dict[str, Any]],
971
+ developer_stats: List[Dict[str, Any]]
972
+ ) -> int:
973
+ """Count developers who contribute to multiple projects."""
974
+ developer_projects = defaultdict(set)
975
+
976
+ for commit in commits:
977
+ project_key = commit.get('project_key', 'UNKNOWN')
978
+ dev_id = commit.get('canonical_id', commit.get('author_email'))
979
+ developer_projects[dev_id].add(project_key)
980
+
981
+ return sum(1 for projects in developer_projects.values() if len(projects) > 1)
982
+
983
+ def _calculate_project_health_score(
984
+ self,
985
+ commits: List[Dict[str, Any]],
986
+ contributors: Set[str]
987
+ ) -> Dict[str, Any]:
988
+ """Calculate health score for a specific project."""
989
+
990
+ if not commits:
991
+ return {"overall": 0, "components": {}, "rating": "no_data"}
992
+
993
+ scores = {}
994
+
995
+ # Activity score (commits per week)
996
+ weekly_commits = self._get_weekly_commit_counts(commits)
997
+ if weekly_commits:
998
+ avg_weekly = statistics.mean(weekly_commits)
999
+ activity_score = min(100, avg_weekly * 10) # Scale appropriately
1000
+ scores['activity'] = activity_score
1001
+ else:
1002
+ scores['activity'] = 0
1003
+
1004
+ # Contributor diversity score
1005
+ if len(contributors) == 1:
1006
+ diversity_score = 30 # Single contributor is risky
1007
+ elif len(contributors) <= 3:
1008
+ diversity_score = 60
1009
+ else:
1010
+ diversity_score = 100
1011
+ scores['contributor_diversity'] = diversity_score
1012
+
1013
+ # Consistency score
1014
+ if len(weekly_commits) > 1:
1015
+ consistency = max(0, 100 - (statistics.pstdev(weekly_commits) / max(statistics.mean(weekly_commits), 1) * 50))
1016
+ scores['consistency'] = consistency
1017
+ else:
1018
+ scores['consistency'] = 50
1019
+
1020
+ # Overall score (equal weights for now)
1021
+ overall_score = sum(scores.values()) / len(scores)
1022
+
1023
+ return {
1024
+ "overall": round(overall_score, 1),
1025
+ "components": {k: round(v, 1) for k, v in scores.items()},
1026
+ "rating": self._get_health_rating(overall_score)
1027
+ }
1028
+
1029
+ def _get_project_contributor_details(
1030
+ self,
1031
+ commits: List[Dict[str, Any]],
1032
+ developer_stats: List[Dict[str, Any]]
1033
+ ) -> List[Dict[str, Any]]:
1034
+ """Get detailed contributor information for a project."""
1035
+
1036
+ # Create developer lookup
1037
+ dev_lookup = {dev['canonical_id']: dev for dev in developer_stats}
1038
+
1039
+ # Count contributions per developer
1040
+ contributor_commits = defaultdict(int)
1041
+ contributor_lines = defaultdict(int)
1042
+
1043
+ for commit in commits:
1044
+ dev_id = commit.get('canonical_id', commit.get('author_email'))
1045
+ contributor_commits[dev_id] += 1
1046
+
1047
+ lines = (
1048
+ commit.get('filtered_insertions', commit.get('insertions', 0)) +
1049
+ commit.get('filtered_deletions', commit.get('deletions', 0))
1050
+ )
1051
+ contributor_lines[dev_id] += lines
1052
+
1053
+ # Build contributor details
1054
+ contributors = []
1055
+ total_commits = len(commits)
1056
+
1057
+ for dev_id, commit_count in contributor_commits.items():
1058
+ dev = dev_lookup.get(dev_id, {})
1059
+
1060
+ contributors.append({
1061
+ "id": dev_id,
1062
+ "name": self._anonymize_value(dev.get('primary_name', 'Unknown'), 'name'),
1063
+ "commits": commit_count,
1064
+ "commits_percentage": round((commit_count / total_commits) * 100, 1),
1065
+ "lines_changed": contributor_lines[dev_id],
1066
+ "role": self._determine_contributor_role(commit_count, total_commits)
1067
+ })
1068
+
1069
+ # Sort by commits descending
1070
+ contributors.sort(key=lambda x: x['commits'], reverse=True)
1071
+
1072
+ return contributors
1073
+
1074
+ def _determine_contributor_role(self, commits: int, total_commits: int) -> str:
1075
+ """Determine contributor role based on contribution percentage."""
1076
+ percentage = (commits / total_commits) * 100
1077
+
1078
+ if percentage >= 50:
1079
+ return "primary"
1080
+ elif percentage >= 25:
1081
+ return "major"
1082
+ elif percentage >= 10:
1083
+ return "regular"
1084
+ else:
1085
+ return "occasional"
1086
+
1087
+ def _calculate_project_trends(self, commits: List[Dict[str, Any]]) -> Dict[str, Any]:
1088
+ """Calculate trends for a specific project."""
1089
+
1090
+ if len(commits) < 4: # Need sufficient data for trends
1091
+ return {"insufficient_data": True}
1092
+
1093
+ # Sort by timestamp
1094
+ sorted_commits = sorted(commits, key=lambda x: x['timestamp'])
1095
+
1096
+ # Split into quarters for trend analysis
1097
+ quarter_size = len(sorted_commits) // 4
1098
+ quarters = [
1099
+ sorted_commits[i*quarter_size:(i+1)*quarter_size]
1100
+ for i in range(4)
1101
+ ]
1102
+
1103
+ # Handle remainder commits
1104
+ if len(sorted_commits) % 4:
1105
+ quarters[-1].extend(sorted_commits[4*quarter_size:])
1106
+
1107
+ # Calculate metrics per quarter
1108
+ quarter_metrics = []
1109
+ for quarter in quarters:
1110
+ metrics = {
1111
+ 'commits': len(quarter),
1112
+ 'lines': sum(
1113
+ c.get('filtered_insertions', c.get('insertions', 0)) +
1114
+ c.get('filtered_deletions', c.get('deletions', 0))
1115
+ for c in quarter
1116
+ ),
1117
+ 'contributors': len(set(c.get('canonical_id', c.get('author_email')) for c in quarter))
1118
+ }
1119
+ quarter_metrics.append(metrics)
1120
+
1121
+ # Calculate trends (compare Q1 vs Q4)
1122
+ trends = {}
1123
+ for metric in ['commits', 'lines', 'contributors']:
1124
+ q1_value = quarter_metrics[0][metric]
1125
+ q4_value = quarter_metrics[-1][metric]
1126
+
1127
+ if q1_value > 0:
1128
+ change = ((q4_value - q1_value) / q1_value) * 100
1129
+ trends[f'{metric}_trend'] = round(change, 1)
1130
+ else:
1131
+ trends[f'{metric}_trend'] = 0
1132
+
1133
+ return trends
1134
+
1135
+ def _detect_project_anomalies(self, commits: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
1136
+ """Detect anomalies in project-specific data."""
1137
+
1138
+ if len(commits) < 7: # Need sufficient data
1139
+ return []
1140
+
1141
+ anomalies = []
1142
+
1143
+ # Get daily commit counts
1144
+ daily_commits = self._get_daily_commit_counts(commits)
1145
+
1146
+ if len(daily_commits) >= 7:
1147
+ mean_daily = statistics.mean(daily_commits)
1148
+ std_daily = statistics.pstdev(daily_commits) if len(daily_commits) > 1 else 0
1149
+
1150
+ # Find days with unusual activity
1151
+ for i, count in enumerate(daily_commits):
1152
+ if std_daily > 0 and count > mean_daily + (std_daily * 2):
1153
+ anomalies.append({
1154
+ "type": "activity_spike",
1155
+ "value": count,
1156
+ "expected": round(mean_daily, 1),
1157
+ "day_index": i,
1158
+ "severity": "medium"
1159
+ })
1160
+
1161
+ return anomalies
1162
+
1163
+ def _identify_primary_contributors(
1164
+ self,
1165
+ commits: List[Dict[str, Any]],
1166
+ contributor_details: List[Dict[str, Any]]
1167
+ ) -> List[str]:
1168
+ """Identify primary contributors (top 80% of activity)."""
1169
+
1170
+ sorted_contributors = sorted(contributor_details, key=lambda x: x['commits'], reverse=True)
1171
+ total_commits = sum(c['commits'] for c in contributor_details)
1172
+
1173
+ primary_contributors = []
1174
+ cumulative_commits = 0
1175
+
1176
+ for contributor in sorted_contributors:
1177
+ cumulative_commits += contributor['commits']
1178
+ primary_contributors.append(contributor['name'])
1179
+
1180
+ if cumulative_commits >= total_commits * 0.8:
1181
+ break
1182
+
1183
+ return primary_contributors
1184
+
1185
+ def _calculate_contribution_distribution(self, commits: List[Dict[str, Any]]) -> Dict[str, Any]:
1186
+ """Calculate distribution metrics for contributions."""
1187
+
1188
+ contributor_commits = defaultdict(int)
1189
+ for commit in commits:
1190
+ dev_id = commit.get('canonical_id', commit.get('author_email'))
1191
+ contributor_commits[dev_id] += 1
1192
+
1193
+ commit_counts = list(contributor_commits.values())
1194
+
1195
+ if not commit_counts:
1196
+ return {}
1197
+
1198
+ gini = self._calculate_gini_coefficient(commit_counts)
1199
+
1200
+ return {
1201
+ "gini_coefficient": round(gini, 3),
1202
+ "concentration_level": "high" if gini > 0.7 else "medium" if gini > 0.4 else "low",
1203
+ "top_contributor_percentage": round((max(commit_counts) / sum(commit_counts)) * 100, 1),
1204
+ "contributor_count": len(commit_counts)
1205
+ }
1206
+
1207
+ def _get_developer_projects(self, commits: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
1208
+ """Get projects a developer has worked on with contribution details."""
1209
+
1210
+ project_contributions = defaultdict(lambda: {
1211
+ 'commits': 0,
1212
+ 'lines_changed': 0,
1213
+ 'story_points': 0,
1214
+ 'first_commit': None,
1215
+ 'last_commit': None
1216
+ })
1217
+
1218
+ for commit in commits:
1219
+ project_key = commit.get('project_key', 'UNKNOWN')
1220
+ project_data = project_contributions[project_key]
1221
+
1222
+ project_data['commits'] += 1
1223
+
1224
+ lines = (
1225
+ commit.get('filtered_insertions', commit.get('insertions', 0)) +
1226
+ commit.get('filtered_deletions', commit.get('deletions', 0))
1227
+ )
1228
+ project_data['lines_changed'] += lines
1229
+ project_data['story_points'] += commit.get('story_points', 0) or 0
1230
+
1231
+ # Track first and last commits
1232
+ commit_date = commit['timestamp']
1233
+ if not project_data['first_commit'] or commit_date < project_data['first_commit']:
1234
+ project_data['first_commit'] = commit_date
1235
+ if not project_data['last_commit'] or commit_date > project_data['last_commit']:
1236
+ project_data['last_commit'] = commit_date
1237
+
1238
+ # Convert to regular dict and add percentages
1239
+ total_commits = len(commits)
1240
+ projects = {}
1241
+
1242
+ for project_key, data in project_contributions.items():
1243
+ projects[project_key] = {
1244
+ 'commits': data['commits'],
1245
+ 'commits_percentage': round((data['commits'] / total_commits) * 100, 1),
1246
+ 'lines_changed': data['lines_changed'],
1247
+ 'story_points': data['story_points'],
1248
+ 'first_commit': data['first_commit'].isoformat() if data['first_commit'] else None,
1249
+ 'last_commit': data['last_commit'].isoformat() if data['last_commit'] else None,
1250
+ 'days_active': (data['last_commit'] - data['first_commit']).days if data['first_commit'] and data['last_commit'] else 0
1251
+ }
1252
+
1253
+ return projects
1254
+
1255
+ def _analyze_developer_contribution_patterns(self, commits: List[Dict[str, Any]]) -> Dict[str, Any]:
1256
+ """Analyze a developer's contribution patterns."""
1257
+
1258
+ if not commits:
1259
+ return {}
1260
+
1261
+ # Time-based patterns (use local hour if available)
1262
+ commit_hours = []
1263
+ for c in commits:
1264
+ if 'local_hour' in c:
1265
+ commit_hours.append(c['local_hour'])
1266
+ elif hasattr(c['timestamp'], 'hour'):
1267
+ commit_hours.append(c['timestamp'].hour)
1268
+
1269
+ commit_days = [c['timestamp'].weekday() for c in commits if hasattr(c['timestamp'], 'weekday')]
1270
+
1271
+ # Size patterns
1272
+ commit_sizes = []
1273
+ for commit in commits:
1274
+ lines = (
1275
+ commit.get('filtered_insertions', commit.get('insertions', 0)) +
1276
+ commit.get('filtered_deletions', commit.get('deletions', 0))
1277
+ )
1278
+ commit_sizes.append(lines)
1279
+
1280
+ patterns = {
1281
+ 'total_commits': len(commits),
1282
+ 'avg_commit_size': round(statistics.mean(commit_sizes), 1) if commit_sizes else 0,
1283
+ 'commit_size_stddev': round(statistics.pstdev(commit_sizes), 1) if len(commit_sizes) > 1 else 0
1284
+ }
1285
+
1286
+ if commit_hours:
1287
+ patterns['peak_hour'] = max(set(commit_hours), key=commit_hours.count)
1288
+ patterns['time_distribution'] = self._get_time_distribution_pattern(commit_hours)
1289
+
1290
+ if commit_days:
1291
+ patterns['peak_day'] = self._get_day_name(max(set(commit_days), key=commit_days.count))
1292
+ patterns['work_pattern'] = self._get_work_pattern(commit_days)
1293
+
1294
+ # Consistency patterns
1295
+ weekly_commits = self._get_weekly_commit_counts(commits)
1296
+ if len(weekly_commits) > 1:
1297
+ patterns['consistency_score'] = round(100 - (statistics.pstdev(weekly_commits) / max(statistics.mean(weekly_commits), 1) * 100), 1)
1298
+ else:
1299
+ patterns['consistency_score'] = 50
1300
+
1301
+ return patterns
1302
+
1303
+ def _get_time_distribution_pattern(self, hours: List[int]) -> str:
1304
+ """Determine time distribution pattern from commit hours."""
1305
+ avg_hour = statistics.mean(hours)
1306
+
1307
+ if avg_hour < 10:
1308
+ return "early_bird"
1309
+ elif avg_hour < 14:
1310
+ return "morning_focused"
1311
+ elif avg_hour < 18:
1312
+ return "afternoon_focused"
1313
+ else:
1314
+ return "night_owl"
1315
+
1316
+ def _get_day_name(self, day_index: int) -> str:
1317
+ """Convert day index to day name."""
1318
+ days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
1319
+ return days[day_index] if 0 <= day_index < 7 else 'Unknown'
1320
+
1321
+ def _get_work_pattern(self, days: List[int]) -> str:
1322
+ """Determine work pattern from commit days."""
1323
+ weekday_commits = sum(1 for day in days if day < 5) # Mon-Fri
1324
+ weekend_commits = sum(1 for day in days if day >= 5) # Sat-Sun
1325
+
1326
+ total = len(days)
1327
+ weekday_pct = (weekday_commits / total) * 100 if total > 0 else 0
1328
+
1329
+ if weekday_pct > 90:
1330
+ return "strictly_weekdays"
1331
+ elif weekday_pct > 75:
1332
+ return "mostly_weekdays"
1333
+ elif weekday_pct > 50:
1334
+ return "mixed_schedule"
1335
+ else:
1336
+ return "weekend_warrior"
1337
+
1338
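# Illustrative sketch (standalone, not part of the packaged file): how the weekday/weekend
# thresholds above classify a sample week of commit days (0=Mon .. 6=Sun).
sample_days = [0, 1, 1, 2, 4, 5]  # five weekday commits, one Saturday commit
weekday_pct = sum(1 for d in sample_days if d < 5) / len(sample_days) * 100
print(round(weekday_pct, 1))  # 83.3 -> falls in the 75-90 band, so _get_work_pattern reports "mostly_weekdays"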
+ def _calculate_developer_collaboration_metrics(
1339
+ self,
1340
+ commits: List[Dict[str, Any]],
1341
+ all_developer_stats: List[Dict[str, Any]]
1342
+ ) -> Dict[str, Any]:
1343
+ """Calculate collaboration metrics for a developer."""
1344
+
1345
+ # Get projects this developer worked on
1346
+ dev_projects = set(c.get('project_key', 'UNKNOWN') for c in commits)
1347
+
1348
+ # Find other developers on same projects
1349
+ collaborators = set()
1350
+ for dev in all_developer_stats:
1351
+ dev_id = dev['canonical_id']
1352
+ # Placeholder: per-developer project overlap data is not available here, so every
+ # other developer is treated as a potential collaborator whenever this developer
+ # has any project activity. A real implementation would compare project sets directly.
+ if len(dev_projects) > 0:  # placeholder logic
1355
+ collaborators.add(dev_id)
1356
+
1357
+ # Remove self from collaborators
1358
+ dev_id = commits[0].get('canonical_id') if commits else None
1359
+ collaborators.discard(dev_id)
1360
+
1361
+ return {
1362
+ 'projects_count': len(dev_projects),
1363
+ 'potential_collaborators': len(collaborators),
1364
+ 'cross_project_work': len(dev_projects) > 1,
1365
+ 'collaboration_score': min(100, len(collaborators) * 10) # Simple scoring
1366
+ }
1367
+
1368
+ def _calculate_developer_health_score(
1369
+ self,
1370
+ commits: List[Dict[str, Any]],
1371
+ dev_stats: Dict[str, Any]
1372
+ ) -> Dict[str, Any]:
1373
+ """Calculate health score for a specific developer."""
1374
+
1375
+ if not commits:
1376
+ return {"overall": 0, "components": {}, "rating": "no_data"}
1377
+
1378
+ scores = {}
1379
+
1380
+ # Activity score based on commits per week
1381
+ weekly_commits = self._get_weekly_commit_counts(commits)
1382
+ if weekly_commits:
1383
+ avg_weekly = statistics.mean(weekly_commits)
1384
+ activity_score = min(100, avg_weekly * 20) # Scale appropriately
1385
+ scores['activity'] = activity_score
1386
+ else:
1387
+ scores['activity'] = 0
1388
+
1389
+ # Consistency score
1390
+ if len(weekly_commits) > 1:
1391
+ consistency = max(0, 100 - (statistics.pstdev(weekly_commits) / max(statistics.mean(weekly_commits), 1) * 50))
1392
+ scores['consistency'] = consistency
1393
+ else:
1394
+ scores['consistency'] = 50
1395
+
1396
+ # Engagement score (based on the number of projects contributed to)
1397
+ project_count = len(set(c.get('project_key', 'UNKNOWN') for c in commits))
1398
+ engagement_score = min(100, project_count * 25 + 25) # Bonus for multi-project work
1399
+ scores['engagement'] = engagement_score
1400
+
1401
+ # Overall score
1402
+ overall_score = sum(scores.values()) / len(scores)
1403
+
1404
+ return {
1405
+ "overall": round(overall_score, 1),
1406
+ "components": {k: round(v, 1) for k, v in scores.items()},
1407
+ "rating": self._get_health_rating(overall_score)
1408
+ }
1409
+
1410
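# Illustrative sketch (standalone, hedged): the health-score arithmetic used above for a
# developer averaging 3 commits/week with perfectly even weeks and work on 2 projects.
import statistics
weekly = [3, 3, 3, 3]
activity = min(100, statistics.mean(weekly) * 20)                                              # 60.0
consistency = max(0, 100 - statistics.pstdev(weekly) / max(statistics.mean(weekly), 1) * 50)   # 100.0
engagement = min(100, 2 * 25 + 25)                                                             # 75
print(round((activity + consistency + engagement) / 3, 1))                                     # 78.3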
+ def _identify_developer_achievements(
1411
+ self,
1412
+ commits: List[Dict[str, Any]],
1413
+ dev_stats: Dict[str, Any]
1414
+ ) -> List[Dict[str, Any]]:
1415
+ """Identify achievements for a developer."""
1416
+
1417
+ achievements = []
1418
+
1419
+ # High commit count
1420
+ if dev_stats['total_commits'] > 50:
1421
+ achievements.append({
1422
+ "type": "productivity",
1423
+ "title": "High Productivity",
1424
+ "description": f"{dev_stats['total_commits']} commits in analysis period",
1425
+ "badge": "prolific_contributor"
1426
+ })
1427
+
1428
+ # Multi-project contributor
1429
+ projects = set(c.get('project_key', 'UNKNOWN') for c in commits)
1430
+ if len(projects) > 3:
1431
+ achievements.append({
1432
+ "type": "versatility",
1433
+ "title": "Multi-Project Contributor",
1434
+ "description": f"Contributed to {len(projects)} projects",
1435
+ "badge": "versatile_developer"
1436
+ })
1437
+
1438
+ # Consistent contributor
1439
+ weekly_commits = self._get_weekly_commit_counts(commits)
1440
+ if len(weekly_commits) > 4:
1441
+ active_weeks = sum(1 for w in weekly_commits if w > 0)
1442
+ consistency_rate = active_weeks / len(weekly_commits)
1443
+
1444
+ if consistency_rate > 0.8:
1445
+ achievements.append({
1446
+ "type": "consistency",
1447
+ "title": "Consistent Contributor",
1448
+ "description": f"Active in {active_weeks} out of {len(weekly_commits)} weeks",
1449
+ "badge": "reliable_contributor"
1450
+ })
1451
+
1452
+ return achievements
1453
+
1454
+ def _identify_improvement_areas(
1455
+ self,
1456
+ commits: List[Dict[str, Any]],
1457
+ dev_stats: Dict[str, Any]
1458
+ ) -> List[Dict[str, Any]]:
1459
+ """Identify areas for improvement for a developer."""
1460
+
1461
+ improvements = []
1462
+
1463
+ # Check ticket linking
1464
+ commits_with_tickets = sum(1 for c in commits if c.get('ticket_references'))
1465
+ ticket_rate = (commits_with_tickets / len(commits)) * 100 if commits else 0
1466
+
1467
+ if ticket_rate < 50:
1468
+ improvements.append({
1469
+ "category": "process",
1470
+ "title": "Improve Ticket Linking",
1471
+ "description": f"Only {ticket_rate:.1f}% of commits reference tickets",
1472
+ "priority": "medium",
1473
+ "suggestion": "Include ticket references in commit messages"
1474
+ })
1475
+
1476
+ # Check commit size consistency
1477
+ commit_sizes = []
1478
+ for commit in commits:
1479
+ lines = (
1480
+ commit.get('filtered_insertions', commit.get('insertions', 0)) +
1481
+ commit.get('filtered_deletions', commit.get('deletions', 0))
1482
+ )
1483
+ commit_sizes.append(lines)
1484
+
1485
+ if commit_sizes and len(commit_sizes) > 5:
1486
+ avg_size = statistics.mean(commit_sizes)
1487
+ if avg_size > 300:
1488
+ improvements.append({
1489
+ "category": "quality",
1490
+ "title": "Consider Smaller Commits",
1491
+ "description": f"Average commit size is {avg_size:.0f} lines",
1492
+ "priority": "low",
1493
+ "suggestion": "Break down large changes into smaller, focused commits"
1494
+ })
1495
+
1496
+ return improvements
1497
+
1498
+ def _build_developer_activity_timeline(self, commits: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
1499
+ """Build activity timeline for a developer."""
1500
+
1501
+ if not commits:
1502
+ return []
1503
+
1504
+ # Group commits by week
1505
+ weekly_activity = defaultdict(lambda: {
1506
+ 'commits': 0,
1507
+ 'lines_changed': 0,
1508
+ 'projects': set()
1509
+ })
1510
+
1511
+ for commit in commits:
1512
+ week_start = self._get_week_start(commit['timestamp'])
1513
+ week_key = week_start.strftime('%Y-%m-%d')
1514
+
1515
+ weekly_activity[week_key]['commits'] += 1
1516
+
1517
+ lines = (
1518
+ commit.get('filtered_insertions', commit.get('insertions', 0)) +
1519
+ commit.get('filtered_deletions', commit.get('deletions', 0))
1520
+ )
1521
+ weekly_activity[week_key]['lines_changed'] += lines
1522
+ weekly_activity[week_key]['projects'].add(commit.get('project_key', 'UNKNOWN'))
1523
+
1524
+ # Convert to timeline format
1525
+ timeline = []
1526
+ for week_key in sorted(weekly_activity.keys()):
1527
+ data = weekly_activity[week_key]
1528
+ timeline.append({
1529
+ 'week': week_key,
1530
+ 'commits': data['commits'],
1531
+ 'lines_changed': data['lines_changed'],
1532
+ 'projects': len(data['projects']),
1533
+ 'project_list': sorted(list(data['projects']))
1534
+ })
1535
+
1536
+ return timeline
1537
+
1538
+ def _analyze_branching_patterns(self, commits: List[Dict[str, Any]]) -> Dict[str, Any]:
1539
+ """Analyze branching and merge patterns."""
1540
+
1541
+ merge_commits = sum(1 for c in commits if c.get('is_merge'))
1542
+ total_commits = len(commits)
1543
+
1544
+ merge_rate = (merge_commits / total_commits) * 100 if total_commits > 0 else 0
1545
+
1546
+ # Determine branching strategy
1547
+ if merge_rate < 5:
1548
+ strategy = "linear"
1549
+ elif merge_rate < 15:
1550
+ strategy = "feature_branches"
1551
+ elif merge_rate < 30:
1552
+ strategy = "git_flow"
1553
+ else:
1554
+ strategy = "complex_branching"
1555
+
1556
+ return {
1557
+ "merge_commits": merge_commits,
1558
+ "merge_rate_percent": round(merge_rate, 1),
1559
+ "strategy": strategy,
1560
+ "complexity_rating": "low" if merge_rate < 15 else "medium" if merge_rate < 30 else "high"
1561
+ }
1562
+
1563
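# Illustrative sketch (standalone): the merge-rate bands above in action. With 4 merge
# commits out of 80, the rate is 5.0%, which lands in the 5-15% band ("feature_branches").
merge_rate = round(4 / 80 * 100, 1)
print(merge_rate)  # 5.0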
+ def _analyze_commit_timing_patterns(self, commits: List[Dict[str, Any]]) -> Dict[str, Any]:
1564
+ """Analyze when commits typically happen."""
1565
+
1566
+ if not commits:
1567
+ return {}
1568
+
1569
+ # Extract timing data
1570
+ hours = []
1571
+ days = []
1572
+
1573
+ for commit in commits:
1574
+ timestamp = commit['timestamp']
1575
+ # Use local hour if available
1576
+ if 'local_hour' in commit:
1577
+ hours.append(commit['local_hour'])
1578
+ elif hasattr(timestamp, 'hour'):
1579
+ hours.append(timestamp.hour)
1580
+ if hasattr(timestamp, 'weekday'):
1581
+ days.append(timestamp.weekday())
1582
+
1583
+ patterns = {}
1584
+
1585
+ if hours:
1586
+ # Hour distribution
1587
+ hour_counts = defaultdict(int)
1588
+ for hour in hours:
1589
+ hour_counts[hour] += 1
1590
+
1591
+ peak_hour = max(hour_counts, key=hour_counts.get)
1592
+ patterns['peak_hour'] = peak_hour
1593
+ patterns['peak_hour_commits'] = hour_counts[peak_hour]
1594
+
1595
+ # Time periods
1596
+ morning = sum(1 for h in hours if 6 <= h < 12)
1597
+ afternoon = sum(1 for h in hours if 12 <= h < 18)
1598
+ evening = sum(1 for h in hours if 18 <= h < 24)
1599
+ night = sum(1 for h in hours if 0 <= h < 6)
1600
+
1601
+ total = len(hours)
1602
+ patterns['time_distribution'] = {
1603
+ 'morning_pct': round((morning / total) * 100, 1),
1604
+ 'afternoon_pct': round((afternoon / total) * 100, 1),
1605
+ 'evening_pct': round((evening / total) * 100, 1),
1606
+ 'night_pct': round((night / total) * 100, 1)
1607
+ }
1608
+
1609
+ if days:
1610
+ # Day distribution
1611
+ day_counts = defaultdict(int)
1612
+ for day in days:
1613
+ day_counts[day] += 1
1614
+
1615
+ peak_day = max(day_counts, key=day_counts.get)
1616
+ patterns['peak_day'] = self._get_day_name(peak_day)
1617
+ patterns['peak_day_commits'] = day_counts[peak_day]
1618
+
1619
+ # Weekday vs weekend
1620
+ weekday_commits = sum(1 for d in days if d < 5)
1621
+ weekend_commits = sum(1 for d in days if d >= 5)
1622
+
1623
+ total = len(days)
1624
+ patterns['weekday_pct'] = round((weekday_commits / total) * 100, 1)
1625
+ patterns['weekend_pct'] = round((weekend_commits / total) * 100, 1)
1626
+
1627
+ return patterns
1628
+
1629
+ def _analyze_pr_workflow(self, prs: List[Dict[str, Any]]) -> Dict[str, Any]:
1630
+ """Analyze pull request workflow patterns."""
1631
+
1632
+ if not prs:
1633
+ return {}
1634
+
1635
+ # PR lifecycle analysis
1636
+ lifetimes = []
1637
+ sizes = []
1638
+ review_counts = []
1639
+
1640
+ for pr in prs:
1641
+ # Calculate PR lifetime
1642
+ created = pr.get('created_at')
1643
+ merged = pr.get('merged_at')
1644
+
1645
+ if created and merged:
1646
+ if isinstance(created, str):
1647
+ created = datetime.fromisoformat(created.replace('Z', '+00:00'))
1648
+ if isinstance(merged, str):
1649
+ merged = datetime.fromisoformat(merged.replace('Z', '+00:00'))
1650
+
1651
+ lifetime_hours = (merged - created).total_seconds() / 3600
1652
+ lifetimes.append(lifetime_hours)
1653
+
1654
+ # PR size (additions + deletions)
1655
+ additions = pr.get('additions', 0)
1656
+ deletions = pr.get('deletions', 0)
1657
+ sizes.append(additions + deletions)
1658
+
1659
+ # Review comments
1660
+ review_comments = pr.get('review_comments', 0)
1661
+ review_counts.append(review_comments)
1662
+
1663
+ workflow = {}
1664
+
1665
+ if lifetimes:
1666
+ workflow['avg_lifetime_hours'] = round(statistics.mean(lifetimes), 1)
1667
+ workflow['median_lifetime_hours'] = round(statistics.median(lifetimes), 1)
1668
+
1669
+ if sizes:
1670
+ workflow['avg_pr_size'] = round(statistics.mean(sizes), 1)
1671
+ workflow['median_pr_size'] = round(statistics.median(sizes), 1)
1672
+
1673
+ if review_counts:
1674
+ workflow['avg_review_comments'] = round(statistics.mean(review_counts), 1)
1675
+ workflow['prs_with_reviews'] = sum(1 for r in review_counts if r > 0)
1676
+ workflow['review_rate_pct'] = round((workflow['prs_with_reviews'] / len(prs)) * 100, 1)
1677
+
1678
+ return workflow
1679
+
1680
+ def _analyze_git_pm_correlation(
1681
+ self,
1682
+ commits: List[Dict[str, Any]],
1683
+ pm_data: Dict[str, Any]
1684
+ ) -> Dict[str, Any]:
1685
+ """Analyze correlation between Git activity and PM platform data."""
1686
+
1687
+ correlations = pm_data.get('correlations', [])
1688
+ metrics = pm_data.get('metrics', {})
1689
+
1690
+ if not correlations:
1691
+ return {"status": "no_correlations"}
1692
+
1693
+ # Analyze correlation quality
1694
+ high_confidence = sum(1 for c in correlations if c.get('confidence', 0) > 0.8)
1695
+ medium_confidence = sum(1 for c in correlations if 0.5 <= c.get('confidence', 0) <= 0.8)
1696
+ low_confidence = sum(1 for c in correlations if c.get('confidence', 0) < 0.5)
1697
+
1698
+ total_correlations = len(correlations)
1699
+
1700
+ # Analyze correlation methods
1701
+ methods = defaultdict(int)
1702
+ for c in correlations:
1703
+ method = c.get('correlation_method', 'unknown')
1704
+ methods[method] += 1
1705
+
1706
+ # Story point accuracy analysis
1707
+ story_analysis = metrics.get('story_point_analysis', {})
1708
+
1709
+ return {
1710
+ "total_correlations": total_correlations,
1711
+ "confidence_distribution": {
1712
+ "high": high_confidence,
1713
+ "medium": medium_confidence,
1714
+ "low": low_confidence
1715
+ },
1716
+ "confidence_rates": {
1717
+ "high_pct": round((high_confidence / total_correlations) * 100, 1),
1718
+ "medium_pct": round((medium_confidence / total_correlations) * 100, 1),
1719
+ "low_pct": round((low_confidence / total_correlations) * 100, 1)
1720
+ },
1721
+ "correlation_methods": dict(methods),
1722
+ "story_point_analysis": story_analysis,
1723
+ "platforms": list(metrics.get('platform_coverage', {}).keys())
1724
+ }
1725
+
1726
+ def _calculate_merge_commit_rate(self, commits: List[Dict[str, Any]]) -> float:
1727
+ """Calculate percentage of merge commits."""
1728
+ if not commits:
1729
+ return 0
1730
+
1731
+ merge_commits = sum(1 for c in commits if c.get('is_merge'))
1732
+ return round((merge_commits / len(commits)) * 100, 1)
1733
+
1734
+ def _analyze_commit_message_quality(self, commits: List[Dict[str, Any]]) -> Dict[str, Any]:
1735
+ """Analyze commit message quality patterns."""
1736
+
1737
+ if not commits:
1738
+ return {}
1739
+
1740
+ message_lengths = []
1741
+ has_ticket_ref = 0
1742
+ conventional_commits = 0
1743
+
1744
+ # Conventional commit prefixes
1745
+ conventional_prefixes = ['feat:', 'fix:', 'docs:', 'style:', 'refactor:', 'test:', 'chore:']
1746
+
1747
+ for commit in commits:
1748
+ message = commit.get('message', '')
1749
+
1750
+ # Message length (in words)
1751
+ word_count = len(message.split())
1752
+ message_lengths.append(word_count)
1753
+
1754
+ # Ticket reference check
1755
+ if commit.get('ticket_references'):
1756
+ has_ticket_ref += 1
1757
+
1758
+ # Conventional commit check
1759
+ if any(message.lower().startswith(prefix) for prefix in conventional_prefixes):
1760
+ conventional_commits += 1
1761
+
1762
+ total_commits = len(commits)
1763
+
1764
+ quality = {}
1765
+
1766
+ if message_lengths:
1767
+ quality['avg_message_length_words'] = round(statistics.mean(message_lengths), 1)
1768
+ quality['median_message_length_words'] = round(statistics.median(message_lengths), 1)
1769
+
1770
+ quality['ticket_reference_rate_pct'] = round((has_ticket_ref / total_commits) * 100, 1)
1771
+ quality['conventional_commit_rate_pct'] = round((conventional_commits / total_commits) * 100, 1)
1772
+
1773
+ # Quality rating
1774
+ score = 0
1775
+ if quality.get('avg_message_length_words', 0) >= 5:
1776
+ score += 25
1777
+ if quality.get('ticket_reference_rate_pct', 0) >= 50:
1778
+ score += 35
1779
+ if quality.get('conventional_commit_rate_pct', 0) >= 30:
1780
+ score += 40
1781
+
1782
+ if score >= 80:
1783
+ quality['overall_rating'] = 'excellent'
1784
+ elif score >= 60:
1785
+ quality['overall_rating'] = 'good'
1786
+ elif score >= 40:
1787
+ quality['overall_rating'] = 'fair'
1788
+ else:
1789
+ quality['overall_rating'] = 'needs_improvement'
1790
+
1791
+ return quality
1792
+
1793
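# Illustrative sketch (standalone): the message-quality scoring bands above for a team whose
# commit messages average 7 words, with 60% ticket references and 20% conventional prefixes.
score = 0
score += 25 if 7 >= 5 else 0    # message length long enough: +25
score += 35 if 60 >= 50 else 0  # ticket reference rate high enough: +35
score += 40 if 20 >= 30 else 0  # conventional-commit rate too low: +0
print(score)                    # 60 -> rated "good"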
+ def _generate_weekly_time_series(
1794
+ self,
1795
+ commits: List[Dict[str, Any]],
1796
+ prs: List[Dict[str, Any]],
1797
+ start_date: datetime,
1798
+ end_date: datetime
1799
+ ) -> List[Dict[str, Any]]:
1800
+ """Generate weekly time series data for charts."""
1801
+
1802
+ weekly_data = []
1803
+ current_date = start_date
1804
+
1805
+ while current_date <= end_date:
1806
+ week_end = current_date + timedelta(days=7)
1807
+
1808
+ # Filter commits for this week
1809
+ week_commits = []
1810
+ for c in commits:
1811
+ # Ensure both timestamps are timezone-aware for comparison
1812
+ commit_ts = c['timestamp']
1813
+ if hasattr(commit_ts, 'tzinfo') and commit_ts.tzinfo is None:
1814
+ # Make timezone-aware if needed
1815
+ commit_ts = commit_ts.replace(tzinfo=timezone.utc)
1816
+ elif not hasattr(commit_ts, 'tzinfo'):
1817
+ # Convert to datetime if needed
1818
+ commit_ts = datetime.fromisoformat(str(commit_ts))
1819
+ if commit_ts.tzinfo is None:
1820
+ commit_ts = commit_ts.replace(tzinfo=timezone.utc)
1821
+
1822
+ if current_date <= commit_ts < week_end:
1823
+ week_commits.append(c)
1824
+
1825
+ # Filter PRs for this week (by merge date)
1826
+ week_prs = []
1827
+ for pr in prs:
1828
+ merged_at = pr.get('merged_at')
1829
+ if merged_at:
1830
+ if isinstance(merged_at, str):
1831
+ merged_at = datetime.fromisoformat(merged_at.replace('Z', '+00:00'))
1832
+ # Ensure timezone-aware for comparison
1833
+ if hasattr(merged_at, 'tzinfo') and merged_at.tzinfo is None:
1834
+ merged_at = merged_at.replace(tzinfo=timezone.utc)
1835
+ if current_date <= merged_at < week_end:
1836
+ week_prs.append(pr)
1837
+
1838
+ # Calculate metrics
1839
+ lines_changed = sum(
1840
+ c.get('filtered_insertions', c.get('insertions', 0)) +
1841
+ c.get('filtered_deletions', c.get('deletions', 0))
1842
+ for c in week_commits
1843
+ )
1844
+
1845
+ story_points = sum(c.get('story_points', 0) or 0 for c in week_commits)
1846
+
1847
+ active_developers = len(set(
1848
+ c.get('canonical_id', c.get('author_email'))
1849
+ for c in week_commits
1850
+ ))
1851
+
1852
+ weekly_data.append({
1853
+ 'date': current_date.strftime('%Y-%m-%d'),
1854
+ 'commits': len(week_commits),
1855
+ 'lines_changed': lines_changed,
1856
+ 'story_points': story_points,
1857
+ 'active_developers': active_developers,
1858
+ 'pull_requests': len(week_prs)
1859
+ })
1860
+
1861
+ current_date = week_end
1862
+
1863
+ return weekly_data
1864
+
1865
+ def _generate_daily_time_series(
1866
+ self,
1867
+ commits: List[Dict[str, Any]],
1868
+ prs: List[Dict[str, Any]],
1869
+ start_date: datetime,
1870
+ end_date: datetime
1871
+ ) -> List[Dict[str, Any]]:
1872
+ """Generate daily time series data for detailed analysis."""
1873
+
1874
+ daily_data = []
1875
+ current_date = start_date
1876
+
1877
+ while current_date <= end_date:
1878
+ day_end = current_date + timedelta(days=1)
1879
+
1880
+ # Filter commits for this day
1881
+ day_commits = []
1882
+ for c in commits:
1883
+ # Ensure both timestamps are timezone-aware for comparison
1884
+ commit_ts = c['timestamp']
1885
+ if hasattr(commit_ts, 'tzinfo') and commit_ts.tzinfo is None:
1886
+ # Make timezone-aware if needed
1887
+ commit_ts = commit_ts.replace(tzinfo=timezone.utc)
1888
+ elif not hasattr(commit_ts, 'tzinfo'):
1889
+ # Convert to datetime if needed
1890
+ commit_ts = datetime.fromisoformat(str(commit_ts))
1891
+ if commit_ts.tzinfo is None:
1892
+ commit_ts = commit_ts.replace(tzinfo=timezone.utc)
1893
+
1894
+ if current_date <= commit_ts < day_end:
1895
+ day_commits.append(c)
1896
+
1897
+ daily_data.append({
1898
+ 'date': current_date.strftime('%Y-%m-%d'),
1899
+ 'commits': len(day_commits)
1900
+ })
1901
+
1902
+ current_date = day_end
1903
+
1904
+ return daily_data
1905
+
1906
+ def _generate_quantitative_insights(
1907
+ self,
1908
+ commits: List[Dict[str, Any]],
1909
+ developer_stats: List[Dict[str, Any]]
1910
+ ) -> List[Dict[str, Any]]:
1911
+ """Generate quantitative insights from data analysis."""
1912
+
1913
+ insights = []
1914
+
1915
+ # Team productivity insights
1916
+ total_commits = len(commits)
1917
+ if total_commits > 0:
1918
+ weekly_commits = self._get_weekly_commit_counts(commits)
1919
+ if weekly_commits:
1920
+ avg_weekly = statistics.mean(weekly_commits)
1921
+ insights.append({
1922
+ "category": "productivity",
1923
+ "type": "metric",
1924
+ "title": "Weekly Commit Rate",
1925
+ "description": f"Team averages {avg_weekly:.1f} commits per week",
1926
+ "value": avg_weekly,
1927
+ "trend": self._calculate_simple_trend(weekly_commits),
1928
+ "priority": "medium"
1929
+ })
1930
+
1931
+ # Developer distribution insights
1932
+ if len(developer_stats) > 1:
1933
+ commit_counts = [dev['total_commits'] for dev in developer_stats]
1934
+ gini = self._calculate_gini_coefficient(commit_counts)
1935
+
1936
+ if gini > 0.7:
1937
+ insights.append({
1938
+ "category": "team",
1939
+ "type": "concern",
1940
+ "title": "Unbalanced Contributions",
1941
+ "description": f"Work is concentrated among few developers (Gini: {gini:.2f})",
1942
+ "value": gini,
1943
+ "priority": "high",
1944
+ "recommendation": "Consider distributing work more evenly"
1945
+ })
1946
+ elif gini < 0.3:
1947
+ insights.append({
1948
+ "category": "team",
1949
+ "type": "positive",
1950
+ "title": "Balanced Team Contributions",
1951
+ "description": f"Work is well-distributed across the team (Gini: {gini:.2f})",
1952
+ "value": gini,
1953
+ "priority": "low"
1954
+ })
1955
+
1956
+ # Code quality insights
1957
+ commit_sizes = []
1958
+ for commit in commits:
1959
+ lines = (
1960
+ commit.get('filtered_insertions', commit.get('insertions', 0)) +
1961
+ commit.get('filtered_deletions', commit.get('deletions', 0))
1962
+ )
1963
+ commit_sizes.append(lines)
1964
+
1965
+ if commit_sizes:
1966
+ avg_size = statistics.mean(commit_sizes)
1967
+ if avg_size > 300:
1968
+ insights.append({
1969
+ "category": "quality",
1970
+ "type": "concern",
1971
+ "title": "Large Commit Sizes",
1972
+ "description": f"Average commit size is {avg_size:.0f} lines",
1973
+ "value": avg_size,
1974
+ "priority": "medium",
1975
+ "recommendation": "Consider breaking down changes into smaller commits"
1976
+ })
1977
+ elif 20 <= avg_size <= 200:
1978
+ insights.append({
1979
+ "category": "quality",
1980
+ "type": "positive",
1981
+ "title": "Optimal Commit Sizes",
1982
+ "description": f"Average commit size of {avg_size:.0f} lines indicates good change management",
1983
+ "value": avg_size,
1984
+ "priority": "low"
1985
+ })
1986
+
1987
+ return insights
1988
+
1989
+ def _process_qualitative_insights(self, qualitative_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
1990
+ """Process qualitative analysis results into insights."""
1991
+
1992
+ insights = []
1993
+
1994
+ for item in qualitative_data:
1995
+ # Transform qualitative data into insight format
1996
+ insight = {
1997
+ "category": item.get('category', 'general'),
1998
+ "type": "qualitative",
1999
+ "title": item.get('insight', 'Qualitative Insight'),
2000
+ "description": item.get('description', ''),
2001
+ "priority": item.get('priority', 'medium'),
2002
+ "confidence": item.get('confidence', 0.5)
2003
+ }
2004
+
2005
+ if 'recommendation' in item:
2006
+ insight['recommendation'] = item['recommendation']
2007
+
2008
+ insights.append(insight)
2009
+
2010
+ return insights
2011
+
2012
+ def _prioritize_insights(self, insights: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
2013
+ """Prioritize insights by importance and impact."""
2014
+
2015
+ def get_priority_score(insight):
2016
+ priority_scores = {'high': 3, 'medium': 2, 'low': 1}
2017
+ type_scores = {'concern': 3, 'positive': 1, 'metric': 2, 'qualitative': 2}
2018
+
2019
+ priority_score = priority_scores.get(insight.get('priority', 'medium'), 2)
2020
+ type_score = type_scores.get(insight.get('type', 'metric'), 2)
2021
+
2022
+ return priority_score + type_score
2023
+
2024
+ # Sort by priority score (descending)
2025
+ prioritized = sorted(insights, key=get_priority_score, reverse=True)
2026
+
2027
+ return prioritized[:10] # Return top 10 insights
2028
+
2029
+ def _categorize_insights(self, insights: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
2030
+ """Categorize insights by category."""
2031
+
2032
+ categories = defaultdict(list)
2033
+
2034
+ for insight in insights:
2035
+ category = insight.get('category', 'general')
2036
+ categories[category].append(insight)
2037
+
2038
+ return dict(categories)
2039
+
2040
+ def _build_untracked_analysis(
2041
+ self,
2042
+ commits: List[Dict[str, Any]],
2043
+ project_metrics: Dict[str, Any]
2044
+ ) -> Dict[str, Any]:
2045
+ """Build comprehensive untracked commit analysis for JSON export.
2046
+
2047
+ WHY: Untracked work analysis is critical for understanding what development
2048
+ activities are happening outside the formal process. This data enables
2049
+ process improvements, training identification, and better project visibility.
2050
+
2051
+ Args:
2052
+ commits: List of all commits
2053
+ project_metrics: Project metrics including ticket analysis
2054
+
2055
+ Returns:
2056
+ Dictionary with comprehensive untracked analysis
2057
+ """
2058
+ ticket_analysis = project_metrics.get('ticket_analysis', {})
2059
+ untracked_commits = ticket_analysis.get('untracked_commits', [])
2060
+
2061
+ if not untracked_commits:
2062
+ return {
2063
+ "summary": {
2064
+ "total_untracked": 0,
2065
+ "untracked_percentage": 0,
2066
+ "analysis_status": "no_untracked_commits"
2067
+ },
2068
+ "categories": {},
2069
+ "contributors": {},
2070
+ "projects": {},
2071
+ "trends": {},
2072
+ "recommendations": []
2073
+ }
2074
+
2075
+ # Initialize analysis structures
2076
+ categories = {}
2077
+ contributors = {}
2078
+ projects = {}
2079
+ monthly_trends = {}
2080
+
2081
+ total_commits = ticket_analysis.get('total_commits', len(commits))
2082
+ total_untracked = len(untracked_commits)
2083
+
2084
+ # Process each untracked commit
2085
+ for commit in untracked_commits:
2086
+ # Category analysis
2087
+ category = commit.get('category', 'other')
2088
+ if category not in categories:
2089
+ categories[category] = {
2090
+ 'count': 0,
2091
+ 'lines_changed': 0,
2092
+ 'files_changed': 0,
2093
+ 'examples': [],
2094
+ 'authors': set()
2095
+ }
2096
+
2097
+ categories[category]['count'] += 1
2098
+ categories[category]['lines_changed'] += commit.get('lines_changed', 0)
2099
+ categories[category]['files_changed'] += commit.get('files_changed', 0)
2100
+ categories[category]['authors'].add(commit.get('canonical_id', commit.get('author_email', 'Unknown')))
2101
+
2102
+ if len(categories[category]['examples']) < 3:
2103
+ categories[category]['examples'].append({
2104
+ 'hash': commit.get('hash', ''),
2105
+ 'message': commit.get('message', '')[:200],
2106
+ 'author': self._anonymize_value(commit.get('author', 'Unknown'), 'name'),
2107
+ 'timestamp': commit.get('timestamp'),
2108
+ 'lines_changed': commit.get('lines_changed', 0),
2109
+ 'files_changed': commit.get('files_changed', 0)
2110
+ })
2111
+
2112
+ # Contributor analysis
2113
+ author_id = commit.get('canonical_id', commit.get('author_email', 'Unknown'))
2114
+ author_name = self._anonymize_value(commit.get('author', 'Unknown'), 'name')
2115
+
2116
+ if author_id not in contributors:
2117
+ contributors[author_id] = {
2118
+ 'name': author_name,
2119
+ 'count': 0,
2120
+ 'lines_changed': 0,
2121
+ 'categories': set(),
2122
+ 'projects': set(),
2123
+ 'recent_commits': []
2124
+ }
2125
+
2126
+ contributors[author_id]['count'] += 1
2127
+ contributors[author_id]['lines_changed'] += commit.get('lines_changed', 0)
2128
+ contributors[author_id]['categories'].add(category)
2129
+ contributors[author_id]['projects'].add(commit.get('project_key', 'UNKNOWN'))
2130
+
2131
+ if len(contributors[author_id]['recent_commits']) < 5:
2132
+ contributors[author_id]['recent_commits'].append({
2133
+ 'hash': commit.get('hash', ''),
2134
+ 'message': commit.get('message', '')[:100],
2135
+ 'category': category,
2136
+ 'timestamp': commit.get('timestamp'),
2137
+ 'lines_changed': commit.get('lines_changed', 0)
2138
+ })
2139
+
2140
+ # Project analysis
2141
+ project = commit.get('project_key', 'UNKNOWN')
2142
+ if project not in projects:
2143
+ projects[project] = {
2144
+ 'count': 0,
2145
+ 'lines_changed': 0,
2146
+ 'categories': set(),
2147
+ 'contributors': set(),
2148
+ 'avg_commit_size': 0
2149
+ }
2150
+
2151
+ projects[project]['count'] += 1
2152
+ projects[project]['lines_changed'] += commit.get('lines_changed', 0)
2153
+ projects[project]['categories'].add(category)
2154
+ projects[project]['contributors'].add(author_id)
2155
+
2156
+ # Monthly trend analysis
2157
+ timestamp = commit.get('timestamp')
2158
+ if timestamp and hasattr(timestamp, 'strftime'):
2159
+ month_key = timestamp.strftime('%Y-%m')
2160
+ if month_key not in monthly_trends:
2161
+ monthly_trends[month_key] = {
2162
+ 'count': 0,
2163
+ 'categories': {},
2164
+ 'contributors': set()
2165
+ }
2166
+ monthly_trends[month_key]['count'] += 1
2167
+ monthly_trends[month_key]['contributors'].add(author_id)
2168
+
2169
+ if category not in monthly_trends[month_key]['categories']:
2170
+ monthly_trends[month_key]['categories'][category] = 0
2171
+ monthly_trends[month_key]['categories'][category] += 1
2172
+
2173
+ # Convert sets to lists and calculate derived metrics
2174
+ for category_data in categories.values():
2175
+ category_data['authors'] = len(category_data['authors'])
2176
+ category_data['avg_lines_per_commit'] = (
2177
+ category_data['lines_changed'] / category_data['count']
2178
+ if category_data['count'] > 0 else 0
2179
+ )
2180
+
2181
+ for contributor_data in contributors.values():
2182
+ contributor_data['categories'] = list(contributor_data['categories'])
2183
+ contributor_data['projects'] = list(contributor_data['projects'])
2184
+ contributor_data['avg_lines_per_commit'] = (
2185
+ contributor_data['lines_changed'] / contributor_data['count']
2186
+ if contributor_data['count'] > 0 else 0
2187
+ )
2188
+
2189
+ for project_data in projects.values():
2190
+ project_data['categories'] = list(project_data['categories'])
2191
+ project_data['contributors'] = len(project_data['contributors'])
2192
+ project_data['avg_commit_size'] = (
2193
+ project_data['lines_changed'] / project_data['count']
2194
+ if project_data['count'] > 0 else 0
2195
+ )
2196
+
2197
+ # Convert sets to counts in trends
2198
+ for trend_data in monthly_trends.values():
2199
+ trend_data['contributors'] = len(trend_data['contributors'])
2200
+
2201
+ # Generate insights and recommendations
2202
+ insights = self._generate_untracked_insights(categories, contributors, projects, total_untracked, total_commits)
2203
+ recommendations = self._generate_untracked_recommendations_json(categories, contributors, total_untracked, total_commits)
2204
+
2205
+ # Calculate quality scores
2206
+ quality_scores = self._calculate_untracked_quality_scores(categories, total_untracked, total_commits)
2207
+
2208
+ return {
2209
+ "summary": {
2210
+ "total_untracked": total_untracked,
2211
+ "total_commits": total_commits,
2212
+ "untracked_percentage": round((total_untracked / total_commits * 100), 2) if total_commits > 0 else 0,
2213
+ "avg_lines_per_untracked_commit": round(
2214
+ sum(commit.get('lines_changed', 0) for commit in untracked_commits) / total_untracked, 1
2215
+ ) if total_untracked > 0 else 0,
2216
+ "analysis_status": "complete"
2217
+ },
2218
+ "categories": categories,
2219
+ "contributors": contributors,
2220
+ "projects": projects,
2221
+ "monthly_trends": monthly_trends,
2222
+ "insights": insights,
2223
+ "recommendations": recommendations,
2224
+ "quality_scores": quality_scores
2225
+ }
2226
+
2227
+ def _generate_untracked_insights(
2228
+ self,
2229
+ categories: Dict[str, Any],
2230
+ contributors: Dict[str, Any],
2231
+ projects: Dict[str, Any],
2232
+ total_untracked: int,
2233
+ total_commits: int
2234
+ ) -> List[Dict[str, Any]]:
2235
+ """Generate insights from untracked commit analysis."""
2236
+ insights = []
2237
+
2238
+ # Category insights
2239
+ if categories:
2240
+ top_category = max(categories.items(), key=lambda x: x[1]['count'])
2241
+ category_name, category_data = top_category
2242
+ category_pct = (category_data['count'] / total_untracked * 100)
2243
+
2244
+ if category_name in ['feature', 'bug_fix']:
2245
+ insights.append({
2246
+ 'type': 'concern',
2247
+ 'category': 'process',
2248
+ 'title': f'High {category_name.replace("_", " ").title()} Untracked Rate',
2249
+ 'description': f'{category_pct:.1f}% of untracked work is {category_name.replace("_", " ")} development',
2250
+ 'impact': 'high',
2251
+ 'value': category_pct
2252
+ })
2253
+ elif category_name in ['maintenance', 'style', 'documentation']:
2254
+ insights.append({
2255
+ 'type': 'positive',
2256
+ 'category': 'process',
2257
+ 'title': 'Appropriate Untracked Work',
2258
+ 'description': f'{category_pct:.1f}% of untracked work is {category_name} - this is acceptable',
2259
+ 'impact': 'low',
2260
+ 'value': category_pct
2261
+ })
2262
+
2263
+ # Contributor concentration insights
2264
+ if len(contributors) > 1:
2265
+ contributor_counts = [data['count'] for data in contributors.values()]
2266
+ max_contributor_count = max(contributor_counts)
2267
+ contributor_concentration = (max_contributor_count / total_untracked * 100)
2268
+
2269
+ if contributor_concentration > 50:
2270
+ insights.append({
2271
+ 'type': 'concern',
2272
+ 'category': 'team',
2273
+ 'title': 'Concentrated Untracked Work',
2274
+ 'description': f'One developer accounts for {contributor_concentration:.1f}% of untracked commits',
2275
+ 'impact': 'medium',
2276
+ 'value': contributor_concentration
2277
+ })
2278
+
2279
+ # Overall coverage insight
2280
+ untracked_pct = (total_untracked / total_commits * 100) if total_commits > 0 else 0
2281
+ if untracked_pct > 40:
2282
+ insights.append({
2283
+ 'type': 'concern',
2284
+ 'category': 'coverage',
2285
+ 'title': 'High Untracked Rate',
2286
+ 'description': f'{untracked_pct:.1f}% of all commits lack ticket references',
2287
+ 'impact': 'high',
2288
+ 'value': untracked_pct
2289
+ })
2290
+ elif untracked_pct < 15:
2291
+ insights.append({
2292
+ 'type': 'positive',
2293
+ 'category': 'coverage',
2294
+ 'title': 'Excellent Tracking Coverage',
2295
+ 'description': f'Only {untracked_pct:.1f}% of commits are untracked',
2296
+ 'impact': 'low',
2297
+ 'value': untracked_pct
2298
+ })
2299
+
2300
+ return insights
2301
+
2302
+ def _generate_untracked_recommendations_json(
2303
+ self,
2304
+ categories: Dict[str, Any],
2305
+ contributors: Dict[str, Any],
2306
+ total_untracked: int,
2307
+ total_commits: int
2308
+ ) -> List[Dict[str, Any]]:
2309
+ """Generate JSON-formatted recommendations for untracked work."""
2310
+ recommendations = []
2311
+
2312
+ # Category-based recommendations
2313
+ feature_count = categories.get('feature', {}).get('count', 0)
2314
+ bug_fix_count = categories.get('bug_fix', {}).get('count', 0)
2315
+
2316
+ if feature_count > total_untracked * 0.25:
2317
+ recommendations.append({
2318
+ 'type': 'process_improvement',
2319
+ 'priority': 'high',
2320
+ 'title': 'Enforce Feature Ticket Requirements',
2321
+ 'description': 'Many feature developments lack ticket references',
2322
+ 'action': 'Require ticket creation and referencing for all new features',
2323
+ 'expected_impact': 'Improved project visibility and planning',
2324
+ 'effort': 'low'
2325
+ })
2326
+
2327
+ if bug_fix_count > total_untracked * 0.20:
2328
+ recommendations.append({
2329
+ 'type': 'process_improvement',
2330
+ 'priority': 'high',
2331
+ 'title': 'Link Bug Fixes to Issues',
2332
+ 'description': 'Bug fixes should be tracked through issue management',
2333
+ 'action': 'Create issues for bugs and reference them in fix commits',
2334
+ 'expected_impact': 'Better bug tracking and resolution visibility',
2335
+ 'effort': 'low'
2336
+ })
2337
+
2338
+ # Coverage-based recommendations
2339
+ untracked_pct = (total_untracked / total_commits * 100) if total_commits > 0 else 0
2340
+ if untracked_pct > 40:
2341
+ recommendations.append({
2342
+ 'type': 'team_training',
2343
+ 'priority': 'medium',
2344
+ 'title': 'Team Process Training',
2345
+ 'description': 'High percentage of untracked commits indicates process gaps',
2346
+ 'action': 'Provide training on ticket referencing and commit best practices',
2347
+ 'expected_impact': 'Improved process adherence and visibility',
2348
+ 'effort': 'medium'
2349
+ })
2350
+
2351
+ # Developer-specific recommendations
2352
+ if len(contributors) > 1:
2353
+ max_contributor_pct = max(
2354
+ (data['count'] / total_untracked * 100) for data in contributors.values()
2355
+ )
2356
+ if max_contributor_pct > 40:
2357
+ recommendations.append({
2358
+ 'type': 'individual_coaching',
2359
+ 'priority': 'medium',
2360
+ 'title': 'Targeted Developer Coaching',
2361
+ 'description': 'Some developers need additional guidance on process',
2362
+ 'action': 'Provide one-on-one coaching for developers with high untracked rates',
2363
+ 'expected_impact': 'More consistent process adherence across the team',
2364
+ 'effort': 'low'
2365
+ })
2366
+
2367
+ return recommendations
2368
+
2369
+ def _calculate_untracked_quality_scores(
2370
+ self,
2371
+ categories: Dict[str, Any],
2372
+ total_untracked: int,
2373
+ total_commits: int
2374
+ ) -> Dict[str, Any]:
2375
+ """Calculate quality scores for untracked work patterns."""
2376
+ scores = {}
2377
+
2378
+ # Process adherence score (lower untracked % = higher score)
2379
+ untracked_pct = (total_untracked / total_commits * 100) if total_commits > 0 else 0
2380
+ process_score = max(0, 100 - untracked_pct * 2) # Scale so 50% untracked = 0 score
2381
+ scores['process_adherence'] = round(min(100, process_score), 1)
2382
+
2383
+ # Appropriate untracked score (higher % of maintenance/docs/style = higher score)
2384
+ appropriate_categories = ['maintenance', 'documentation', 'style', 'test']
2385
+ appropriate_count = sum(
2386
+ categories.get(cat, {}).get('count', 0) for cat in appropriate_categories
2387
+ )
2388
+ appropriate_pct = (appropriate_count / total_untracked * 100) if total_untracked > 0 else 0
2389
+ scores['appropriate_untracked'] = round(appropriate_pct, 1)
2390
+
2391
+ # Work type balance score
2392
+ if categories:
2393
+ category_counts = [data['count'] for data in categories.values()]
2394
+ # Calculate distribution balance (lower Gini = more balanced)
2395
+ gini = self._calculate_gini_coefficient(category_counts)
2396
+ balance_score = max(0, 100 - (gini * 100))
2397
+ scores['work_type_balance'] = round(balance_score, 1)
2398
+ else:
2399
+ scores['work_type_balance'] = 100
2400
+
2401
+ # Overall untracked quality score
2402
+ overall_score = (
2403
+ scores['process_adherence'] * 0.5 +
2404
+ scores['appropriate_untracked'] * 0.3 +
2405
+ scores['work_type_balance'] * 0.2
2406
+ )
2407
+ scores['overall'] = round(overall_score, 1)
2408
+
2409
+ # Quality rating
2410
+ if overall_score >= 80:
2411
+ rating = 'excellent'
2412
+ elif overall_score >= 60:
2413
+ rating = 'good'
2414
+ elif overall_score >= 40:
2415
+ rating = 'fair'
2416
+ else:
2417
+ rating = 'needs_improvement'
2418
+
2419
+ scores['rating'] = rating
2420
+
2421
+ return scores
2422
+
2423
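# Illustrative sketch (standalone): the weighting above for a repository where 30% of commits
# are untracked, half of that untracked work is maintenance/docs/style/tests, and the
# category distribution is fairly balanced (an assumed balance score of 80).
process_adherence = max(0, 100 - 30 * 2)   # 40.0
appropriate_untracked = 50.0
work_type_balance = 80.0                   # assumed, from a low Gini across categories
overall = process_adherence * 0.5 + appropriate_untracked * 0.3 + work_type_balance * 0.2
print(round(overall, 1))                   # 51.0 -> rated "fair"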
+ def _generate_actionable_recommendations(self, insights: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
2424
+ """Generate actionable recommendations from insights."""
2425
+
2426
+ recommendations = []
2427
+
2428
+ # Extract recommendations from insights
2429
+ for insight in insights:
2430
+ if 'recommendation' in insight and insight.get('type') == 'concern':
2431
+ recommendations.append({
2432
+ "title": insight['title'],
2433
+ "action": insight['recommendation'],
2434
+ "priority": insight.get('priority', 'medium'),
2435
+ "category": insight.get('category', 'general'),
2436
+ "expected_impact": self._estimate_recommendation_impact(insight)
2437
+ })
2438
+
2439
+ # Add general recommendations based on patterns
2440
+ self._add_general_recommendations(recommendations, insights)
2441
+
2442
+ return recommendations[:5] # Return top 5 recommendations
2443
+
2444
+ def _estimate_recommendation_impact(self, insight: Dict[str, Any]) -> str:
2445
+ """Estimate the impact of implementing a recommendation."""
2446
+
2447
+ category = insight.get('category', '')
2448
+ priority = insight.get('priority', 'medium')
2449
+
2450
+ if priority == 'high':
2451
+ return 'high'
2452
+ elif category in ['team', 'productivity']:
2453
+ return 'medium'
2454
+ else:
2455
+ return 'low'
2456
+
2457
+ def _add_general_recommendations(
2458
+ self,
2459
+ recommendations: List[Dict[str, Any]],
2460
+ insights: List[Dict[str, Any]]
2461
+ ) -> None:
2462
+ """Add general recommendations based on insight patterns."""
2463
+
2464
+ # Check for lack of ticket coverage insights
2465
+ ticket_insights = [i for i in insights if 'ticket' in i.get('description', '').lower()]
2466
+ if not ticket_insights:
2467
+ recommendations.append({
2468
+ "title": "Improve Development Process Tracking",
2469
+ "action": "Implement consistent ticket referencing in commits and PRs",
2470
+ "priority": "medium",
2471
+ "category": "process",
2472
+ "expected_impact": "medium"
2473
+ })
2474
+
2475
+ def _calculate_simple_trend(self, values: List[float]) -> str:
2476
+ """Calculate simple trend direction from a list of values."""
2477
+
2478
+ if len(values) < 2:
2479
+ return "stable"
2480
+
2481
+ # Compare first half vs second half
2482
+ midpoint = len(values) // 2
2483
+ first_half = values[:midpoint]
2484
+ second_half = values[midpoint:]
2485
+
2486
+ first_avg = statistics.mean(first_half)
2487
+ second_avg = statistics.mean(second_half)
2488
+
2489
+ if first_avg == 0:
2490
+ return "stable"
2491
+
2492
+ change_pct = ((second_avg - first_avg) / first_avg) * 100
2493
+
2494
+ if abs(change_pct) < 10:
2495
+ return "stable"
2496
+ elif change_pct > 0:
2497
+ return "increasing"
2498
+ else:
2499
+ return "decreasing"
2500
+
2501
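# Illustrative sketch (standalone): the first-half vs second-half comparison used above.
import statistics
values = [4, 5, 4, 7, 8, 9]
first, second = values[:3], values[3:]
change_pct = (statistics.mean(second) - statistics.mean(first)) / statistics.mean(first) * 100
print(round(change_pct, 1))  # ~84.6 -> well above +10%, so the trend is reported as "increasing"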
+ def _get_weekly_commit_counts(self, commits: List[Dict[str, Any]]) -> List[int]:
2502
+ """Get commit counts grouped by week."""
2503
+
2504
+ if not commits:
2505
+ return []
2506
+
2507
+ # Group commits by week
2508
+ weekly_counts = defaultdict(int)
2509
+
2510
+ for commit in commits:
2511
+ week_start = self._get_week_start(commit['timestamp'])
2512
+ week_key = week_start.strftime('%Y-%m-%d')
2513
+ weekly_counts[week_key] += 1
2514
+
2515
+ # Return counts in chronological order
2516
+ sorted_weeks = sorted(weekly_counts.keys())
2517
+ return [weekly_counts[week] for week in sorted_weeks]
2518
+
2519
+ def _get_daily_commit_counts(self, commits: List[Dict[str, Any]]) -> List[int]:
2520
+ """Get commit counts grouped by day."""
2521
+
2522
+ if not commits:
2523
+ return []
2524
+
2525
+ # Group commits by day
2526
+ daily_counts = defaultdict(int)
2527
+
2528
+ for commit in commits:
2529
+ day_key = commit['timestamp'].strftime('%Y-%m-%d')
2530
+ daily_counts[day_key] += 1
2531
+
2532
+ # Return counts in chronological order
2533
+ sorted_days = sorted(daily_counts.keys())
2534
+ return [daily_counts[day] for day in sorted_days]
2535
+
2536
+ def _calculate_weekly_commits(self, commits: List[Dict[str, Any]]) -> float:
2537
+ """Calculate average commits per week."""
2538
+
2539
+ weekly_counts = self._get_weekly_commit_counts(commits)
2540
+ if not weekly_counts:
2541
+ return 0
2542
+
2543
+ return round(statistics.mean(weekly_counts), 1)
2544
+
2545
+ def _find_peak_activity_day(self, commits: List[Dict[str, Any]]) -> str:
2546
+ """Find the day of week with most commits."""
2547
+
2548
+ if not commits:
2549
+ return "Unknown"
2550
+
2551
+ day_counts = defaultdict(int)
2552
+
2553
+ for commit in commits:
2554
+ if hasattr(commit['timestamp'], 'weekday'):
2555
+ day_index = commit['timestamp'].weekday()
2556
+ day_counts[day_index] += 1
2557
+
2558
+ if not day_counts:
2559
+ return "Unknown"
2560
+
2561
+ peak_day_index = max(day_counts, key=day_counts.get)
2562
+ return self._get_day_name(peak_day_index)
2563
+
2564
+ def _analyze_commit_size_distribution(self, commits: List[Dict[str, Any]]) -> Dict[str, Any]:
2565
+ """Analyze distribution of commit sizes."""
2566
+
2567
+ if not commits:
2568
+ return {}
2569
+
2570
+ sizes = []
2571
+ for commit in commits:
2572
+ lines = (
2573
+ commit.get('filtered_insertions', commit.get('insertions', 0)) +
2574
+ commit.get('filtered_deletions', commit.get('deletions', 0))
2575
+ )
2576
+ sizes.append(lines)
2577
+
2578
+ if not sizes:
2579
+ return {}
2580
+
2581
+ return {
2582
+ 'mean': round(statistics.mean(sizes), 1),
2583
+ 'median': round(statistics.median(sizes), 1),
2584
+ 'std_dev': round(statistics.pstdev(sizes), 1) if len(sizes) > 1 else 0,
2585
+ 'min': min(sizes),
2586
+ 'max': max(sizes),
2587
+ 'small_commits': sum(1 for s in sizes if s < 50), # < 50 lines
2588
+ 'medium_commits': sum(1 for s in sizes if 50 <= s <= 200), # 50-200 lines
2589
+ 'large_commits': sum(1 for s in sizes if s > 200) # > 200 lines
2590
+ }
2591
+
2592
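# Illustrative sketch (standalone): the size buckets used above applied to a handful of commits.
sizes = [12, 48, 75, 180, 420]
small = sum(1 for s in sizes if s < 50)           # 2
medium = sum(1 for s in sizes if 50 <= s <= 200)  # 2
large = sum(1 for s in sizes if s > 200)          # 1
print(small, medium, large)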
+ def _get_week_start(self, date: datetime) -> datetime:
2593
+ """Get Monday of the week for a given date."""
2594
+
2595
+ # Ensure timezone consistency
2596
+ if hasattr(date, 'tzinfo') and date.tzinfo is not None:
2597
+ if date.tzinfo != timezone.utc:
2598
+ date = date.astimezone(timezone.utc)
2599
+ else:
2600
+ date = date.replace(tzinfo=timezone.utc)
2601
+
2602
+ days_since_monday = date.weekday()
2603
+ monday = date - timedelta(days=days_since_monday)
2604
+ return monday.replace(hour=0, minute=0, second=0, microsecond=0)
2605
+
2606
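# Illustrative sketch (standalone): normalising a timestamp to the Monday of its week,
# mirroring _get_week_start above.
from datetime import datetime, timedelta, timezone
ts = datetime(2024, 3, 14, 15, 30, tzinfo=timezone.utc)  # a Thursday
monday = (ts - timedelta(days=ts.weekday())).replace(hour=0, minute=0, second=0, microsecond=0)
print(monday.date())  # 2024-03-11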
+ def _calculate_gini_coefficient(self, values: List[float]) -> float:
2607
+ """Calculate Gini coefficient for measuring inequality."""
2608
+
2609
+ if not values or len(values) == 1:
2610
+ return 0.0
2611
+
2612
+ sorted_values = sorted(values)
2613
+ n = len(values)
2614
+ cumsum = np.cumsum(sorted_values)
2615
+
2616
+ total = float(cumsum[-1])
+ if total == 0:
+ return 0.0
+ # Use the builtin sum: passing a generator to np.sum is deprecated
+ weighted_sum = sum((i + 1) * sorted_values[i] for i in range(n))
+ return (2 * weighted_sum) / (n * total) - (n + 1) / n
2617
+
2618
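# Illustrative sketch (standalone): the Gini formula above applied to per-developer commit
# counts. A heavily skewed team scores much higher than a balanced one.
def gini(values):
    vals = sorted(values)
    n = len(vals)
    total = sum(vals)
    if n < 2 or total == 0:
        return 0.0
    weighted = sum((i + 1) * v for i, v in enumerate(vals))
    return (2 * weighted) / (n * total) - (n + 1) / n
print(round(gini([10, 10, 10, 10]), 2))  # 0.0  (perfectly balanced)
print(round(gini([1, 1, 2, 36]), 2))     # ~0.66 (work concentrated on one developer)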
+ def _anonymize_value(self, value: str, field_type: str) -> str:
2619
+ """Anonymize a value if anonymization is enabled."""
2620
+
2621
+ if not self.anonymize or not value:
2622
+ return value
2623
+
2624
+ if field_type == 'email' and '@' in value:
2625
+ # Keep domain for email
2626
+ local, domain = value.split('@', 1)
2627
+ value = local # Anonymize only local part
2628
+ suffix = f"@{domain}"
2629
+ else:
2630
+ suffix = ""
2631
+
2632
+ if value not in self._anonymization_map:
2633
+ self._anonymous_counter += 1
2634
+ if field_type == 'name':
2635
+ anonymous = f"Developer{self._anonymous_counter}"
2636
+ elif field_type == 'email':
2637
+ anonymous = f"dev{self._anonymous_counter}"
2638
+ elif field_type == 'id':
2639
+ anonymous = f"ID{self._anonymous_counter:04d}"
2640
+ elif field_type == 'username':
2641
+ anonymous = f"user{self._anonymous_counter}"
2642
+ else:
2643
+ anonymous = f"anon{self._anonymous_counter}"
2644
+
2645
+ self._anonymization_map[value] = anonymous
2646
+
2647
+ return self._anonymization_map[value] + suffix
2648
+
2649
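# Illustrative sketch (hedged): how the anonymisation mapping above behaves, assuming
# anonymization is enabled on a fresh exporter instance ("exporter" is hypothetical here).
# The same input always maps to the same pseudonym, and e-mail domains are preserved:
# exporter._anonymize_value("Jane Doe", "name")          -> "Developer1"
# exporter._anonymize_value("jane@example.com", "email") -> "dev2@example.com"
# exporter._anonymize_value("Jane Doe", "name")          -> "Developer1"  (stable mapping)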
+ def _serialize_for_json(self, data: Any) -> Any:
2650
+ """Serialize data for JSON output, handling datetime objects."""
2651
+
2652
+ if isinstance(data, datetime):
2653
+ return data.isoformat()
2654
+ elif isinstance(data, dict):
2655
+ return {k: self._serialize_for_json(v) for k, v in data.items()}
2656
+ elif isinstance(data, list):
2657
+ return [self._serialize_for_json(item) for item in data]
2658
+ elif isinstance(data, set):
2659
+ return [self._serialize_for_json(item) for item in data] # Convert sets to lists and serialize their items
2660
+ elif isinstance(data, np.integer):
+ return int(data) # Convert numpy integers to Python int
+ elif isinstance(data, np.floating):
+ return float(data) # Convert numpy floats to Python float
2662
+ else:
2663
+ return data
2664
+
2665
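# Illustrative sketch (standalone analogue): the kind of normalisation _serialize_for_json
# performs - datetimes become ISO strings, sets become lists, nested containers recurse.
from datetime import datetime, timezone
raw = {"generated": datetime(2024, 1, 1, tzinfo=timezone.utc), "projects": {"FRONTEND", "API"}}
# After serialization this would resemble (set ordering is not guaranteed):
# {"generated": "2024-01-01T00:00:00+00:00", "projects": ["API", "FRONTEND"]}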
+ # Implementation of abstract methods from BaseReportGenerator
2666
+
2667
+ def generate(self, data: ReportData, output_path: Optional[Path] = None) -> ReportOutput:
2668
+ """Generate comprehensive JSON export from standardized data.
2669
+
2670
+ Args:
2671
+ data: Standardized report data
2672
+ output_path: Optional path to write the JSON to
2673
+
2674
+ Returns:
2675
+ ReportOutput containing the results
2676
+ """
2677
+ try:
2678
+ # Validate data
2679
+ if not self.validate_data(data):
2680
+ return ReportOutput(
2681
+ success=False,
2682
+ errors=["Invalid or incomplete data provided"]
2683
+ )
2684
+
2685
+ # Pre-process data
2686
+ data = self.pre_process(data)
2687
+
2688
+ # Use the main export method with ReportData fields
2689
+ if output_path:
2690
+ self.export_comprehensive_data(
2691
+ commits=data.commits or [],
2692
+ prs=data.pull_requests or [],
2693
+ developer_stats=data.developer_stats or [],
2694
+ project_metrics=data.config.get("project_metrics", {}),
2695
+ dora_metrics=data.dora_metrics or {},
2696
+ output_path=output_path,
2697
+ weeks=data.metadata.analysis_period_weeks or 12,
2698
+ pm_data=data.pm_data,
2699
+ qualitative_data=data.qualitative_results,
2700
+ enhanced_qualitative_analysis=data.config.get("enhanced_qualitative_analysis")
2701
+ )
2702
+
2703
+ return ReportOutput(
2704
+ success=True,
2705
+ file_path=output_path,
2706
+ format=self.get_format_type(),
2707
+ size_bytes=output_path.stat().st_size if output_path.exists() else 0
2708
+ )
2709
+ else:
2710
+ # Generate in-memory JSON
2711
+ end_date = datetime.now(timezone.utc)
2712
+ start_date = end_date - timedelta(weeks=data.metadata.analysis_period_weeks or 12)
2713
+
2714
+ export_data = {
2715
+ "metadata": self._build_metadata(
2716
+ data.commits or [],
2717
+ data.pull_requests or [],
2718
+ data.developer_stats or [],
2719
+ start_date,
2720
+ end_date
2721
+ ),
2722
+ "executive_summary": self._build_executive_summary(
2723
+ data.commits or [],
2724
+ data.pull_requests or [],
2725
+ data.developer_stats or [],
2726
+ data.config.get("project_metrics", {}),
2727
+ data.dora_metrics or {}
2728
+ ),
2729
+ "raw_data": self._build_raw_data_summary(
2730
+ data.commits or [],
2731
+ data.pull_requests or [],
2732
+ data.developer_stats or [],
2733
+ data.dora_metrics or {}
2734
+ )
2735
+ }
2736
+
2737
+ serialized_data = self._serialize_for_json(export_data)
2738
+ json_content = json.dumps(serialized_data, indent=2, ensure_ascii=False)
2739
+
2740
+ return ReportOutput(
2741
+ success=True,
2742
+ content=json_content,
2743
+ format=self.get_format_type(),
2744
+ size_bytes=len(json_content.encode("utf-8"))
2745
+ )
2746
+
2747
+ except Exception as e:
2748
+ logger.error(f"Error generating comprehensive JSON export: {e}")
2749
+ return ReportOutput(
2750
+ success=False,
2751
+ errors=[str(e)]
2752
+ )
2753
+
2754
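# Illustrative usage sketch (hedged): invoking the generator with standardized report data.
# "exporter" and "report_data" are assumed to be a configured instance of this class and a
# populated ReportData object respectively; neither is defined in this hunk.
# from pathlib import Path
# output = exporter.generate(report_data, Path("analytics_export.json"))
# if output.success:
#     print(output.file_path, output.size_bytes)
# else:
#     print(output.errors)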
+ def get_required_fields(self) -> List[str]:
2755
+ """Get the list of required data fields for JSON export.
2756
+
2757
+ Returns:
2758
+ List of required field names
2759
+ """
2760
+ # Comprehensive JSON export can work with any combination of data
2761
+ # but works best with commits and developer_stats
2762
+ return [] # No strict requirements, flexible export
2763
+
2764
+ def get_format_type(self) -> str:
2765
+ """Get the format type this generator produces.
2766
+
2767
+ Returns:
2768
+ Format identifier
2769
+ """
2770
+ return ReportFormat.JSON.value