gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4158 -350
- gitflow_analytics/cli_rich.py +198 -48
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +905 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +444 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -508
- gitflow_analytics/core/analyzer.py +1209 -98
- gitflow_analytics/core/cache.py +1337 -29
- gitflow_analytics/core/data_fetcher.py +1285 -0
- gitflow_analytics/core/identity.py +363 -14
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +8 -1
- gitflow_analytics/extractors/tickets.py +749 -11
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +175 -11
- gitflow_analytics/integrations/jira_integration.py +461 -24
- gitflow_analytics/integrations/orchestrator.py +124 -1
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +379 -20
- gitflow_analytics/models/database.py +843 -53
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +9 -10
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
- gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
- gitflow_analytics/qualitative/core/__init__.py +4 -4
- gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
- gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
- gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
- gitflow_analytics/qualitative/core/processor.py +381 -248
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +7 -7
- gitflow_analytics/qualitative/models/schemas.py +155 -121
- gitflow_analytics/qualitative/utils/__init__.py +4 -4
- gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
- gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
- gitflow_analytics/qualitative/utils/metrics.py +172 -158
- gitflow_analytics/qualitative/utils/text_processing.py +146 -104
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +539 -14
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1676 -212
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2287 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +1 -1
- gitflow_analytics/tui/app.py +129 -126
- gitflow_analytics/tui/screens/__init__.py +3 -3
- gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
- gitflow_analytics/tui/screens/configuration_screen.py +154 -178
- gitflow_analytics/tui/screens/loading_screen.py +100 -110
- gitflow_analytics/tui/screens/main_screen.py +89 -72
- gitflow_analytics/tui/screens/results_screen.py +305 -281
- gitflow_analytics/tui/widgets/__init__.py +2 -2
- gitflow_analytics/tui/widgets/data_table.py +67 -69
- gitflow_analytics/tui/widgets/export_modal.py +76 -76
- gitflow_analytics/tui/widgets/progress_widget.py +41 -46
- gitflow_analytics-1.3.11.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.11.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
- gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/top_level.txt +0 -0
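
Of these, the largest single addition is gitflow_analytics/reports/json_exporter.py (+2770 lines); its diff is reproduced below. Going by the `__init__` and `export_comprehensive_data` signatures in that file, a minimal usage sketch might look like the following (the import path is inferred from the wheel's file layout, and the empty inputs are placeholders for the structures the analysis pipeline normally supplies; this is a sketch, not verified against the package):

    from pathlib import Path

    from gitflow_analytics.reports.json_exporter import ComprehensiveJSONExporter

    exporter = ComprehensiveJSONExporter(anonymize=True)
    report_path = exporter.export_comprehensive_data(
        commits=[],          # commit dicts produced by the analyzer
        prs=[],              # pull request dicts
        developer_stats=[],  # per-developer statistics
        project_metrics={},  # project-level metrics, e.g. ticket_analysis
        dora_metrics={},     # DORA metrics
        output_path=Path("comprehensive_report.json"),
        weeks=12,
    )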
gitflow_analytics/reports/json_exporter.py
@@ -0,0 +1,2770 @@
|
|
|
1
|
+
"""Comprehensive JSON export system for GitFlow Analytics.
|
|
2
|
+
|
|
3
|
+
This module provides a comprehensive JSON export system that consolidates all report data
|
|
4
|
+
into a single structured JSON format optimized for web consumption and API integration.
|
|
5
|
+
|
|
6
|
+
WHY: Traditional CSV reports are excellent for analysis tools but lack the structure needed
|
|
7
|
+
for modern web applications and dashboards. This JSON exporter creates a self-contained,
|
|
8
|
+
hierarchical data structure that includes:
|
|
9
|
+
- Time series data for charts
|
|
10
|
+
- Cross-references between entities
|
|
11
|
+
- Anomaly detection and trend analysis
|
|
12
|
+
- Health scores and insights
|
|
13
|
+
- All existing report data in a unified format
|
|
14
|
+
|
|
15
|
+
DESIGN DECISIONS:
|
|
16
|
+
- Self-contained: All data needed for visualization is included
|
|
17
|
+
- Hierarchical: Supports drill-down from executive summary to detailed metrics
|
|
18
|
+
- Web-optimized: Compatible with common charting libraries (Chart.js, D3, etc.)
|
|
19
|
+
- Extensible: Easy to add new metrics and dimensions
|
|
20
|
+
- Consistent: Follows established patterns from existing report generators
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
import json
|
|
24
|
+
import logging
|
|
25
|
+
import statistics
|
|
26
|
+
from collections import defaultdict
|
|
27
|
+
from datetime import datetime, timedelta, timezone
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
from typing import Any, Dict, List, Optional, Set, Tuple, Union
|
|
30
|
+
|
|
31
|
+
import numpy as np
|
|
32
|
+
import pandas as pd
|
|
33
|
+
|
|
34
|
+
from .base import BaseReportGenerator, ReportData, ReportOutput
|
|
35
|
+
from .interfaces import ReportFormat
|
|
36
|
+
|
|
37
|
+
# Get logger for this module
|
|
38
|
+
logger = logging.getLogger(__name__)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class ComprehensiveJSONExporter(BaseReportGenerator):
|
|
42
|
+
"""Generate comprehensive JSON exports with advanced analytics and insights.
|
|
43
|
+
|
|
44
|
+
This exporter consolidates all GitFlow Analytics data into a single, structured
|
|
45
|
+
JSON format that's optimized for web consumption and includes:
|
|
46
|
+
|
|
47
|
+
- Executive summary with key metrics and trends
|
|
48
|
+
- Project-level data with health scores
|
|
49
|
+
- Developer profiles with contribution patterns
|
|
50
|
+
- Time series data for visualization
|
|
51
|
+
- Anomaly detection and alerting
|
|
52
|
+
- Cross-references between entities
|
|
53
|
+
"""
|
|
54
|
+
|
|
55
|
+
def __init__(self, anonymize: bool = False, **kwargs):
|
|
56
|
+
"""Initialize the comprehensive JSON exporter.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
anonymize: Whether to anonymize developer information
|
|
60
|
+
**kwargs: Additional arguments passed to base class
|
|
61
|
+
"""
|
|
62
|
+
super().__init__(anonymize=anonymize, **kwargs)
|
|
63
|
+
# Note: anonymization map and counter are now in base class
|
|
64
|
+
|
|
65
|
+
# Anomaly detection thresholds
|
|
66
|
+
self.anomaly_thresholds = {
|
|
67
|
+
'spike_multiplier': 2.0, # 2x normal activity = spike
|
|
68
|
+
'drop_threshold': 0.3, # 30% of normal activity = drop
|
|
69
|
+
'volatility_threshold': 1.5, # Standard deviation threshold
|
|
70
|
+
'trend_threshold': 0.2 # 20% change = significant trend
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
# Health score weights
|
|
74
|
+
self.health_weights = {
|
|
75
|
+
'activity_consistency': 0.3,
|
|
76
|
+
'ticket_coverage': 0.25,
|
|
77
|
+
'collaboration': 0.2,
|
|
78
|
+
'code_quality': 0.15,
|
|
79
|
+
'velocity': 0.1
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
def export_comprehensive_data(
|
|
83
|
+
self,
|
|
84
|
+
commits: List[Dict[str, Any]],
|
|
85
|
+
prs: List[Dict[str, Any]],
|
|
86
|
+
developer_stats: List[Dict[str, Any]],
|
|
87
|
+
project_metrics: Dict[str, Any],
|
|
88
|
+
dora_metrics: Dict[str, Any],
|
|
89
|
+
output_path: Path,
|
|
90
|
+
weeks: int = 12,
|
|
91
|
+
pm_data: Optional[Dict[str, Any]] = None,
|
|
92
|
+
qualitative_data: Optional[List[Dict[str, Any]]] = None,
|
|
93
|
+
enhanced_qualitative_analysis: Optional[Dict[str, Any]] = None
|
|
94
|
+
) -> Path:
|
|
95
|
+
"""Export comprehensive analytics data to JSON format.
|
|
96
|
+
|
|
97
|
+
Args:
|
|
98
|
+
commits: List of commit data
|
|
99
|
+
prs: List of pull request data
|
|
100
|
+
developer_stats: Developer statistics
|
|
101
|
+
project_metrics: Project-level metrics
|
|
102
|
+
dora_metrics: DORA metrics data
|
|
103
|
+
output_path: Path to write JSON file
|
|
104
|
+
weeks: Number of weeks analyzed
|
|
105
|
+
pm_data: PM platform integration data
|
|
106
|
+
qualitative_data: Qualitative analysis results
|
|
107
|
+
enhanced_qualitative_analysis: Enhanced multi-dimensional qualitative analysis
|
|
108
|
+
|
|
109
|
+
Returns:
|
|
110
|
+
Path to the generated JSON file
|
|
111
|
+
"""
|
|
112
|
+
logger.info(f"Starting comprehensive JSON export with {len(commits)} commits")
|
|
113
|
+
|
|
114
|
+
# Calculate analysis period
|
|
115
|
+
end_date = datetime.now(timezone.utc)
|
|
116
|
+
start_date = end_date - timedelta(weeks=weeks)
|
|
117
|
+
|
|
118
|
+
# Build comprehensive data structure
|
|
119
|
+
export_data = {
|
|
120
|
+
"metadata": self._build_metadata(commits, prs, developer_stats, start_date, end_date),
|
|
121
|
+
"executive_summary": self._build_executive_summary(commits, prs, developer_stats, project_metrics, dora_metrics),
|
|
122
|
+
"projects": self._build_project_data(commits, prs, developer_stats, project_metrics),
|
|
123
|
+
"developers": self._build_developer_profiles(commits, developer_stats),
|
|
124
|
+
"workflow_analysis": self._build_workflow_analysis(commits, prs, project_metrics, pm_data),
|
|
125
|
+
"time_series": self._build_time_series_data(commits, prs, weeks),
|
|
126
|
+
"insights": self._build_insights_data(commits, developer_stats, qualitative_data),
|
|
127
|
+
"untracked_analysis": self._build_untracked_analysis(commits, project_metrics),
|
|
128
|
+
"raw_data": self._build_raw_data_summary(commits, prs, developer_stats, dora_metrics)
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
# Add enhanced qualitative analysis if available
|
|
132
|
+
if enhanced_qualitative_analysis:
|
|
133
|
+
export_data["enhanced_qualitative_analysis"] = enhanced_qualitative_analysis
|
|
134
|
+
|
|
135
|
+
# Add PM platform data if available
|
|
136
|
+
if pm_data:
|
|
137
|
+
export_data["pm_integration"] = self._build_pm_integration_data(pm_data)
|
|
138
|
+
|
|
139
|
+
# Serialize and write JSON
|
|
140
|
+
serialized_data = self._serialize_for_json(export_data)
|
|
141
|
+
|
|
142
|
+
with open(output_path, 'w') as f:
|
|
143
|
+
json.dump(serialized_data, f, indent=2, ensure_ascii=False)
|
|
144
|
+
|
|
145
|
+
logger.info(f"Comprehensive JSON export written to {output_path}")
|
|
146
|
+
return output_path
|
|
147
|
+
|
|
148
|
+
def create_enhanced_qualitative_analysis(
|
|
149
|
+
self,
|
|
150
|
+
commits: List[Dict[str, Any]],
|
|
151
|
+
qualitative_data: Optional[List[Any]] = None,
|
|
152
|
+
developer_stats: Optional[List[Dict[str, Any]]] = None,
|
|
153
|
+
project_metrics: Optional[Dict[str, Any]] = None,
|
|
154
|
+
pm_data: Optional[Dict[str, Any]] = None,
|
|
155
|
+
weeks_analyzed: int = 12
|
|
156
|
+
) -> Optional[Dict[str, Any]]:
|
|
157
|
+
"""Create enhanced qualitative analysis using the EnhancedQualitativeAnalyzer.
|
|
158
|
+
|
|
159
|
+
This method integrates with the enhanced analyzer to generate comprehensive
|
|
160
|
+
qualitative insights across executive, project, developer, and workflow dimensions.
|
|
161
|
+
|
|
162
|
+
Args:
|
|
163
|
+
commits: List of commit data
|
|
164
|
+
qualitative_data: Optional qualitative commit analysis results
|
|
165
|
+
developer_stats: Optional developer statistics
|
|
166
|
+
project_metrics: Optional project-level metrics
|
|
167
|
+
pm_data: Optional PM platform integration data
|
|
168
|
+
weeks_analyzed: Number of weeks in analysis period
|
|
169
|
+
|
|
170
|
+
Returns:
|
|
171
|
+
Enhanced qualitative analysis results or None if analyzer unavailable
|
|
172
|
+
"""
|
|
173
|
+
try:
|
|
174
|
+
# Import here to avoid circular dependencies
|
|
175
|
+
from ..qualitative.enhanced_analyzer import EnhancedQualitativeAnalyzer
|
|
176
|
+
|
|
177
|
+
# Initialize analyzer
|
|
178
|
+
analyzer = EnhancedQualitativeAnalyzer()
|
|
179
|
+
|
|
180
|
+
# Perform comprehensive analysis
|
|
181
|
+
enhanced_analysis = analyzer.analyze_comprehensive(
|
|
182
|
+
commits=commits,
|
|
183
|
+
qualitative_data=qualitative_data,
|
|
184
|
+
developer_stats=developer_stats,
|
|
185
|
+
project_metrics=project_metrics,
|
|
186
|
+
pm_data=pm_data,
|
|
187
|
+
weeks_analyzed=weeks_analyzed
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
logger.info("Enhanced qualitative analysis completed successfully")
|
|
191
|
+
return enhanced_analysis
|
|
192
|
+
|
|
193
|
+
except ImportError as e:
|
|
194
|
+
logger.warning(f"Enhanced qualitative analyzer not available: {e}")
|
|
195
|
+
return None
|
|
196
|
+
except Exception as e:
|
|
197
|
+
logger.error(f"Enhanced qualitative analysis failed: {e}")
|
|
198
|
+
return None
|
|
199
|
+
|
|
200
|
+
def _build_metadata(
|
|
201
|
+
self,
|
|
202
|
+
commits: List[Dict[str, Any]],
|
|
203
|
+
prs: List[Dict[str, Any]],
|
|
204
|
+
developer_stats: List[Dict[str, Any]],
|
|
205
|
+
start_date: datetime,
|
|
206
|
+
end_date: datetime
|
|
207
|
+
) -> Dict[str, Any]:
|
|
208
|
+
"""Build metadata section with generation info and data summary."""
|
|
209
|
+
|
|
210
|
+
# Get unique repositories and projects
|
|
211
|
+
repositories = set()
|
|
212
|
+
projects = set()
|
|
213
|
+
|
|
214
|
+
for commit in commits:
|
|
215
|
+
if commit.get('repository'):
|
|
216
|
+
repositories.add(commit['repository'])
|
|
217
|
+
if commit.get('project_key'):
|
|
218
|
+
projects.add(commit['project_key'])
|
|
219
|
+
|
|
220
|
+
return {
|
|
221
|
+
"generated_at": datetime.now(timezone.utc).isoformat(),
|
|
222
|
+
"format_version": "2.0.0",
|
|
223
|
+
"generator": "GitFlow Analytics Comprehensive JSON Exporter",
|
|
224
|
+
"analysis_period": {
|
|
225
|
+
"start_date": start_date.isoformat(),
|
|
226
|
+
"end_date": end_date.isoformat(),
|
|
227
|
+
"weeks_analyzed": (end_date - start_date).days // 7,
|
|
228
|
+
"total_days": (end_date - start_date).days
|
|
229
|
+
},
|
|
230
|
+
"data_summary": {
|
|
231
|
+
"total_commits": len(commits),
|
|
232
|
+
"total_prs": len(prs),
|
|
233
|
+
"total_developers": len(developer_stats),
|
|
234
|
+
"repositories_analyzed": len(repositories),
|
|
235
|
+
"projects_identified": len(projects),
|
|
236
|
+
"repositories": sorted(list(repositories)),
|
|
237
|
+
"projects": sorted(list(projects))
|
|
238
|
+
},
|
|
239
|
+
"export_settings": {
|
|
240
|
+
"anonymized": self.anonymize,
|
|
241
|
+
"timezone": "UTC"
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
def _build_executive_summary(
|
|
246
|
+
self,
|
|
247
|
+
commits: List[Dict[str, Any]],
|
|
248
|
+
prs: List[Dict[str, Any]],
|
|
249
|
+
developer_stats: List[Dict[str, Any]],
|
|
250
|
+
project_metrics: Dict[str, Any],
|
|
251
|
+
dora_metrics: Dict[str, Any]
|
|
252
|
+
) -> Dict[str, Any]:
|
|
253
|
+
"""Build executive summary with key metrics, trends, and insights."""
|
|
254
|
+
|
|
255
|
+
# Core metrics
|
|
256
|
+
total_commits = len(commits)
|
|
257
|
+
total_prs = len(prs)
|
|
258
|
+
total_developers = len(developer_stats)
|
|
259
|
+
|
|
260
|
+
# Calculate lines changed
|
|
261
|
+
total_lines = sum(
|
|
262
|
+
commit.get('filtered_insertions', commit.get('insertions', 0)) +
|
|
263
|
+
commit.get('filtered_deletions', commit.get('deletions', 0))
|
|
264
|
+
for commit in commits
|
|
265
|
+
)
|
|
266
|
+
|
|
267
|
+
# Story points
|
|
268
|
+
total_story_points = sum(
|
|
269
|
+
commit.get('story_points', 0) or 0 for commit in commits
|
|
270
|
+
)
|
|
271
|
+
|
|
272
|
+
# Ticket coverage
|
|
273
|
+
ticket_analysis = project_metrics.get('ticket_analysis', {})
|
|
274
|
+
ticket_coverage = ticket_analysis.get('commit_coverage_pct', 0)
|
|
275
|
+
|
|
276
|
+
# Calculate trends (compare first half vs second half)
|
|
277
|
+
trends = self._calculate_executive_trends(commits, prs)
|
|
278
|
+
|
|
279
|
+
# Detect anomalies
|
|
280
|
+
anomalies = self._detect_executive_anomalies(commits, developer_stats)
|
|
281
|
+
|
|
282
|
+
# Identify wins and concerns
|
|
283
|
+
wins, concerns = self._identify_wins_and_concerns(
|
|
284
|
+
commits, developer_stats, project_metrics, dora_metrics
|
|
285
|
+
)
|
|
286
|
+
|
|
287
|
+
return {
|
|
288
|
+
"key_metrics": {
|
|
289
|
+
"commits": {
|
|
290
|
+
"total": total_commits,
|
|
291
|
+
"trend_percent": trends.get('commits_trend', 0),
|
|
292
|
+
"trend_direction": self._get_trend_direction(trends.get('commits_trend', 0))
|
|
293
|
+
},
|
|
294
|
+
"lines_changed": {
|
|
295
|
+
"total": total_lines,
|
|
296
|
+
"trend_percent": trends.get('lines_trend', 0),
|
|
297
|
+
"trend_direction": self._get_trend_direction(trends.get('lines_trend', 0))
|
|
298
|
+
},
|
|
299
|
+
"story_points": {
|
|
300
|
+
"total": total_story_points,
|
|
301
|
+
"trend_percent": trends.get('story_points_trend', 0),
|
|
302
|
+
"trend_direction": self._get_trend_direction(trends.get('story_points_trend', 0))
|
|
303
|
+
},
|
|
304
|
+
"developers": {
|
|
305
|
+
"total": total_developers,
|
|
306
|
+
"active_percentage": self._calculate_active_developer_percentage(developer_stats)
|
|
307
|
+
},
|
|
308
|
+
"pull_requests": {
|
|
309
|
+
"total": total_prs,
|
|
310
|
+
"trend_percent": trends.get('prs_trend', 0),
|
|
311
|
+
"trend_direction": self._get_trend_direction(trends.get('prs_trend', 0))
|
|
312
|
+
},
|
|
313
|
+
"ticket_coverage": {
|
|
314
|
+
"percentage": round(ticket_coverage, 1),
|
|
315
|
+
"quality_rating": self._get_coverage_quality_rating(ticket_coverage)
|
|
316
|
+
}
|
|
317
|
+
},
|
|
318
|
+
"performance_indicators": {
|
|
319
|
+
"velocity": {
|
|
320
|
+
"commits_per_week": round(total_commits / max((len(set(self._get_week_start(c['timestamp']) for c in commits))), 1), 1),
|
|
321
|
+
"story_points_per_week": round(total_story_points / max((len(set(self._get_week_start(c['timestamp']) for c in commits))), 1), 1)
|
|
322
|
+
},
|
|
323
|
+
"quality": {
|
|
324
|
+
"avg_commit_size": round(total_lines / max(total_commits, 1), 1),
|
|
325
|
+
"ticket_coverage_pct": round(ticket_coverage, 1)
|
|
326
|
+
},
|
|
327
|
+
"collaboration": {
|
|
328
|
+
"developers_per_project": self._calculate_avg_developers_per_project(commits),
|
|
329
|
+
"cross_project_contributors": self._count_cross_project_contributors(commits, developer_stats)
|
|
330
|
+
}
|
|
331
|
+
},
|
|
332
|
+
"trends": trends,
|
|
333
|
+
"anomalies": anomalies,
|
|
334
|
+
"wins": wins,
|
|
335
|
+
"concerns": concerns,
|
|
336
|
+
"health_score": self._calculate_overall_health_score(commits, developer_stats, project_metrics, dora_metrics)
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
def _build_project_data(
|
|
340
|
+
self,
|
|
341
|
+
commits: List[Dict[str, Any]],
|
|
342
|
+
prs: List[Dict[str, Any]],
|
|
343
|
+
developer_stats: List[Dict[str, Any]],
|
|
344
|
+
project_metrics: Dict[str, Any]
|
|
345
|
+
) -> Dict[str, Any]:
|
|
346
|
+
"""Build project-level data with health scores and contributor details."""
|
|
347
|
+
|
|
348
|
+
# Group data by project
|
|
349
|
+
project_data = defaultdict(lambda: {
|
|
350
|
+
'commits': [],
|
|
351
|
+
'prs': [],
|
|
352
|
+
'contributors': set(),
|
|
353
|
+
'lines_changed': 0,
|
|
354
|
+
'story_points': 0,
|
|
355
|
+
'files_changed': set()
|
|
356
|
+
})
|
|
357
|
+
|
|
358
|
+
# Process commits by project
|
|
359
|
+
for commit in commits:
|
|
360
|
+
project_key = commit.get('project_key', 'UNKNOWN')
|
|
361
|
+
project_data[project_key]['commits'].append(commit)
|
|
362
|
+
project_data[project_key]['contributors'].add(commit.get('canonical_id', commit.get('author_email')))
|
|
363
|
+
|
|
364
|
+
lines = (
|
|
365
|
+
commit.get('filtered_insertions', commit.get('insertions', 0)) +
|
|
366
|
+
commit.get('filtered_deletions', commit.get('deletions', 0))
|
|
367
|
+
)
|
|
368
|
+
project_data[project_key]['lines_changed'] += lines
|
|
369
|
+
project_data[project_key]['story_points'] += commit.get('story_points', 0) or 0
|
|
370
|
+
|
|
371
|
+
# Track files (simplified - just count)
|
|
372
|
+
files_changed = commit.get('filtered_files_changed', commit.get('files_changed', 0))
|
|
373
|
+
if files_changed:
|
|
374
|
+
# Add placeholder file references
|
|
375
|
+
for i in range(files_changed):
|
|
376
|
+
project_data[project_key]['files_changed'].add(f"file_{i}")
|
|
377
|
+
|
|
378
|
+
# Process PRs by project (if available)
|
|
379
|
+
for pr in prs:
|
|
380
|
+
# Try to determine project from PR data
|
|
381
|
+
project_key = pr.get('project_key', 'UNKNOWN')
|
|
382
|
+
project_data[project_key]['prs'].append(pr)
|
|
383
|
+
|
|
384
|
+
# Build structured project data
|
|
385
|
+
projects = {}
|
|
386
|
+
|
|
387
|
+
for project_key, data in project_data.items():
|
|
388
|
+
commits_list = data['commits']
|
|
389
|
+
contributors = data['contributors']
|
|
390
|
+
|
|
391
|
+
# Calculate project health score
|
|
392
|
+
health_score = self._calculate_project_health_score(commits_list, contributors)
|
|
393
|
+
|
|
394
|
+
# Get contributor details
|
|
395
|
+
contributor_details = self._get_project_contributor_details(commits_list, developer_stats)
|
|
396
|
+
|
|
397
|
+
# Calculate project trends
|
|
398
|
+
project_trends = self._calculate_project_trends(commits_list)
|
|
399
|
+
|
|
400
|
+
# Detect project anomalies
|
|
401
|
+
project_anomalies = self._detect_project_anomalies(commits_list)
|
|
402
|
+
|
|
403
|
+
projects[project_key] = {
|
|
404
|
+
"summary": {
|
|
405
|
+
"total_commits": len(commits_list),
|
|
406
|
+
"total_contributors": len(contributors),
|
|
407
|
+
"lines_changed": data['lines_changed'],
|
|
408
|
+
"story_points": data['story_points'],
|
|
409
|
+
"files_touched": len(data['files_changed']),
|
|
410
|
+
"pull_requests": len(data['prs'])
|
|
411
|
+
},
|
|
412
|
+
"health_score": health_score,
|
|
413
|
+
"contributors": contributor_details,
|
|
414
|
+
"activity_patterns": {
|
|
415
|
+
"commits_per_week": self._calculate_weekly_commits(commits_list),
|
|
416
|
+
"peak_activity_day": self._find_peak_activity_day(commits_list),
|
|
417
|
+
"commit_size_distribution": self._analyze_commit_size_distribution(commits_list)
|
|
418
|
+
},
|
|
419
|
+
"trends": project_trends,
|
|
420
|
+
"anomalies": project_anomalies,
|
|
421
|
+
"focus_metrics": {
|
|
422
|
+
"primary_contributors": self._identify_primary_contributors(commits_list, contributor_details),
|
|
423
|
+
"contribution_distribution": self._calculate_contribution_distribution(commits_list)
|
|
424
|
+
}
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
return projects
|
|
428
|
+
|
|
429
|
+
def _build_developer_profiles(
|
|
430
|
+
self,
|
|
431
|
+
commits: List[Dict[str, Any]],
|
|
432
|
+
developer_stats: List[Dict[str, Any]]
|
|
433
|
+
) -> Dict[str, Any]:
|
|
434
|
+
"""Build comprehensive developer profiles with contribution patterns."""
|
|
435
|
+
|
|
436
|
+
profiles = {}
|
|
437
|
+
|
|
438
|
+
for dev in developer_stats:
|
|
439
|
+
dev_id = dev['canonical_id']
|
|
440
|
+
dev_name = self._anonymize_value(dev['primary_name'], 'name')
|
|
441
|
+
|
|
442
|
+
# Get developer's commits
|
|
443
|
+
dev_commits = [c for c in commits if c.get('canonical_id') == dev_id]
|
|
444
|
+
|
|
445
|
+
# Calculate various metrics
|
|
446
|
+
projects_worked = self._get_developer_projects(dev_commits)
|
|
447
|
+
contribution_patterns = self._analyze_developer_contribution_patterns(dev_commits)
|
|
448
|
+
collaboration_metrics = self._calculate_developer_collaboration_metrics(dev_commits, developer_stats)
|
|
449
|
+
|
|
450
|
+
# Calculate developer health score
|
|
451
|
+
health_score = self._calculate_developer_health_score(dev_commits, dev)
|
|
452
|
+
|
|
453
|
+
# Identify achievements and areas for improvement
|
|
454
|
+
achievements = self._identify_developer_achievements(dev_commits, dev)
|
|
455
|
+
improvement_areas = self._identify_improvement_areas(dev_commits, dev)
|
|
456
|
+
|
|
457
|
+
profiles[dev_id] = {
|
|
458
|
+
"identity": {
|
|
459
|
+
"name": dev_name,
|
|
460
|
+
"canonical_id": dev_id,
|
|
461
|
+
"primary_email": self._anonymize_value(dev['primary_email'], 'email'),
|
|
462
|
+
"github_username": self._anonymize_value(dev.get('github_username', ''), 'username') if dev.get('github_username') else None,
|
|
463
|
+
"aliases_count": dev.get('alias_count', 1)
|
|
464
|
+
},
|
|
465
|
+
"summary": {
|
|
466
|
+
"total_commits": dev['total_commits'],
|
|
467
|
+
"total_story_points": dev['total_story_points'],
|
|
468
|
+
"projects_contributed": len(projects_worked),
|
|
469
|
+
"first_seen": dev.get('first_seen').isoformat() if dev.get('first_seen') else None,
|
|
470
|
+
"last_seen": dev.get('last_seen').isoformat() if dev.get('last_seen') else None,
|
|
471
|
+
"days_active": (dev.get('last_seen') - dev.get('first_seen')).days if dev.get('first_seen') and dev.get('last_seen') else 0
|
|
472
|
+
},
|
|
473
|
+
"health_score": health_score,
|
|
474
|
+
"projects": projects_worked,
|
|
475
|
+
"contribution_patterns": contribution_patterns,
|
|
476
|
+
"collaboration": collaboration_metrics,
|
|
477
|
+
"achievements": achievements,
|
|
478
|
+
"improvement_areas": improvement_areas,
|
|
479
|
+
"activity_timeline": self._build_developer_activity_timeline(dev_commits)
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
return profiles
|
|
483
|
+
|
|
484
|
+
def _build_workflow_analysis(
|
|
485
|
+
self,
|
|
486
|
+
commits: List[Dict[str, Any]],
|
|
487
|
+
prs: List[Dict[str, Any]],
|
|
488
|
+
project_metrics: Dict[str, Any],
|
|
489
|
+
pm_data: Optional[Dict[str, Any]]
|
|
490
|
+
) -> Dict[str, Any]:
|
|
491
|
+
"""Build workflow analysis including Git-PM correlation."""
|
|
492
|
+
|
|
493
|
+
# Analyze branching patterns
|
|
494
|
+
branching_analysis = self._analyze_branching_patterns(commits)
|
|
495
|
+
|
|
496
|
+
# Analyze commit patterns
|
|
497
|
+
commit_patterns = self._analyze_commit_timing_patterns(commits)
|
|
498
|
+
|
|
499
|
+
# Analyze PR workflow if available
|
|
500
|
+
pr_workflow = self._analyze_pr_workflow(prs) if prs else {}
|
|
501
|
+
|
|
502
|
+
# Git-PM correlation analysis
|
|
503
|
+
git_pm_correlation = {}
|
|
504
|
+
if pm_data:
|
|
505
|
+
git_pm_correlation = self._analyze_git_pm_correlation(commits, pm_data)
|
|
506
|
+
|
|
507
|
+
return {
|
|
508
|
+
"branching_strategy": branching_analysis,
|
|
509
|
+
"commit_patterns": commit_patterns,
|
|
510
|
+
"pr_workflow": pr_workflow,
|
|
511
|
+
"git_pm_correlation": git_pm_correlation,
|
|
512
|
+
"process_health": {
|
|
513
|
+
"ticket_linking_rate": project_metrics.get('ticket_analysis', {}).get('commit_coverage_pct', 0),
|
|
514
|
+
"merge_commit_rate": self._calculate_merge_commit_rate(commits),
|
|
515
|
+
"commit_message_quality": self._analyze_commit_message_quality(commits)
|
|
516
|
+
}
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
def _build_time_series_data(
|
|
520
|
+
self,
|
|
521
|
+
commits: List[Dict[str, Any]],
|
|
522
|
+
prs: List[Dict[str, Any]],
|
|
523
|
+
weeks: int
|
|
524
|
+
) -> Dict[str, Any]:
|
|
525
|
+
"""Build time series data optimized for charting libraries."""
|
|
526
|
+
|
|
527
|
+
# Calculate date range
|
|
528
|
+
end_date = datetime.now(timezone.utc)
|
|
529
|
+
start_date = end_date - timedelta(weeks=weeks)
|
|
530
|
+
|
|
531
|
+
# Generate weekly data points
|
|
532
|
+
weekly_data = self._generate_weekly_time_series(commits, prs, start_date, end_date)
|
|
533
|
+
daily_data = self._generate_daily_time_series(commits, prs, start_date, end_date)
|
|
534
|
+
|
|
535
|
+
return {
|
|
536
|
+
"weekly": {
|
|
537
|
+
"labels": [d["date"] for d in weekly_data],
|
|
538
|
+
"datasets": {
|
|
539
|
+
"commits": {
|
|
540
|
+
"label": "Commits",
|
|
541
|
+
"data": [d["commits"] for d in weekly_data],
|
|
542
|
+
"backgroundColor": "rgba(54, 162, 235, 0.2)",
|
|
543
|
+
"borderColor": "rgba(54, 162, 235, 1)"
|
|
544
|
+
},
|
|
545
|
+
"lines_changed": {
|
|
546
|
+
"label": "Lines Changed",
|
|
547
|
+
"data": [d["lines_changed"] for d in weekly_data],
|
|
548
|
+
"backgroundColor": "rgba(255, 99, 132, 0.2)",
|
|
549
|
+
"borderColor": "rgba(255, 99, 132, 1)"
|
|
550
|
+
},
|
|
551
|
+
"story_points": {
|
|
552
|
+
"label": "Story Points",
|
|
553
|
+
"data": [d["story_points"] for d in weekly_data],
|
|
554
|
+
"backgroundColor": "rgba(75, 192, 192, 0.2)",
|
|
555
|
+
"borderColor": "rgba(75, 192, 192, 1)"
|
|
556
|
+
},
|
|
557
|
+
"active_developers": {
|
|
558
|
+
"label": "Active Developers",
|
|
559
|
+
"data": [d["active_developers"] for d in weekly_data],
|
|
560
|
+
"backgroundColor": "rgba(153, 102, 255, 0.2)",
|
|
561
|
+
"borderColor": "rgba(153, 102, 255, 1)"
|
|
562
|
+
}
|
|
563
|
+
}
|
|
564
|
+
},
|
|
565
|
+
"daily": {
|
|
566
|
+
"labels": [d["date"] for d in daily_data],
|
|
567
|
+
"datasets": {
|
|
568
|
+
"commits": {
|
|
569
|
+
"label": "Daily Commits",
|
|
570
|
+
"data": [d["commits"] for d in daily_data],
|
|
571
|
+
"backgroundColor": "rgba(54, 162, 235, 0.1)",
|
|
572
|
+
"borderColor": "rgba(54, 162, 235, 1)"
|
|
573
|
+
}
|
|
574
|
+
}
|
|
575
|
+
}
|
|
576
|
+
}
|
|
577
|
+
|
|
578
|
+
def _build_insights_data(
|
|
579
|
+
self,
|
|
580
|
+
commits: List[Dict[str, Any]],
|
|
581
|
+
developer_stats: List[Dict[str, Any]],
|
|
582
|
+
qualitative_data: Optional[List[Dict[str, Any]]]
|
|
583
|
+
) -> Dict[str, Any]:
|
|
584
|
+
"""Build insights data with qualitative and quantitative analysis."""
|
|
585
|
+
|
|
586
|
+
# Generate quantitative insights
|
|
587
|
+
quantitative_insights = self._generate_quantitative_insights(commits, developer_stats)
|
|
588
|
+
|
|
589
|
+
# Process qualitative insights if available
|
|
590
|
+
qualitative_insights = []
|
|
591
|
+
if qualitative_data:
|
|
592
|
+
qualitative_insights = self._process_qualitative_insights(qualitative_data)
|
|
593
|
+
|
|
594
|
+
# Combine and prioritize insights
|
|
595
|
+
all_insights = quantitative_insights + qualitative_insights
|
|
596
|
+
prioritized_insights = self._prioritize_insights(all_insights)
|
|
597
|
+
|
|
598
|
+
return {
|
|
599
|
+
"quantitative": quantitative_insights,
|
|
600
|
+
"qualitative": qualitative_insights,
|
|
601
|
+
"prioritized": prioritized_insights,
|
|
602
|
+
"insight_categories": self._categorize_insights(all_insights),
|
|
603
|
+
"actionable_recommendations": self._generate_actionable_recommendations(all_insights)
|
|
604
|
+
}
|
|
605
|
+
|
|
606
|
+
def _build_raw_data_summary(
|
|
607
|
+
self,
|
|
608
|
+
commits: List[Dict[str, Any]],
|
|
609
|
+
prs: List[Dict[str, Any]],
|
|
610
|
+
developer_stats: List[Dict[str, Any]],
|
|
611
|
+
dora_metrics: Dict[str, Any]
|
|
612
|
+
) -> Dict[str, Any]:
|
|
613
|
+
"""Build summary of raw data for reference and validation."""
|
|
614
|
+
|
|
615
|
+
return {
|
|
616
|
+
"commits_sample": commits[:5] if commits else [], # First 5 commits as sample
|
|
617
|
+
"prs_sample": prs[:3] if prs else [], # First 3 PRs as sample
|
|
618
|
+
"developer_stats_schema": {
|
|
619
|
+
"fields": list(developer_stats[0].keys()) if developer_stats else [],
|
|
620
|
+
"sample_record": developer_stats[0] if developer_stats else {}
|
|
621
|
+
},
|
|
622
|
+
"dora_metrics": dora_metrics,
|
|
623
|
+
"data_quality": {
|
|
624
|
+
"commits_with_timestamps": sum(1 for c in commits if c.get('timestamp')),
|
|
625
|
+
"commits_with_projects": sum(1 for c in commits if c.get('project_key')),
|
|
626
|
+
"commits_with_tickets": sum(1 for c in commits if c.get('ticket_references')),
|
|
627
|
+
"developers_with_github": sum(1 for d in developer_stats if d.get('github_username'))
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
def _build_pm_integration_data(self, pm_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
632
|
+
"""Build PM platform integration data summary."""
|
|
633
|
+
|
|
634
|
+
metrics = pm_data.get('metrics', {})
|
|
635
|
+
correlations = pm_data.get('correlations', [])
|
|
636
|
+
|
|
637
|
+
return {
|
|
638
|
+
"platforms": list(metrics.get('platform_coverage', {}).keys()),
|
|
639
|
+
"total_issues": metrics.get('total_pm_issues', 0),
|
|
640
|
+
"story_point_coverage": metrics.get('story_point_analysis', {}).get('story_point_coverage_pct', 0),
|
|
641
|
+
"correlations_count": len(correlations),
|
|
642
|
+
"correlation_quality": metrics.get('correlation_quality', {}),
|
|
643
|
+
"issue_types": metrics.get('issue_type_distribution', {}),
|
|
644
|
+
"platform_summary": {
|
|
645
|
+
platform: {
|
|
646
|
+
"total_issues": data.get('total_issues', 0),
|
|
647
|
+
"linked_issues": data.get('linked_issues', 0),
|
|
648
|
+
"coverage_percentage": data.get('coverage_percentage', 0)
|
|
649
|
+
}
|
|
650
|
+
for platform, data in metrics.get('platform_coverage', {}).items()
|
|
651
|
+
}
|
|
652
|
+
}
|
|
653
|
+
|
|
654
|
+
# Helper methods for calculations and analysis
|
|
655
|
+
|
|
656
|
+
def _calculate_executive_trends(
|
|
657
|
+
self,
|
|
658
|
+
commits: List[Dict[str, Any]],
|
|
659
|
+
prs: List[Dict[str, Any]]
|
|
660
|
+
) -> Dict[str, float]:
|
|
661
|
+
"""Calculate trends by comparing first half vs second half of data."""
|
|
662
|
+
|
|
663
|
+
if not commits:
|
|
664
|
+
return {}
|
|
665
|
+
|
|
666
|
+
# Sort commits by timestamp
|
|
667
|
+
sorted_commits = sorted(commits, key=lambda x: x['timestamp'])
|
|
668
|
+
midpoint = len(sorted_commits) // 2
|
|
669
|
+
|
|
670
|
+
first_half = sorted_commits[:midpoint]
|
|
671
|
+
second_half = sorted_commits[midpoint:]
|
|
672
|
+
|
|
673
|
+
# Calculate metrics for each half
|
|
674
|
+
def get_half_metrics(commit_list):
|
|
675
|
+
return {
|
|
676
|
+
'commits': len(commit_list),
|
|
677
|
+
'lines': sum(
|
|
678
|
+
c.get('filtered_insertions', c.get('insertions', 0)) +
|
|
679
|
+
c.get('filtered_deletions', c.get('deletions', 0))
|
|
680
|
+
for c in commit_list
|
|
681
|
+
),
|
|
682
|
+
'story_points': sum(c.get('story_points', 0) or 0 for c in commit_list)
|
|
683
|
+
}
|
|
684
|
+
|
|
685
|
+
first_metrics = get_half_metrics(first_half)
|
|
686
|
+
second_metrics = get_half_metrics(second_half)
|
|
687
|
+
|
|
688
|
+
# Calculate percentage changes
|
|
689
|
+
trends = {}
|
|
690
|
+
for metric in ['commits', 'lines', 'story_points']:
|
|
691
|
+
if first_metrics[metric] > 0:
|
|
692
|
+
change = ((second_metrics[metric] - first_metrics[metric]) / first_metrics[metric]) * 100
|
|
693
|
+
trends[f'{metric}_trend'] = round(change, 1)
|
|
694
|
+
else:
|
|
695
|
+
trends[f'{metric}_trend'] = 0
|
|
696
|
+
|
|
697
|
+
# PR trends if available
|
|
698
|
+
if prs:
|
|
699
|
+
sorted_prs = sorted(prs, key=lambda x: x.get('merged_at', x.get('created_at', datetime.now())))
|
|
700
|
+
pr_midpoint = len(sorted_prs) // 2
|
|
701
|
+
|
|
702
|
+
first_pr_count = pr_midpoint
|
|
703
|
+
second_pr_count = len(sorted_prs) - pr_midpoint
|
|
704
|
+
|
|
705
|
+
if first_pr_count > 0:
|
|
706
|
+
pr_change = ((second_pr_count - first_pr_count) / first_pr_count) * 100
|
|
707
|
+
trends['prs_trend'] = round(pr_change, 1)
|
|
708
|
+
else:
|
|
709
|
+
trends['prs_trend'] = 0
|
|
710
|
+
|
|
711
|
+
return trends
|
|
712
|
+
|
|
713
|
+
def _detect_executive_anomalies(
|
|
714
|
+
self,
|
|
715
|
+
commits: List[Dict[str, Any]],
|
|
716
|
+
developer_stats: List[Dict[str, Any]]
|
|
717
|
+
) -> List[Dict[str, Any]]:
|
|
718
|
+
"""Detect anomalies in executive-level data."""
|
|
719
|
+
|
|
720
|
+
anomalies = []
|
|
721
|
+
|
|
722
|
+
# Check for commit spikes/drops by week
|
|
723
|
+
weekly_commits = self._get_weekly_commit_counts(commits)
|
|
724
|
+
if len(weekly_commits) >= 3:
|
|
725
|
+
mean_commits = statistics.mean(weekly_commits)
|
|
726
|
+
std_commits = statistics.pstdev(weekly_commits) if len(weekly_commits) > 1 else 0
|
|
727
|
+
|
|
728
|
+
for i, count in enumerate(weekly_commits):
|
|
729
|
+
if std_commits > 0:
|
|
730
|
+
if count > mean_commits + (std_commits * self.anomaly_thresholds['spike_multiplier']):
|
|
731
|
+
anomalies.append({
|
|
732
|
+
"type": "spike",
|
|
733
|
+
"metric": "weekly_commits",
|
|
734
|
+
"value": count,
|
|
735
|
+
"expected": round(mean_commits, 1),
|
|
736
|
+
"severity": "high" if count > mean_commits + (std_commits * 3) else "medium",
|
|
737
|
+
"week_index": i
|
|
738
|
+
})
|
|
739
|
+
elif count < mean_commits * self.anomaly_thresholds['drop_threshold']:
|
|
740
|
+
anomalies.append({
|
|
741
|
+
"type": "drop",
|
|
742
|
+
"metric": "weekly_commits",
|
|
743
|
+
"value": count,
|
|
744
|
+
"expected": round(mean_commits, 1),
|
|
745
|
+
"severity": "high" if count < mean_commits * 0.1 else "medium",
|
|
746
|
+
"week_index": i
|
|
747
|
+
})
|
|
748
|
+
|
|
749
|
+
# Check for contributor anomalies
|
|
750
|
+
commit_counts = [dev['total_commits'] for dev in developer_stats]
|
|
751
|
+
if len(commit_counts) > 1:
|
|
752
|
+
gini_coefficient = self._calculate_gini_coefficient(commit_counts)
|
|
753
|
+
if gini_coefficient > 0.8:
|
|
754
|
+
anomalies.append({
|
|
755
|
+
"type": "concentration",
|
|
756
|
+
"metric": "contribution_distribution",
|
|
757
|
+
"value": round(gini_coefficient, 2),
|
|
758
|
+
"threshold": 0.8,
|
|
759
|
+
"severity": "medium",
|
|
760
|
+
"description": "Highly concentrated contribution pattern"
|
|
761
|
+
})
|
|
762
|
+
|
|
763
|
+
return anomalies
|
|
764
|
+
|
|
765
|
+
def _identify_wins_and_concerns(
|
|
766
|
+
self,
|
|
767
|
+
commits: List[Dict[str, Any]],
|
|
768
|
+
developer_stats: List[Dict[str, Any]],
|
|
769
|
+
project_metrics: Dict[str, Any],
|
|
770
|
+
dora_metrics: Dict[str, Any]
|
|
771
|
+
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
|
|
772
|
+
"""Identify key wins and concerns from the data."""
|
|
773
|
+
|
|
774
|
+
wins = []
|
|
775
|
+
concerns = []
|
|
776
|
+
|
|
777
|
+
# Ticket coverage analysis
|
|
778
|
+
ticket_coverage = project_metrics.get('ticket_analysis', {}).get('commit_coverage_pct', 0)
|
|
779
|
+
if ticket_coverage > 80:
|
|
780
|
+
wins.append({
|
|
781
|
+
"category": "process",
|
|
782
|
+
"title": "Excellent Ticket Coverage",
|
|
783
|
+
"description": f"{ticket_coverage:.1f}% of commits linked to tickets",
|
|
784
|
+
"impact": "high"
|
|
785
|
+
})
|
|
786
|
+
elif ticket_coverage < 30:
|
|
787
|
+
concerns.append({
|
|
788
|
+
"category": "process",
|
|
789
|
+
"title": "Low Ticket Coverage",
|
|
790
|
+
"description": f"Only {ticket_coverage:.1f}% of commits linked to tickets",
|
|
791
|
+
"impact": "high",
|
|
792
|
+
"recommendation": "Improve ticket referencing in commit messages"
|
|
793
|
+
})
|
|
794
|
+
|
|
795
|
+
# Team activity analysis
|
|
796
|
+
if len(developer_stats) > 1:
|
|
797
|
+
commit_counts = [dev['total_commits'] for dev in developer_stats]
|
|
798
|
+
avg_commits = sum(commit_counts) / len(commit_counts)
|
|
799
|
+
|
|
800
|
+
if min(commit_counts) > avg_commits * 0.5:
|
|
801
|
+
wins.append({
|
|
802
|
+
"category": "team",
|
|
803
|
+
"title": "Balanced Team Contributions",
|
|
804
|
+
"description": "All team members are actively contributing",
|
|
805
|
+
"impact": "medium"
|
|
806
|
+
})
|
|
807
|
+
elif max(commit_counts) > avg_commits * 3:
|
|
808
|
+
concerns.append({
|
|
809
|
+
"category": "team",
|
|
810
|
+
"title": "Unbalanced Contributions",
|
|
811
|
+
"description": "Work is heavily concentrated among few developers",
|
|
812
|
+
"impact": "medium",
|
|
813
|
+
"recommendation": "Consider distributing work more evenly"
|
|
814
|
+
})
|
|
815
|
+
|
|
816
|
+
# Code quality indicators
|
|
817
|
+
total_lines = sum(
|
|
818
|
+
c.get('filtered_insertions', c.get('insertions', 0)) +
|
|
819
|
+
c.get('filtered_deletions', c.get('deletions', 0))
|
|
820
|
+
for c in commits
|
|
821
|
+
)
|
|
822
|
+
avg_commit_size = total_lines / max(len(commits), 1)
|
|
823
|
+
|
|
824
|
+
if 20 <= avg_commit_size <= 200:
|
|
825
|
+
wins.append({
|
|
826
|
+
"category": "quality",
|
|
827
|
+
"title": "Optimal Commit Size",
|
|
828
|
+
"description": f"Average commit size of {avg_commit_size:.0f} lines indicates good change management",
|
|
829
|
+
"impact": "low"
|
|
830
|
+
})
|
|
831
|
+
elif avg_commit_size > 500:
|
|
832
|
+
concerns.append({
|
|
833
|
+
"category": "quality",
|
|
834
|
+
"title": "Large Commit Sizes",
|
|
835
|
+
"description": f"Average commit size of {avg_commit_size:.0f} lines may indicate batched changes",
|
|
836
|
+
"impact": "low",
|
|
837
|
+
"recommendation": "Consider breaking down changes into smaller commits"
|
|
838
|
+
})
|
|
839
|
+
|
|
840
|
+
return wins, concerns
|
|
841
|
+
|
|
842
|
+
def _calculate_overall_health_score(
|
|
843
|
+
self,
|
|
844
|
+
commits: List[Dict[str, Any]],
|
|
845
|
+
developer_stats: List[Dict[str, Any]],
|
|
846
|
+
project_metrics: Dict[str, Any],
|
|
847
|
+
dora_metrics: Dict[str, Any]
|
|
848
|
+
) -> Dict[str, Any]:
|
|
849
|
+
"""Calculate overall project health score."""
|
|
850
|
+
|
|
851
|
+
scores = {}
|
|
852
|
+
|
|
853
|
+
# Activity consistency score (0-100)
|
|
854
|
+
weekly_commits = self._get_weekly_commit_counts(commits)
|
|
855
|
+
if weekly_commits:
|
|
856
|
+
consistency = max(0, 100 - (statistics.pstdev(weekly_commits) / max(statistics.mean(weekly_commits), 1) * 100))
|
|
857
|
+
scores['activity_consistency'] = min(100, consistency)
|
|
858
|
+
else:
|
|
859
|
+
scores['activity_consistency'] = 0
|
|
860
|
+
|
|
861
|
+
# Ticket coverage score
|
|
862
|
+
ticket_coverage = project_metrics.get('ticket_analysis', {}).get('commit_coverage_pct', 0)
|
|
863
|
+
scores['ticket_coverage'] = min(100, ticket_coverage)
|
|
864
|
+
|
|
865
|
+
# Collaboration score (based on multi-project work and team balance)
|
|
866
|
+
if len(developer_stats) > 1:
|
|
867
|
+
commit_counts = [dev['total_commits'] for dev in developer_stats]
|
|
868
|
+
gini = self._calculate_gini_coefficient(commit_counts)
|
|
869
|
+
collaboration_score = max(0, 100 - (gini * 100))
|
|
870
|
+
scores['collaboration'] = collaboration_score
|
|
871
|
+
else:
|
|
872
|
+
scores['collaboration'] = 50 # Neutral for single developer
|
|
873
|
+
|
|
874
|
+
# Code quality score (based on commit size and patterns)
|
|
875
|
+
total_lines = sum(
|
|
876
|
+
c.get('filtered_insertions', c.get('insertions', 0)) +
|
|
877
|
+
c.get('filtered_deletions', c.get('deletions', 0))
|
|
878
|
+
for c in commits
|
|
879
|
+
)
|
|
880
|
+
avg_commit_size = total_lines / max(len(commits), 1)
|
|
881
|
+
|
|
882
|
+
# Optimal range is 20-200 lines per commit
|
|
883
|
+
if 20 <= avg_commit_size <= 200:
|
|
884
|
+
quality_score = 100
|
|
885
|
+
elif avg_commit_size < 20:
|
|
886
|
+
quality_score = max(0, (avg_commit_size / 20) * 100)
|
|
887
|
+
else:
|
|
888
|
+
quality_score = max(0, 100 - ((avg_commit_size - 200) / 500 * 100))
|
|
889
|
+
|
|
890
|
+
scores['code_quality'] = min(100, quality_score)
|
|
891
|
+
|
|
892
|
+
# Velocity score (commits per week vs. baseline)
|
|
893
|
+
weeks_with_activity = len([w for w in weekly_commits if w > 0])
|
|
894
|
+
velocity_score = min(100, (weeks_with_activity / max(len(weekly_commits), 1)) * 100)
|
|
895
|
+
scores['velocity'] = velocity_score
|
|
896
|
+
|
|
897
|
+
# Calculate weighted overall score
|
|
898
|
+
overall_score = sum(
|
|
899
|
+
scores.get(metric, 0) * weight
|
|
900
|
+
for metric, weight in self.health_weights.items()
|
|
901
|
+
)
|
|
902
|
+
|
|
903
|
+
return {
|
|
904
|
+
"overall": round(overall_score, 1),
|
|
905
|
+
"components": {k: round(v, 1) for k, v in scores.items()},
|
|
906
|
+
"weights": self.health_weights,
|
|
907
|
+
"rating": self._get_health_rating(overall_score)
|
|
908
|
+
}
|
|
909
|
+
|
|
910
|
+
def _get_health_rating(self, score: float) -> str:
|
|
911
|
+
"""Get health rating based on score."""
|
|
912
|
+
if score >= 80:
|
|
913
|
+
return "excellent"
|
|
914
|
+
elif score >= 60:
|
|
915
|
+
return "good"
|
|
916
|
+
elif score >= 40:
|
|
917
|
+
return "fair"
|
|
918
|
+
else:
|
|
919
|
+
return "needs_improvement"
|
|
920
|
+
|
|
921
|
+
def _get_trend_direction(self, trend_percent: float) -> str:
|
|
922
|
+
"""Get trend direction from percentage change."""
|
|
923
|
+
if abs(trend_percent) < self.anomaly_thresholds['trend_threshold'] * 100:
|
|
924
|
+
return "stable"
|
|
925
|
+
elif trend_percent > 0:
|
|
926
|
+
return "increasing"
|
|
927
|
+
else:
|
|
928
|
+
return "decreasing"
|
|
929
|
+
|
|
930
|
+
def _get_coverage_quality_rating(self, coverage: float) -> str:
|
|
931
|
+
"""Get quality rating for ticket coverage."""
|
|
932
|
+
if coverage >= 80:
|
|
933
|
+
return "excellent"
|
|
934
|
+
elif coverage >= 60:
|
|
935
|
+
return "good"
|
|
936
|
+
elif coverage >= 40:
|
|
937
|
+
return "fair"
|
|
938
|
+
else:
|
|
939
|
+
return "poor"
|
|
940
|
+
|
|
941
|
+
def _calculate_active_developer_percentage(self, developer_stats: List[Dict[str, Any]]) -> float:
|
|
942
|
+
"""Calculate percentage of developers with meaningful activity."""
|
|
943
|
+
if not developer_stats:
|
|
944
|
+
return 0
|
|
945
|
+
|
|
946
|
+
total_commits = sum(dev['total_commits'] for dev in developer_stats)
|
|
947
|
+
avg_commits = total_commits / len(developer_stats)
|
|
948
|
+
threshold = max(1, avg_commits * 0.1) # 10% of average
|
|
949
|
+
|
|
950
|
+
active_developers = sum(1 for dev in developer_stats if dev['total_commits'] >= threshold)
|
|
951
|
+
return round((active_developers / len(developer_stats)) * 100, 1)
|
|
952
|
+
|
|
953
|
+
def _calculate_avg_developers_per_project(self, commits: List[Dict[str, Any]]) -> float:
|
|
954
|
+
"""Calculate average number of developers per project."""
|
|
955
|
+
project_developers = defaultdict(set)
|
|
956
|
+
|
|
957
|
+
for commit in commits:
|
|
958
|
+
project_key = commit.get('project_key', 'UNKNOWN')
|
|
959
|
+
dev_id = commit.get('canonical_id', commit.get('author_email'))
|
|
960
|
+
project_developers[project_key].add(dev_id)
|
|
961
|
+
|
|
962
|
+
if not project_developers:
|
|
963
|
+
return 0
|
|
964
|
+
|
|
965
|
+
avg = sum(len(devs) for devs in project_developers.values()) / len(project_developers)
|
|
966
|
+
return round(avg, 1)
|
|
967
|
+
|
|
968
|
+
def _count_cross_project_contributors(
|
|
969
|
+
self,
|
|
970
|
+
commits: List[Dict[str, Any]],
|
|
971
|
+
developer_stats: List[Dict[str, Any]]
|
|
972
|
+
) -> int:
|
|
973
|
+
"""Count developers who contribute to multiple projects."""
|
|
974
|
+
developer_projects = defaultdict(set)
|
|
975
|
+
|
|
976
|
+
for commit in commits:
|
|
977
|
+
project_key = commit.get('project_key', 'UNKNOWN')
|
|
978
|
+
dev_id = commit.get('canonical_id', commit.get('author_email'))
|
|
979
|
+
developer_projects[dev_id].add(project_key)
|
|
980
|
+
|
|
981
|
+
return sum(1 for projects in developer_projects.values() if len(projects) > 1)
|
|
982
|
+
|
|
983
|
+
def _calculate_project_health_score(
|
|
984
|
+
self,
|
|
985
|
+
commits: List[Dict[str, Any]],
|
|
986
|
+
contributors: Set[str]
|
|
987
|
+
) -> Dict[str, Any]:
|
|
988
|
+
"""Calculate health score for a specific project."""
|
|
989
|
+
|
|
990
|
+
if not commits:
|
|
991
|
+
return {"overall": 0, "components": {}, "rating": "no_data"}
|
|
992
|
+
|
|
993
|
+
scores = {}
|
|
994
|
+
|
|
995
|
+
# Activity score (commits per week)
|
|
996
|
+
weekly_commits = self._get_weekly_commit_counts(commits)
|
|
997
|
+
if weekly_commits:
|
|
998
|
+
avg_weekly = statistics.mean(weekly_commits)
|
|
999
|
+
activity_score = min(100, avg_weekly * 10) # Scale appropriately
|
|
1000
|
+
scores['activity'] = activity_score
|
|
1001
|
+
else:
|
|
1002
|
+
scores['activity'] = 0
|
|
1003
|
+
|
|
1004
|
+
# Contributor diversity score
|
|
1005
|
+
if len(contributors) == 1:
|
|
1006
|
+
diversity_score = 30 # Single contributor is risky
|
|
1007
|
+
elif len(contributors) <= 3:
|
|
1008
|
+
diversity_score = 60
|
|
1009
|
+
else:
|
|
1010
|
+
diversity_score = 100
|
|
1011
|
+
scores['contributor_diversity'] = diversity_score
|
|
1012
|
+
|
|
1013
|
+
# Consistency score
|
|
1014
|
+
if len(weekly_commits) > 1:
|
|
1015
|
+
consistency = max(0, 100 - (statistics.pstdev(weekly_commits) / max(statistics.mean(weekly_commits), 1) * 50))
|
|
1016
|
+
scores['consistency'] = consistency
|
|
1017
|
+
else:
|
|
1018
|
+
scores['consistency'] = 50
|
|
1019
|
+
|
|
1020
|
+
# Overall score (equal weights for now)
|
|
1021
|
+
overall_score = sum(scores.values()) / len(scores)
|
|
1022
|
+
|
|
1023
|
+
return {
|
|
1024
|
+
"overall": round(overall_score, 1),
|
|
1025
|
+
"components": {k: round(v, 1) for k, v in scores.items()},
|
|
1026
|
+
"rating": self._get_health_rating(overall_score)
|
|
1027
|
+
}
|
|
1028
|
+
|
|
1029
|
+
def _get_project_contributor_details(
|
|
1030
|
+
self,
|
|
1031
|
+
commits: List[Dict[str, Any]],
|
|
1032
|
+
developer_stats: List[Dict[str, Any]]
|
|
1033
|
+
) -> List[Dict[str, Any]]:
|
|
1034
|
+
"""Get detailed contributor information for a project."""
|
|
1035
|
+
|
|
1036
|
+
# Create developer lookup
|
|
1037
|
+
dev_lookup = {dev['canonical_id']: dev for dev in developer_stats}
|
|
1038
|
+
|
|
1039
|
+
# Count contributions per developer
|
|
1040
|
+
contributor_commits = defaultdict(int)
|
|
1041
|
+
contributor_lines = defaultdict(int)
|
|
1042
|
+
|
|
1043
|
+
for commit in commits:
|
|
1044
|
+
dev_id = commit.get('canonical_id', commit.get('author_email'))
|
|
1045
|
+
contributor_commits[dev_id] += 1
|
|
1046
|
+
|
|
1047
|
+
lines = (
|
|
1048
|
+
commit.get('filtered_insertions', commit.get('insertions', 0)) +
|
|
1049
|
+
commit.get('filtered_deletions', commit.get('deletions', 0))
|
|
1050
|
+
)
|
|
1051
|
+
contributor_lines[dev_id] += lines
|
|
1052
|
+
|
|
1053
|
+
# Build contributor details
|
|
1054
|
+
contributors = []
|
|
1055
|
+
total_commits = len(commits)
|
|
1056
|
+
|
|
1057
|
+
for dev_id, commit_count in contributor_commits.items():
|
|
1058
|
+
dev = dev_lookup.get(dev_id, {})
|
|
1059
|
+
|
|
1060
|
+
contributors.append({
|
|
1061
|
+
"id": dev_id,
|
|
1062
|
+
"name": self._anonymize_value(dev.get('primary_name', 'Unknown'), 'name'),
|
|
1063
|
+
"commits": commit_count,
|
|
1064
|
+
"commits_percentage": round((commit_count / total_commits) * 100, 1),
|
|
1065
|
+
"lines_changed": contributor_lines[dev_id],
|
|
1066
|
+
"role": self._determine_contributor_role(commit_count, total_commits)
|
|
1067
|
+
})
|
|
1068
|
+
|
|
1069
|
+
# Sort by commits descending
|
|
1070
|
+
contributors.sort(key=lambda x: x['commits'], reverse=True)
|
|
1071
|
+
|
|
1072
|
+
return contributors
|
|
1073
|
+
|
|
1074
|
+
def _determine_contributor_role(self, commits: int, total_commits: int) -> str:
|
|
1075
|
+
"""Determine contributor role based on contribution percentage."""
|
|
1076
|
+
percentage = (commits / total_commits) * 100
|
|
1077
|
+
|
|
1078
|
+
if percentage >= 50:
|
|
1079
|
+
return "primary"
|
|
1080
|
+
elif percentage >= 25:
|
|
1081
|
+
return "major"
|
|
1082
|
+
elif percentage >= 10:
|
|
1083
|
+
return "regular"
|
|
1084
|
+
else:
|
|
1085
|
+
return "occasional"
|
|
1086
|
+
|
|
1087
|
+
def _calculate_project_trends(self, commits: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
1088
|
+
"""Calculate trends for a specific project."""
|
|
1089
|
+
|
|
1090
|
+
if len(commits) < 4: # Need sufficient data for trends
|
|
1091
|
+
return {"insufficient_data": True}
|
|
1092
|
+
|
|
1093
|
+
# Sort by timestamp
|
|
1094
|
+
sorted_commits = sorted(commits, key=lambda x: x['timestamp'])
|
|
1095
|
+
|
|
1096
|
+
# Split into quarters for trend analysis
|
|
1097
|
+
quarter_size = len(sorted_commits) // 4
|
|
1098
|
+
quarters = [
|
|
1099
|
+
sorted_commits[i*quarter_size:(i+1)*quarter_size]
|
|
1100
|
+
for i in range(4)
|
|
1101
|
+
]
|
|
1102
|
+
|
|
1103
|
+
# Handle remainder commits
|
|
1104
|
+
if len(sorted_commits) % 4:
|
|
1105
|
+
quarters[-1].extend(sorted_commits[4*quarter_size:])
|
|
1106
|
+
|
|
1107
|
+
# Calculate metrics per quarter
|
|
1108
|
+
quarter_metrics = []
|
|
1109
|
+
for quarter in quarters:
|
|
1110
|
+
metrics = {
|
|
1111
|
+
'commits': len(quarter),
|
|
1112
|
+
'lines': sum(
|
|
1113
|
+
c.get('filtered_insertions', c.get('insertions', 0)) +
|
|
1114
|
+
c.get('filtered_deletions', c.get('deletions', 0))
|
|
1115
|
+
for c in quarter
|
|
1116
|
+
),
|
|
1117
|
+
'contributors': len(set(c.get('canonical_id', c.get('author_email')) for c in quarter))
|
|
1118
|
+
}
|
|
1119
|
+
quarter_metrics.append(metrics)
|
|
1120
|
+
|
|
1121
|
+
# Calculate trends (compare Q1 vs Q4)
|
|
1122
|
+
trends = {}
|
|
1123
|
+
for metric in ['commits', 'lines', 'contributors']:
|
|
1124
|
+
q1_value = quarter_metrics[0][metric]
|
|
1125
|
+
q4_value = quarter_metrics[-1][metric]
|
|
1126
|
+
|
|
1127
|
+
if q1_value > 0:
|
|
1128
|
+
change = ((q4_value - q1_value) / q1_value) * 100
|
|
1129
|
+
trends[f'{metric}_trend'] = round(change, 1)
|
|
1130
|
+
else:
|
|
1131
|
+
trends[f'{metric}_trend'] = 0
|
|
1132
|
+
|
|
1133
|
+
return trends
|
|
1134
|
+
|
|
1135
|
+
    def _detect_project_anomalies(self, commits: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Detect anomalies in project-specific data."""

        if len(commits) < 7:  # Need sufficient data
            return []

        anomalies = []

        # Get daily commit counts
        daily_commits = self._get_daily_commit_counts(commits)

        if len(daily_commits) >= 7:
            mean_daily = statistics.mean(daily_commits)
            std_daily = statistics.pstdev(daily_commits) if len(daily_commits) > 1 else 0

            # Find days with unusual activity
            for i, count in enumerate(daily_commits):
                if std_daily > 0 and count > mean_daily + (std_daily * 2):
                    anomalies.append({
                        "type": "activity_spike",
                        "value": count,
                        "expected": round(mean_daily, 1),
                        "day_index": i,
                        "severity": "medium"
                    })

        return anomalies

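    # Illustrative reading of the two-sigma spike rule above: with mean_daily = 3.0
    # and std_daily = 1.5, the threshold is 3.0 + 2 * 1.5 = 6.0, so a day with 8
    # commits is flagged while a day with 5 is not.
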
    def _identify_primary_contributors(
        self,
        commits: List[Dict[str, Any]],
        contributor_details: List[Dict[str, Any]]
    ) -> List[str]:
        """Identify primary contributors (top 80% of activity)."""

        sorted_contributors = sorted(contributor_details, key=lambda x: x['commits'], reverse=True)
        total_commits = sum(c['commits'] for c in contributor_details)

        primary_contributors = []
        cumulative_commits = 0

        for contributor in sorted_contributors:
            cumulative_commits += contributor['commits']
            primary_contributors.append(contributor['name'])

            if cumulative_commits >= total_commits * 0.8:
                break

        return primary_contributors

    def _calculate_contribution_distribution(self, commits: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Calculate distribution metrics for contributions."""

        contributor_commits = defaultdict(int)
        for commit in commits:
            dev_id = commit.get('canonical_id', commit.get('author_email'))
            contributor_commits[dev_id] += 1

        commit_counts = list(contributor_commits.values())

        if not commit_counts:
            return {}

        gini = self._calculate_gini_coefficient(commit_counts)

        return {
            "gini_coefficient": round(gini, 3),
            "concentration_level": "high" if gini > 0.7 else "medium" if gini > 0.4 else "low",
            "top_contributor_percentage": round((max(commit_counts) / sum(commit_counts)) * 100, 1),
            "contributor_count": len(commit_counts)
        }

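    # _calculate_gini_coefficient is called above but defined elsewhere in this
    # module. A minimal sketch of such a helper, assuming the standard
    # sorted-cumulative formulation (illustrative only; the name "_gini_sketch"
    # is hypothetical, not the shipped implementation):
    def _gini_sketch(self, values: List[int]) -> float:
        """Hypothetical helper: Gini coefficient in [0, 1]; 0 = even, 1 = concentrated."""
        sorted_vals = sorted(values)
        n = len(sorted_vals)
        total = sum(sorted_vals)
        if n == 0 or total == 0:
            return 0.0
        # Weighted cumulative sum of the sorted distribution
        cumulative = sum((i + 1) * v for i, v in enumerate(sorted_vals))
        return (2 * cumulative) / (n * total) - (n + 1) / n
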
    def _get_developer_projects(self, commits: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
        """Get projects a developer has worked on with contribution details."""

        project_contributions = defaultdict(lambda: {
            'commits': 0,
            'lines_changed': 0,
            'story_points': 0,
            'first_commit': None,
            'last_commit': None
        })

        for commit in commits:
            project_key = commit.get('project_key', 'UNKNOWN')
            project_data = project_contributions[project_key]

            project_data['commits'] += 1

            lines = (
                commit.get('filtered_insertions', commit.get('insertions', 0)) +
                commit.get('filtered_deletions', commit.get('deletions', 0))
            )
            project_data['lines_changed'] += lines
            project_data['story_points'] += commit.get('story_points', 0) or 0

            # Track first and last commits
            commit_date = commit['timestamp']
            if not project_data['first_commit'] or commit_date < project_data['first_commit']:
                project_data['first_commit'] = commit_date
            if not project_data['last_commit'] or commit_date > project_data['last_commit']:
                project_data['last_commit'] = commit_date

        # Convert to regular dict and add percentages
        total_commits = len(commits)
        projects = {}

        for project_key, data in project_contributions.items():
            projects[project_key] = {
                'commits': data['commits'],
                'commits_percentage': round((data['commits'] / total_commits) * 100, 1),
                'lines_changed': data['lines_changed'],
                'story_points': data['story_points'],
                'first_commit': data['first_commit'].isoformat() if data['first_commit'] else None,
                'last_commit': data['last_commit'].isoformat() if data['last_commit'] else None,
                'days_active': (data['last_commit'] - data['first_commit']).days if data['first_commit'] and data['last_commit'] else 0
            }

        return projects

    def _analyze_developer_contribution_patterns(self, commits: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Analyze a developer's contribution patterns."""

        if not commits:
            return {}

        # Time-based patterns (use local hour if available)
        commit_hours = []
        for c in commits:
            if 'local_hour' in c:
                commit_hours.append(c['local_hour'])
            elif hasattr(c['timestamp'], 'hour'):
                commit_hours.append(c['timestamp'].hour)

        commit_days = [c['timestamp'].weekday() for c in commits if hasattr(c['timestamp'], 'weekday')]

        # Size patterns
        commit_sizes = []
        for commit in commits:
            lines = (
                commit.get('filtered_insertions', commit.get('insertions', 0)) +
                commit.get('filtered_deletions', commit.get('deletions', 0))
            )
            commit_sizes.append(lines)

        patterns = {
            'total_commits': len(commits),
            'avg_commit_size': round(statistics.mean(commit_sizes), 1) if commit_sizes else 0,
            'commit_size_stddev': round(statistics.pstdev(commit_sizes), 1) if len(commit_sizes) > 1 else 0
        }

        if commit_hours:
            patterns['peak_hour'] = max(set(commit_hours), key=commit_hours.count)
            patterns['time_distribution'] = self._get_time_distribution_pattern(commit_hours)

        if commit_days:
            patterns['peak_day'] = self._get_day_name(max(set(commit_days), key=commit_days.count))
            patterns['work_pattern'] = self._get_work_pattern(commit_days)

        # Consistency patterns
        weekly_commits = self._get_weekly_commit_counts(commits)
        if len(weekly_commits) > 1:
            patterns['consistency_score'] = round(
                100 - (statistics.pstdev(weekly_commits) / max(statistics.mean(weekly_commits), 1) * 100), 1
            )
        else:
            patterns['consistency_score'] = 50

        return patterns

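    # Illustrative reading of consistency_score above: weekly counts [4, 4, 4, 4]
    # have zero deviation and score 100.0, while [0, 8, 0, 8] (mean 4, pstdev 4)
    # score 0.0.
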
    def _get_time_distribution_pattern(self, hours: List[int]) -> str:
        """Determine time distribution pattern from commit hours."""
        avg_hour = statistics.mean(hours)

        if avg_hour < 10:
            return "early_bird"
        elif avg_hour < 14:
            return "morning_focused"
        elif avg_hour < 18:
            return "afternoon_focused"
        else:
            return "night_owl"

    def _get_day_name(self, day_index: int) -> str:
        """Convert day index to day name."""
        days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
        return days[day_index] if 0 <= day_index < 7 else 'Unknown'

    def _get_work_pattern(self, days: List[int]) -> str:
        """Determine work pattern from commit days."""
        weekday_commits = sum(1 for day in days if day < 5)   # Mon-Fri
        weekend_commits = sum(1 for day in days if day >= 5)  # Sat-Sun

        total = len(days)
        weekday_pct = (weekday_commits / total) * 100 if total > 0 else 0

        if weekday_pct > 90:
            return "strictly_weekdays"
        elif weekday_pct > 75:
            return "mostly_weekdays"
        elif weekday_pct > 50:
            return "mixed_schedule"
        else:
            return "weekend_warrior"

    def _calculate_developer_collaboration_metrics(
        self,
        commits: List[Dict[str, Any]],
        all_developer_stats: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Calculate collaboration metrics for a developer."""

        # Get projects this developer worked on
        dev_projects = set(c.get('project_key', 'UNKNOWN') for c in commits)

        # Find other developers on same projects
        collaborators = set()
        for dev in all_developer_stats:
            dev_id = dev['canonical_id']
            # Simple check - assumes we can identify overlapping work
            # In real implementation, would need more sophisticated analysis
            if len(dev_projects) > 0:  # Placeholder logic
                collaborators.add(dev_id)

        # Remove self from collaborators
        dev_id = commits[0].get('canonical_id') if commits else None
        collaborators.discard(dev_id)

        return {
            'projects_count': len(dev_projects),
            'potential_collaborators': len(collaborators),
            'cross_project_work': len(dev_projects) > 1,
            'collaboration_score': min(100, len(collaborators) * 10)  # Simple scoring
        }

    def _calculate_developer_health_score(
        self,
        commits: List[Dict[str, Any]],
        dev_stats: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Calculate health score for a specific developer."""

        if not commits:
            return {"overall": 0, "components": {}, "rating": "no_data"}

        scores = {}

        # Activity score based on commits per week
        weekly_commits = self._get_weekly_commit_counts(commits)
        if weekly_commits:
            avg_weekly = statistics.mean(weekly_commits)
            activity_score = min(100, avg_weekly * 20)  # Scale appropriately
            scores['activity'] = activity_score
        else:
            scores['activity'] = 0

        # Consistency score
        if len(weekly_commits) > 1:
            consistency = max(0, 100 - (statistics.pstdev(weekly_commits) / max(statistics.mean(weekly_commits), 1) * 50))
            scores['consistency'] = consistency
        else:
            scores['consistency'] = 50

        # Engagement score (based on projects and commit sizes)
        project_count = len(set(c.get('project_key', 'UNKNOWN') for c in commits))
        engagement_score = min(100, project_count * 25 + 25)  # Bonus for multi-project work
        scores['engagement'] = engagement_score

        # Overall score
        overall_score = sum(scores.values()) / len(scores)

        return {
            "overall": round(overall_score, 1),
            "components": {k: round(v, 1) for k, v in scores.items()},
            "rating": self._get_health_rating(overall_score)
        }

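    # _get_health_rating is referenced above but defined elsewhere in this module.
    # A plausible sketch, assuming the same 80/60/40 banding this file uses for its
    # other ratings (illustrative only; "_health_rating_sketch" is a hypothetical name):
    def _health_rating_sketch(self, score: float) -> str:
        """Hypothetical helper: map a 0-100 health score onto a rating label."""
        if score >= 80:
            return 'excellent'
        elif score >= 60:
            return 'good'
        elif score >= 40:
            return 'fair'
        else:
            return 'needs_improvement'
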
    def _identify_developer_achievements(
        self,
        commits: List[Dict[str, Any]],
        dev_stats: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """Identify achievements for a developer."""

        achievements = []

        # High commit count
        if dev_stats['total_commits'] > 50:
            achievements.append({
                "type": "productivity",
                "title": "High Productivity",
                "description": f"{dev_stats['total_commits']} commits in analysis period",
                "badge": "prolific_contributor"
            })

        # Multi-project contributor
        projects = set(c.get('project_key', 'UNKNOWN') for c in commits)
        if len(projects) > 3:
            achievements.append({
                "type": "versatility",
                "title": "Multi-Project Contributor",
                "description": f"Contributed to {len(projects)} projects",
                "badge": "versatile_developer"
            })

        # Consistent contributor
        weekly_commits = self._get_weekly_commit_counts(commits)
        if len(weekly_commits) > 4:
            active_weeks = sum(1 for w in weekly_commits if w > 0)
            consistency_rate = active_weeks / len(weekly_commits)

            if consistency_rate > 0.8:
                achievements.append({
                    "type": "consistency",
                    "title": "Consistent Contributor",
                    "description": f"Active in {active_weeks} out of {len(weekly_commits)} weeks",
                    "badge": "reliable_contributor"
                })

        return achievements

    def _identify_improvement_areas(
        self,
        commits: List[Dict[str, Any]],
        dev_stats: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """Identify areas for improvement for a developer."""

        improvements = []

        # Check ticket linking
        commits_with_tickets = sum(1 for c in commits if c.get('ticket_references'))
        ticket_rate = (commits_with_tickets / len(commits)) * 100 if commits else 0

        if ticket_rate < 50:
            improvements.append({
                "category": "process",
                "title": "Improve Ticket Linking",
                "description": f"Only {ticket_rate:.1f}% of commits reference tickets",
                "priority": "medium",
                "suggestion": "Include ticket references in commit messages"
            })

        # Check commit size consistency
        commit_sizes = []
        for commit in commits:
            lines = (
                commit.get('filtered_insertions', commit.get('insertions', 0)) +
                commit.get('filtered_deletions', commit.get('deletions', 0))
            )
            commit_sizes.append(lines)

        if commit_sizes and len(commit_sizes) > 5:
            avg_size = statistics.mean(commit_sizes)
            if avg_size > 300:
                improvements.append({
                    "category": "quality",
                    "title": "Consider Smaller Commits",
                    "description": f"Average commit size is {avg_size:.0f} lines",
                    "priority": "low",
                    "suggestion": "Break down large changes into smaller, focused commits"
                })

        return improvements

    def _build_developer_activity_timeline(self, commits: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Build activity timeline for a developer."""

        if not commits:
            return []

        # Group commits by week
        weekly_activity = defaultdict(lambda: {
            'commits': 0,
            'lines_changed': 0,
            'projects': set()
        })

        for commit in commits:
            week_start = self._get_week_start(commit['timestamp'])
            week_key = week_start.strftime('%Y-%m-%d')

            weekly_activity[week_key]['commits'] += 1

            lines = (
                commit.get('filtered_insertions', commit.get('insertions', 0)) +
                commit.get('filtered_deletions', commit.get('deletions', 0))
            )
            weekly_activity[week_key]['lines_changed'] += lines
            weekly_activity[week_key]['projects'].add(commit.get('project_key', 'UNKNOWN'))

        # Convert to timeline format
        timeline = []
        for week_key in sorted(weekly_activity.keys()):
            data = weekly_activity[week_key]
            timeline.append({
                'week': week_key,
                'commits': data['commits'],
                'lines_changed': data['lines_changed'],
                'projects': len(data['projects']),
                'project_list': sorted(list(data['projects']))
            })

        return timeline

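    # _get_week_start is called above but defined elsewhere in this module. A
    # typical Monday-anchored sketch (an assumption; "_week_start_sketch" is a
    # hypothetical name, not the shipped helper):
    def _week_start_sketch(self, ts: datetime) -> datetime:
        """Hypothetical helper: normalize a timestamp to midnight on the Monday of its week."""
        start = ts - timedelta(days=ts.weekday())
        return start.replace(hour=0, minute=0, second=0, microsecond=0)
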
    def _analyze_branching_patterns(self, commits: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Analyze branching and merge patterns."""

        merge_commits = sum(1 for c in commits if c.get('is_merge'))
        total_commits = len(commits)

        merge_rate = (merge_commits / total_commits) * 100 if total_commits > 0 else 0

        # Determine branching strategy
        if merge_rate < 5:
            strategy = "linear"
        elif merge_rate < 15:
            strategy = "feature_branches"
        elif merge_rate < 30:
            strategy = "git_flow"
        else:
            strategy = "complex_branching"

        return {
            "merge_commits": merge_commits,
            "merge_rate_percent": round(merge_rate, 1),
            "strategy": strategy,
            "complexity_rating": "low" if merge_rate < 15 else "medium" if merge_rate < 30 else "high"
        }

    def _analyze_commit_timing_patterns(self, commits: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Analyze when commits typically happen."""

        if not commits:
            return {}

        # Extract timing data
        hours = []
        days = []

        for commit in commits:
            timestamp = commit['timestamp']
            # Use local hour if available
            if 'local_hour' in commit:
                hours.append(commit['local_hour'])
            elif hasattr(timestamp, 'hour'):
                hours.append(timestamp.hour)
            if hasattr(timestamp, 'weekday'):
                days.append(timestamp.weekday())

        patterns = {}

        if hours:
            # Hour distribution
            hour_counts = defaultdict(int)
            for hour in hours:
                hour_counts[hour] += 1

            peak_hour = max(hour_counts, key=hour_counts.get)
            patterns['peak_hour'] = peak_hour
            patterns['peak_hour_commits'] = hour_counts[peak_hour]

            # Time periods
            morning = sum(1 for h in hours if 6 <= h < 12)
            afternoon = sum(1 for h in hours if 12 <= h < 18)
            evening = sum(1 for h in hours if 18 <= h < 24)
            night = sum(1 for h in hours if 0 <= h < 6)

            total = len(hours)
            patterns['time_distribution'] = {
                'morning_pct': round((morning / total) * 100, 1),
                'afternoon_pct': round((afternoon / total) * 100, 1),
                'evening_pct': round((evening / total) * 100, 1),
                'night_pct': round((night / total) * 100, 1)
            }

        if days:
            # Day distribution
            day_counts = defaultdict(int)
            for day in days:
                day_counts[day] += 1

            peak_day = max(day_counts, key=day_counts.get)
            patterns['peak_day'] = self._get_day_name(peak_day)
            patterns['peak_day_commits'] = day_counts[peak_day]

            # Weekday vs weekend
            weekday_commits = sum(1 for d in days if d < 5)
            weekend_commits = sum(1 for d in days if d >= 5)

            total = len(days)
            patterns['weekday_pct'] = round((weekday_commits / total) * 100, 1)
            patterns['weekend_pct'] = round((weekend_commits / total) * 100, 1)

        return patterns

    def _analyze_pr_workflow(self, prs: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Analyze pull request workflow patterns."""

        if not prs:
            return {}

        # PR lifecycle analysis
        lifetimes = []
        sizes = []
        review_counts = []

        for pr in prs:
            # Calculate PR lifetime
            created = pr.get('created_at')
            merged = pr.get('merged_at')

            if created and merged:
                if isinstance(created, str):
                    created = datetime.fromisoformat(created.replace('Z', '+00:00'))
                if isinstance(merged, str):
                    merged = datetime.fromisoformat(merged.replace('Z', '+00:00'))

                lifetime_hours = (merged - created).total_seconds() / 3600
                lifetimes.append(lifetime_hours)

            # PR size (additions + deletions)
            additions = pr.get('additions', 0)
            deletions = pr.get('deletions', 0)
            sizes.append(additions + deletions)

            # Review comments
            review_comments = pr.get('review_comments', 0)
            review_counts.append(review_comments)

        workflow = {}

        if lifetimes:
            workflow['avg_lifetime_hours'] = round(statistics.mean(lifetimes), 1)
            workflow['median_lifetime_hours'] = round(statistics.median(lifetimes), 1)

        if sizes:
            workflow['avg_pr_size'] = round(statistics.mean(sizes), 1)
            workflow['median_pr_size'] = round(statistics.median(sizes), 1)

        if review_counts:
            workflow['avg_review_comments'] = round(statistics.mean(review_counts), 1)
            workflow['prs_with_reviews'] = sum(1 for r in review_counts if r > 0)
            workflow['review_rate_pct'] = round((workflow['prs_with_reviews'] / len(prs)) * 100, 1)

        return workflow

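    # Note on the .replace('Z', '+00:00') calls above: datetime.fromisoformat only
    # learned to parse a trailing 'Z' in Python 3.11, so the rewrite keeps parsing
    # working on older interpreters, e.g.:
    #   datetime.fromisoformat('2024-01-02T03:04:05Z'.replace('Z', '+00:00'))
    #   -> datetime(2024, 1, 2, 3, 4, 5, tzinfo=timezone.utc)
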
    def _analyze_git_pm_correlation(
        self,
        commits: List[Dict[str, Any]],
        pm_data: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Analyze correlation between Git activity and PM platform data."""

        correlations = pm_data.get('correlations', [])
        metrics = pm_data.get('metrics', {})

        if not correlations:
            return {"status": "no_correlations"}

        # Analyze correlation quality
        high_confidence = sum(1 for c in correlations if c.get('confidence', 0) > 0.8)
        medium_confidence = sum(1 for c in correlations if 0.5 <= c.get('confidence', 0) <= 0.8)
        low_confidence = sum(1 for c in correlations if c.get('confidence', 0) < 0.5)

        total_correlations = len(correlations)

        # Analyze correlation methods
        methods = defaultdict(int)
        for c in correlations:
            method = c.get('correlation_method', 'unknown')
            methods[method] += 1

        # Story point accuracy analysis
        story_analysis = metrics.get('story_point_analysis', {})

        return {
            "total_correlations": total_correlations,
            "confidence_distribution": {
                "high": high_confidence,
                "medium": medium_confidence,
                "low": low_confidence
            },
            "confidence_rates": {
                "high_pct": round((high_confidence / total_correlations) * 100, 1),
                "medium_pct": round((medium_confidence / total_correlations) * 100, 1),
                "low_pct": round((low_confidence / total_correlations) * 100, 1)
            },
            "correlation_methods": dict(methods),
            "story_point_analysis": story_analysis,
            "platforms": list(metrics.get('platform_coverage', {}).keys())
        }

    def _calculate_merge_commit_rate(self, commits: List[Dict[str, Any]]) -> float:
        """Calculate percentage of merge commits."""
        if not commits:
            return 0

        merge_commits = sum(1 for c in commits if c.get('is_merge'))
        return round((merge_commits / len(commits)) * 100, 1)

    def _analyze_commit_message_quality(self, commits: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Analyze commit message quality patterns."""

        if not commits:
            return {}

        message_lengths = []
        has_ticket_ref = 0
        conventional_commits = 0

        # Conventional commit prefixes
        conventional_prefixes = ['feat:', 'fix:', 'docs:', 'style:', 'refactor:', 'test:', 'chore:']

        for commit in commits:
            message = commit.get('message', '')

            # Message length (in words)
            word_count = len(message.split())
            message_lengths.append(word_count)

            # Ticket reference check
            if commit.get('ticket_references'):
                has_ticket_ref += 1

            # Conventional commit check
            if any(message.lower().startswith(prefix) for prefix in conventional_prefixes):
                conventional_commits += 1

        total_commits = len(commits)

        quality = {}

        if message_lengths:
            quality['avg_message_length_words'] = round(statistics.mean(message_lengths), 1)
            quality['median_message_length_words'] = round(statistics.median(message_lengths), 1)

        quality['ticket_reference_rate_pct'] = round((has_ticket_ref / total_commits) * 100, 1)
        quality['conventional_commit_rate_pct'] = round((conventional_commits / total_commits) * 100, 1)

        # Quality rating
        score = 0
        if quality.get('avg_message_length_words', 0) >= 5:
            score += 25
        if quality.get('ticket_reference_rate_pct', 0) >= 50:
            score += 35
        if quality.get('conventional_commit_rate_pct', 0) >= 30:
            score += 40

        if score >= 80:
            quality['overall_rating'] = 'excellent'
        elif score >= 60:
            quality['overall_rating'] = 'good'
        elif score >= 40:
            quality['overall_rating'] = 'fair'
        else:
            quality['overall_rating'] = 'needs_improvement'

        return quality

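    # Worked example of the rating above: a repo averaging 7-word messages (+25)
    # with 60% ticket references (+35) but only 10% conventional commits (+0)
    # scores 60 -> 'good'; hitting all three thresholds scores 100 -> 'excellent'.
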
    def _generate_weekly_time_series(
        self,
        commits: List[Dict[str, Any]],
        prs: List[Dict[str, Any]],
        start_date: datetime,
        end_date: datetime
    ) -> List[Dict[str, Any]]:
        """Generate weekly time series data for charts."""

        weekly_data = []
        current_date = start_date

        while current_date <= end_date:
            week_end = current_date + timedelta(days=7)

            # Filter commits for this week
            week_commits = []
            for c in commits:
                # Ensure both timestamps are timezone-aware for comparison
                commit_ts = c['timestamp']
                if hasattr(commit_ts, 'tzinfo') and commit_ts.tzinfo is None:
                    # Make timezone-aware if needed
                    commit_ts = commit_ts.replace(tzinfo=timezone.utc)
                elif not hasattr(commit_ts, 'tzinfo'):
                    # Convert to datetime if needed
                    commit_ts = datetime.fromisoformat(str(commit_ts))
                    if commit_ts.tzinfo is None:
                        commit_ts = commit_ts.replace(tzinfo=timezone.utc)

                if current_date <= commit_ts < week_end:
                    week_commits.append(c)

            # Filter PRs for this week (by merge date)
            week_prs = []
            for pr in prs:
                merged_at = pr.get('merged_at')
                if merged_at:
                    if isinstance(merged_at, str):
                        merged_at = datetime.fromisoformat(merged_at.replace('Z', '+00:00'))
                    # Ensure timezone-aware for comparison
                    if hasattr(merged_at, 'tzinfo') and merged_at.tzinfo is None:
                        merged_at = merged_at.replace(tzinfo=timezone.utc)
                    if current_date <= merged_at < week_end:
                        week_prs.append(pr)

            # Calculate metrics
            lines_changed = sum(
                c.get('filtered_insertions', c.get('insertions', 0)) +
                c.get('filtered_deletions', c.get('deletions', 0))
                for c in week_commits
            )

            story_points = sum(c.get('story_points', 0) or 0 for c in week_commits)

            active_developers = len(set(
                c.get('canonical_id', c.get('author_email'))
                for c in week_commits
            ))

            weekly_data.append({
                'date': current_date.strftime('%Y-%m-%d'),
                'commits': len(week_commits),
                'lines_changed': lines_changed,
                'story_points': story_points,
                'active_developers': active_developers,
                'pull_requests': len(week_prs)
            })

            current_date = week_end

        return weekly_data

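    # The timezone-coercion block above is repeated verbatim in the daily series
    # below; a small hedged refactor could hoist it into one helper (illustrative
    # only; "_coerce_utc_sketch" is a hypothetical name, not part of the module):
    def _coerce_utc_sketch(self, ts: Any) -> datetime:
        """Hypothetical helper: return a timezone-aware UTC datetime for any timestamp-ish value."""
        if not hasattr(ts, 'tzinfo'):
            # Not a datetime: parse its ISO-8601 string form
            ts = datetime.fromisoformat(str(ts))
        if ts.tzinfo is None:
            # Naive datetime: assume UTC, matching the inline logic above
            ts = ts.replace(tzinfo=timezone.utc)
        return ts
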
    def _generate_daily_time_series(
        self,
        commits: List[Dict[str, Any]],
        prs: List[Dict[str, Any]],
        start_date: datetime,
        end_date: datetime
    ) -> List[Dict[str, Any]]:
        """Generate daily time series data for detailed analysis."""

        daily_data = []
        current_date = start_date

        while current_date <= end_date:
            day_end = current_date + timedelta(days=1)

            # Filter commits for this day
            day_commits = []
            for c in commits:
                # Ensure both timestamps are timezone-aware for comparison
                commit_ts = c['timestamp']
                if hasattr(commit_ts, 'tzinfo') and commit_ts.tzinfo is None:
                    # Make timezone-aware if needed
                    commit_ts = commit_ts.replace(tzinfo=timezone.utc)
                elif not hasattr(commit_ts, 'tzinfo'):
                    # Convert to datetime if needed
                    commit_ts = datetime.fromisoformat(str(commit_ts))
                    if commit_ts.tzinfo is None:
                        commit_ts = commit_ts.replace(tzinfo=timezone.utc)

                if current_date <= commit_ts < day_end:
                    day_commits.append(c)

            daily_data.append({
                'date': current_date.strftime('%Y-%m-%d'),
                'commits': len(day_commits)
            })

            current_date = day_end

        return daily_data

    def _generate_quantitative_insights(
        self,
        commits: List[Dict[str, Any]],
        developer_stats: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Generate quantitative insights from data analysis."""

        insights = []

        # Team productivity insights
        total_commits = len(commits)
        if total_commits > 0:
            weekly_commits = self._get_weekly_commit_counts(commits)
            if weekly_commits:
                avg_weekly = statistics.mean(weekly_commits)
                insights.append({
                    "category": "productivity",
                    "type": "metric",
                    "title": "Weekly Commit Rate",
                    "description": f"Team averages {avg_weekly:.1f} commits per week",
                    "value": avg_weekly,
                    "trend": self._calculate_simple_trend(weekly_commits),
                    "priority": "medium"
                })

        # Developer distribution insights
        if len(developer_stats) > 1:
            commit_counts = [dev['total_commits'] for dev in developer_stats]
            gini = self._calculate_gini_coefficient(commit_counts)

            if gini > 0.7:
                insights.append({
                    "category": "team",
                    "type": "concern",
                    "title": "Unbalanced Contributions",
                    "description": f"Work is concentrated among few developers (Gini: {gini:.2f})",
                    "value": gini,
                    "priority": "high",
                    "recommendation": "Consider distributing work more evenly"
                })
            elif gini < 0.3:
                insights.append({
                    "category": "team",
                    "type": "positive",
                    "title": "Balanced Team Contributions",
                    "description": f"Work is well-distributed across the team (Gini: {gini:.2f})",
                    "value": gini,
                    "priority": "low"
                })

        # Code quality insights
        commit_sizes = []
        for commit in commits:
            lines = (
                commit.get('filtered_insertions', commit.get('insertions', 0)) +
                commit.get('filtered_deletions', commit.get('deletions', 0))
            )
            commit_sizes.append(lines)

        if commit_sizes:
            avg_size = statistics.mean(commit_sizes)
            if avg_size > 300:
                insights.append({
                    "category": "quality",
                    "type": "concern",
                    "title": "Large Commit Sizes",
                    "description": f"Average commit size is {avg_size:.0f} lines",
                    "value": avg_size,
                    "priority": "medium",
                    "recommendation": "Consider breaking down changes into smaller commits"
                })
            elif 20 <= avg_size <= 200:
                insights.append({
                    "category": "quality",
                    "type": "positive",
                    "title": "Optimal Commit Sizes",
                    "description": f"Average commit size of {avg_size:.0f} lines indicates good change management",
                    "value": avg_size,
                    "priority": "low"
                })

        return insights

    def _process_qualitative_insights(self, qualitative_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Process qualitative analysis results into insights."""

        insights = []

        for item in qualitative_data:
            # Transform qualitative data into insight format
            insight = {
                "category": item.get('category', 'general'),
                "type": "qualitative",
                "title": item.get('insight', 'Qualitative Insight'),
                "description": item.get('description', ''),
                "priority": item.get('priority', 'medium'),
                "confidence": item.get('confidence', 0.5)
            }

            if 'recommendation' in item:
                insight['recommendation'] = item['recommendation']

            insights.append(insight)

        return insights

    def _prioritize_insights(self, insights: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Prioritize insights by importance and impact."""

        def get_priority_score(insight):
            priority_scores = {'high': 3, 'medium': 2, 'low': 1}
            type_scores = {'concern': 3, 'positive': 1, 'metric': 2, 'qualitative': 2}

            priority_score = priority_scores.get(insight.get('priority', 'medium'), 2)
            type_score = type_scores.get(insight.get('type', 'metric'), 2)

            return priority_score + type_score

        # Sort by priority score (descending)
        prioritized = sorted(insights, key=get_priority_score, reverse=True)

        return prioritized[:10]  # Return top 10 insights

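    # Worked example of get_priority_score above: a high-priority concern scores
    # 3 + 3 = 6 and sorts first, while a low-priority positive scores 1 + 1 = 2.
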
    def _categorize_insights(self, insights: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
        """Categorize insights by category."""

        categories = defaultdict(list)

        for insight in insights:
            category = insight.get('category', 'general')
            categories[category].append(insight)

        return dict(categories)

    def _build_untracked_analysis(
        self,
        commits: List[Dict[str, Any]],
        project_metrics: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Build comprehensive untracked commit analysis for JSON export.

        WHY: Untracked work analysis is critical for understanding what development
        activities are happening outside the formal process. This data enables
        process improvements, training identification, and better project visibility.

        Args:
            commits: List of all commits
            project_metrics: Project metrics including ticket analysis

        Returns:
            Dictionary with comprehensive untracked analysis
        """
        ticket_analysis = project_metrics.get('ticket_analysis', {})
        untracked_commits = ticket_analysis.get('untracked_commits', [])

        if not untracked_commits:
            return {
                "summary": {
                    "total_untracked": 0,
                    "untracked_percentage": 0,
                    "analysis_status": "no_untracked_commits"
                },
                "categories": {},
                "contributors": {},
                "projects": {},
                "trends": {},
                "recommendations": []
            }

        # Initialize analysis structures
        categories = {}
        contributors = {}
        projects = {}
        monthly_trends = {}

        total_commits = ticket_analysis.get('total_commits', len(commits))
        total_untracked = len(untracked_commits)

        # Process each untracked commit
        for commit in untracked_commits:
            # Category analysis
            category = commit.get('category', 'other')
            if category not in categories:
                categories[category] = {
                    'count': 0,
                    'lines_changed': 0,
                    'files_changed': 0,
                    'examples': [],
                    'authors': set()
                }

            categories[category]['count'] += 1
            categories[category]['lines_changed'] += commit.get('lines_changed', 0)
            categories[category]['files_changed'] += commit.get('files_changed', 0)
            categories[category]['authors'].add(commit.get('canonical_id', commit.get('author_email', 'Unknown')))

            if len(categories[category]['examples']) < 3:
                categories[category]['examples'].append({
                    'hash': commit.get('hash', ''),
                    'message': commit.get('message', '')[:200],
                    'author': self._anonymize_value(commit.get('author', 'Unknown'), 'name'),
                    'timestamp': commit.get('timestamp'),
                    'lines_changed': commit.get('lines_changed', 0),
                    'files_changed': commit.get('files_changed', 0)
                })

            # Contributor analysis
            author_id = commit.get('canonical_id', commit.get('author_email', 'Unknown'))
            author_name = self._anonymize_value(commit.get('author', 'Unknown'), 'name')

            if author_id not in contributors:
                contributors[author_id] = {
                    'name': author_name,
                    'count': 0,
                    'lines_changed': 0,
                    'categories': set(),
                    'projects': set(),
                    'recent_commits': []
                }

            contributors[author_id]['count'] += 1
            contributors[author_id]['lines_changed'] += commit.get('lines_changed', 0)
            contributors[author_id]['categories'].add(category)
            contributors[author_id]['projects'].add(commit.get('project_key', 'UNKNOWN'))

            if len(contributors[author_id]['recent_commits']) < 5:
                contributors[author_id]['recent_commits'].append({
                    'hash': commit.get('hash', ''),
                    'message': commit.get('message', '')[:100],
                    'category': category,
                    'timestamp': commit.get('timestamp'),
                    'lines_changed': commit.get('lines_changed', 0)
                })

            # Project analysis
            project = commit.get('project_key', 'UNKNOWN')
            if project not in projects:
                projects[project] = {
                    'count': 0,
                    'lines_changed': 0,
                    'categories': set(),
                    'contributors': set(),
                    'avg_commit_size': 0
                }

            projects[project]['count'] += 1
            projects[project]['lines_changed'] += commit.get('lines_changed', 0)
            projects[project]['categories'].add(category)
            projects[project]['contributors'].add(author_id)

            # Monthly trend analysis
            timestamp = commit.get('timestamp')
            if timestamp and hasattr(timestamp, 'strftime'):
                month_key = timestamp.strftime('%Y-%m')
                if month_key not in monthly_trends:
                    monthly_trends[month_key] = {
                        'count': 0,
                        'categories': {},
                        'contributors': set()
                    }
                monthly_trends[month_key]['count'] += 1
                monthly_trends[month_key]['contributors'].add(author_id)

                if category not in monthly_trends[month_key]['categories']:
                    monthly_trends[month_key]['categories'][category] = 0
                monthly_trends[month_key]['categories'][category] += 1

        # Convert sets to lists and calculate derived metrics
        for category_data in categories.values():
            category_data['authors'] = len(category_data['authors'])
            category_data['avg_lines_per_commit'] = (
                category_data['lines_changed'] / category_data['count']
                if category_data['count'] > 0 else 0
            )

        for contributor_data in contributors.values():
            contributor_data['categories'] = list(contributor_data['categories'])
            contributor_data['projects'] = list(contributor_data['projects'])
            contributor_data['avg_lines_per_commit'] = (
                contributor_data['lines_changed'] / contributor_data['count']
                if contributor_data['count'] > 0 else 0
            )

        for project_data in projects.values():
            project_data['categories'] = list(project_data['categories'])
            project_data['contributors'] = len(project_data['contributors'])
            project_data['avg_commit_size'] = (
                project_data['lines_changed'] / project_data['count']
                if project_data['count'] > 0 else 0
            )

        # Convert sets to counts in trends
        for trend_data in monthly_trends.values():
            trend_data['contributors'] = len(trend_data['contributors'])

        # Generate insights and recommendations
        insights = self._generate_untracked_insights(categories, contributors, projects, total_untracked, total_commits)
        recommendations = self._generate_untracked_recommendations_json(categories, contributors, total_untracked, total_commits)

        # Calculate quality scores
        quality_scores = self._calculate_untracked_quality_scores(categories, total_untracked, total_commits)

        return {
            "summary": {
                "total_untracked": total_untracked,
                "total_commits": total_commits,
                "untracked_percentage": round((total_untracked / total_commits * 100), 2) if total_commits > 0 else 0,
                "avg_lines_per_untracked_commit": round(
                    sum(commit.get('lines_changed', 0) for commit in untracked_commits) / total_untracked, 1
                ) if total_untracked > 0 else 0,
                "analysis_status": "complete"
            },
            "categories": categories,
            "contributors": contributors,
            "projects": projects,
            "monthly_trends": monthly_trends,
            "insights": insights,
            "recommendations": recommendations,
            "quality_scores": quality_scores
        }

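    # The set-to-list/count conversions above exist because this structure is
    # destined for JSON export and json.dumps cannot serialize sets natively.
    # A quick illustrative check (stdlib only, nothing assumed beyond that):
    #   import json
    #   json.dumps({'authors': {'a', 'b'}})          # raises TypeError
    #   json.dumps({'authors': sorted({'a', 'b'})})  # -> '{"authors": ["a", "b"]}'
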
    def _generate_untracked_insights(
        self,
        categories: Dict[str, Any],
        contributors: Dict[str, Any],
        projects: Dict[str, Any],
        total_untracked: int,
        total_commits: int
    ) -> List[Dict[str, Any]]:
        """Generate insights from untracked commit analysis."""
        insights = []

        # Category insights
        if categories:
            top_category = max(categories.items(), key=lambda x: x[1]['count'])
            category_name, category_data = top_category
            category_pct = (category_data['count'] / total_untracked * 100)

            if category_name in ['feature', 'bug_fix']:
                insights.append({
                    'type': 'concern',
                    'category': 'process',
                    'title': f'High {category_name.replace("_", " ").title()} Untracked Rate',
                    'description': f'{category_pct:.1f}% of untracked work is {category_name.replace("_", " ")} development',
                    'impact': 'high',
                    'value': category_pct
                })
            elif category_name in ['maintenance', 'style', 'documentation']:
                insights.append({
                    'type': 'positive',
                    'category': 'process',
                    'title': 'Appropriate Untracked Work',
                    'description': f'{category_pct:.1f}% of untracked work is {category_name} - this is acceptable',
                    'impact': 'low',
                    'value': category_pct
                })

        # Contributor concentration insights
        if len(contributors) > 1:
            contributor_counts = [data['count'] for data in contributors.values()]
            max_contributor_count = max(contributor_counts)
            contributor_concentration = (max_contributor_count / total_untracked * 100)

            if contributor_concentration > 50:
                insights.append({
                    'type': 'concern',
                    'category': 'team',
                    'title': 'Concentrated Untracked Work',
                    'description': f'One developer accounts for {contributor_concentration:.1f}% of untracked commits',
                    'impact': 'medium',
                    'value': contributor_concentration
                })

        # Overall coverage insight
        untracked_pct = (total_untracked / total_commits * 100) if total_commits > 0 else 0
        if untracked_pct > 40:
            insights.append({
                'type': 'concern',
                'category': 'coverage',
                'title': 'High Untracked Rate',
                'description': f'{untracked_pct:.1f}% of all commits lack ticket references',
                'impact': 'high',
                'value': untracked_pct
            })
        elif untracked_pct < 15:
            insights.append({
                'type': 'positive',
                'category': 'coverage',
                'title': 'Excellent Tracking Coverage',
                'description': f'Only {untracked_pct:.1f}% of commits are untracked',
                'impact': 'low',
                'value': untracked_pct
            })

        return insights

    def _generate_untracked_recommendations_json(
        self,
        categories: Dict[str, Any],
        contributors: Dict[str, Any],
        total_untracked: int,
        total_commits: int
    ) -> List[Dict[str, Any]]:
        """Generate JSON-formatted recommendations for untracked work."""
        recommendations = []

        # Category-based recommendations
        feature_count = categories.get('feature', {}).get('count', 0)
        bug_fix_count = categories.get('bug_fix', {}).get('count', 0)

        if feature_count > total_untracked * 0.25:
            recommendations.append({
                'type': 'process_improvement',
                'priority': 'high',
                'title': 'Enforce Feature Ticket Requirements',
                'description': 'Many feature developments lack ticket references',
                'action': 'Require ticket creation and referencing for all new features',
                'expected_impact': 'Improved project visibility and planning',
                'effort': 'low'
            })

        if bug_fix_count > total_untracked * 0.20:
            recommendations.append({
                'type': 'process_improvement',
                'priority': 'high',
                'title': 'Link Bug Fixes to Issues',
                'description': 'Bug fixes should be tracked through issue management',
                'action': 'Create issues for bugs and reference them in fix commits',
                'expected_impact': 'Better bug tracking and resolution visibility',
                'effort': 'low'
            })

        # Coverage-based recommendations
        untracked_pct = (total_untracked / total_commits * 100) if total_commits > 0 else 0
        if untracked_pct > 40:
            recommendations.append({
                'type': 'team_training',
                'priority': 'medium',
                'title': 'Team Process Training',
                'description': 'High percentage of untracked commits indicates process gaps',
                'action': 'Provide training on ticket referencing and commit best practices',
                'expected_impact': 'Improved process adherence and visibility',
                'effort': 'medium'
            })

        # Developer-specific recommendations
        if len(contributors) > 1:
            max_contributor_pct = max(
                (data['count'] / total_untracked * 100) for data in contributors.values()
            )
            if max_contributor_pct > 40:
                recommendations.append({
                    'type': 'individual_coaching',
                    'priority': 'medium',
                    'title': 'Targeted Developer Coaching',
                    'description': 'Some developers need additional guidance on process',
                    'action': 'Provide one-on-one coaching for developers with high untracked rates',
                    'expected_impact': 'More consistent process adherence across the team',
                    'effort': 'low'
                })

        return recommendations

    def _calculate_untracked_quality_scores(
        self,
        categories: Dict[str, Any],
        total_untracked: int,
        total_commits: int
    ) -> Dict[str, Any]:
        """Calculate quality scores for untracked work patterns."""
        scores = {}

        # Process adherence score (lower untracked % = higher score)
        untracked_pct = (total_untracked / total_commits * 100) if total_commits > 0 else 0
        process_score = max(0, 100 - untracked_pct * 2)  # Scale so 50% untracked = 0 score
        scores['process_adherence'] = round(min(100, process_score), 1)

        # Appropriate untracked score (higher % of maintenance/docs/style = higher score)
        appropriate_categories = ['maintenance', 'documentation', 'style', 'test']
        appropriate_count = sum(
            categories.get(cat, {}).get('count', 0) for cat in appropriate_categories
        )
        appropriate_pct = (appropriate_count / total_untracked * 100) if total_untracked > 0 else 0
        scores['appropriate_untracked'] = round(appropriate_pct, 1)

        # Work type balance score
        if categories:
            category_counts = [data['count'] for data in categories.values()]
            # Calculate distribution balance (lower Gini = more balanced)
            gini = self._calculate_gini_coefficient(category_counts)
            balance_score = max(0, 100 - (gini * 100))
            scores['work_type_balance'] = round(balance_score, 1)
        else:
            scores['work_type_balance'] = 100

        # Overall untracked quality score
        overall_score = (
            scores['process_adherence'] * 0.5 +
            scores['appropriate_untracked'] * 0.3 +
            scores['work_type_balance'] * 0.2
        )
        scores['overall'] = round(overall_score, 1)

        # Quality rating
        if overall_score >= 80:
            rating = 'excellent'
        elif overall_score >= 60:
            rating = 'good'
        elif overall_score >= 40:
            rating = 'fair'
        else:
            rating = 'needs_improvement'

        scores['rating'] = rating

        return scores

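    # Worked example of the weighted overall score above: adherence 70, appropriate
    # 50, balance 60 gives 70 * 0.5 + 50 * 0.3 + 60 * 0.2 = 35 + 15 + 12 = 62.0,
    # which falls in the 60-79 band and rates 'good'.
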
def _generate_actionable_recommendations(self, insights: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
2424
|
+
"""Generate actionable recommendations from insights."""
|
|
2425
|
+
|
|
2426
|
+
recommendations = []
|
|
2427
|
+
|
|
2428
|
+
# Extract recommendations from insights
|
|
2429
|
+
for insight in insights:
|
|
2430
|
+
if 'recommendation' in insight and insight.get('type') == 'concern':
|
|
2431
|
+
recommendations.append({
|
|
2432
|
+
"title": insight['title'],
|
|
2433
|
+
"action": insight['recommendation'],
|
|
2434
|
+
"priority": insight.get('priority', 'medium'),
|
|
2435
|
+
"category": insight.get('category', 'general'),
|
|
2436
|
+
"expected_impact": self._estimate_recommendation_impact(insight)
|
|
2437
|
+
})
|
|
2438
|
+
|
|
2439
|
+
# Add general recommendations based on patterns
|
|
2440
|
+
self._add_general_recommendations(recommendations, insights)
|
|
2441
|
+
|
|
2442
|
+
return recommendations[:5] # Return top 5 recommendations
|
|
2443
|
+
|
|
2444
|
+
def _estimate_recommendation_impact(self, insight: Dict[str, Any]) -> str:
|
|
2445
|
+
"""Estimate the impact of implementing a recommendation."""
|
|
2446
|
+
|
|
2447
|
+
category = insight.get('category', '')
|
|
2448
|
+
priority = insight.get('priority', 'medium')
|
|
2449
|
+
|
|
2450
|
+
if priority == 'high':
|
|
2451
|
+
return 'high'
|
|
2452
|
+
elif category in ['team', 'productivity']:
|
|
2453
|
+
return 'medium'
|
|
2454
|
+
else:
|
|
2455
|
+
return 'low'
|
|
2456
|
+
|
|
2457
|
+
def _add_general_recommendations(
|
|
2458
|
+
self,
|
|
2459
|
+
recommendations: List[Dict[str, Any]],
|
|
2460
|
+
insights: List[Dict[str, Any]]
|
|
2461
|
+
) -> None:
|
|
2462
|
+
"""Add general recommendations based on insight patterns."""
|
|
2463
|
+
|
|
2464
|
+
# Check for lack of ticket coverage insights
|
|
2465
|
+
ticket_insights = [i for i in insights if 'ticket' in i.get('description', '').lower()]
|
|
2466
|
+
if not ticket_insights:
|
|
2467
|
+
recommendations.append({
|
|
2468
|
+
"title": "Improve Development Process Tracking",
|
|
2469
|
+
"action": "Implement consistent ticket referencing in commits and PRs",
|
|
2470
|
+
"priority": "medium",
|
|
2471
|
+
"category": "process",
|
|
2472
|
+
"expected_impact": "medium"
|
|
2473
|
+
})
|
|
2474
|
+
|
|
+    def _calculate_simple_trend(self, values: List[float]) -> str:
+        """Calculate simple trend direction from a list of values."""
+
+        if len(values) < 2:
+            return "stable"
+
+        # Compare first half vs second half
+        midpoint = len(values) // 2
+        first_half = values[:midpoint]
+        second_half = values[midpoint:]
+
+        first_avg = statistics.mean(first_half)
+        second_avg = statistics.mean(second_half)
+
+        if first_avg == 0:
+            return "stable"
+
+        change_pct = ((second_avg - first_avg) / first_avg) * 100
+
+        if abs(change_pct) < 10:
+            return "stable"
+        elif change_pct > 0:
+            return "increasing"
+        else:
+            return "decreasing"
+
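Note: two hand-computed cases show how the 10% threshold behaves:

    # [4, 5, 6, 9]: first half mean 4.5, second half mean 7.5
    # change = (7.5 - 4.5) / 4.5 * 100 = +66.7%  -> "increasing"
    #
    # [10, 11, 10, 10]: first half mean 10.5, second half mean 10.0
    # change = (10.0 - 10.5) / 10.5 * 100 = -4.8%  -> "stable" (|change| < 10)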
+    def _get_weekly_commit_counts(self, commits: List[Dict[str, Any]]) -> List[int]:
+        """Get commit counts grouped by week."""
+
+        if not commits:
+            return []
+
+        # Group commits by week
+        weekly_counts = defaultdict(int)
+
+        for commit in commits:
+            week_start = self._get_week_start(commit['timestamp'])
+            week_key = week_start.strftime('%Y-%m-%d')
+            weekly_counts[week_key] += 1
+
+        # Return counts in chronological order
+        sorted_weeks = sorted(weekly_counts.keys())
+        return [weekly_counts[week] for week in sorted_weeks]
+
+    def _get_daily_commit_counts(self, commits: List[Dict[str, Any]]) -> List[int]:
+        """Get commit counts grouped by day."""
+
+        if not commits:
+            return []
+
+        # Group commits by day
+        daily_counts = defaultdict(int)
+
+        for commit in commits:
+            day_key = commit['timestamp'].strftime('%Y-%m-%d')
+            daily_counts[day_key] += 1
+
+        # Return counts in chronological order
+        sorted_days = sorted(daily_counts.keys())
+        return [daily_counts[day] for day in sorted_days]
+
+    def _calculate_weekly_commits(self, commits: List[Dict[str, Any]]) -> float:
+        """Calculate average commits per week."""
+
+        weekly_counts = self._get_weekly_commit_counts(commits)
+        if not weekly_counts:
+            return 0
+
+        return round(statistics.mean(weekly_counts), 1)
+
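Note on the design: because the bucket keys are zero-padded YYYY-MM-DD strings, lexicographic sorted() is also chronological order, so no date parsing is needed on the way back out. A small illustrative run (timestamps invented):

    from datetime import datetime, timezone

    # Three commits across two weeks (hypothetical data)
    commits = [
        {'timestamp': datetime(2024, 1, 1, tzinfo=timezone.utc)},  # Mon, week of Jan 1
        {'timestamp': datetime(2024, 1, 3, tzinfo=timezone.utc)},  # Wed, same week
        {'timestamp': datetime(2024, 1, 8, tzinfo=timezone.utc)},  # Mon, next week
    ]
    # _get_weekly_commit_counts(commits) -> [2, 1]
    # _calculate_weekly_commits(commits) -> 1.5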
+    def _find_peak_activity_day(self, commits: List[Dict[str, Any]]) -> str:
+        """Find the day of week with most commits."""
+
+        if not commits:
+            return "Unknown"
+
+        day_counts = defaultdict(int)
+
+        for commit in commits:
+            if hasattr(commit['timestamp'], 'weekday'):
+                day_index = commit['timestamp'].weekday()
+                day_counts[day_index] += 1
+
+        if not day_counts:
+            return "Unknown"
+
+        peak_day_index = max(day_counts, key=day_counts.get)
+        return self._get_day_name(peak_day_index)
+
+    def _analyze_commit_size_distribution(self, commits: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """Analyze distribution of commit sizes."""
+
+        if not commits:
+            return {}
+
+        sizes = []
+        for commit in commits:
+            lines = (
+                commit.get('filtered_insertions', commit.get('insertions', 0)) +
+                commit.get('filtered_deletions', commit.get('deletions', 0))
+            )
+            sizes.append(lines)
+
+        if not sizes:
+            return {}
+
+        return {
+            'mean': round(statistics.mean(sizes), 1),
+            'median': round(statistics.median(sizes), 1),
+            'std_dev': round(statistics.pstdev(sizes), 1) if len(sizes) > 1 else 0,
+            'min': min(sizes),
+            'max': max(sizes),
+            'small_commits': sum(1 for s in sizes if s < 50),  # < 50 lines
+            'medium_commits': sum(1 for s in sizes if 50 <= s <= 200),  # 50-200 lines
+            'large_commits': sum(1 for s in sizes if s > 200)  # > 200 lines
+        }
+
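Note: the size calculation prefers the filtered_* line counts (post-exclusion) and falls back to the raw counts. A hand-worked example of the bucketing:

    # Hypothetical commits; filtered_* counts take precedence over raw counts
    commits = [
        {'filtered_insertions': 10, 'filtered_deletions': 5},     # 15 lines -> small
        {'insertions': 120, 'deletions': 30},                     # 150 lines -> medium
        {'filtered_insertions': 400, 'filtered_deletions': 100},  # 500 lines -> large
    ]
    # sizes = [15, 150, 500]; mean 221.7, median 150,
    # small_commits=1, medium_commits=1, large_commits=1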
+    def _get_week_start(self, date: datetime) -> datetime:
+        """Get Monday of the week for a given date."""
+
+        # Ensure timezone consistency
+        if hasattr(date, 'tzinfo') and date.tzinfo is not None:
+            if date.tzinfo != timezone.utc:
+                date = date.astimezone(timezone.utc)
+        else:
+            date = date.replace(tzinfo=timezone.utc)
+
+        days_since_monday = date.weekday()
+        monday = date - timedelta(days=days_since_monday)
+        return monday.replace(hour=0, minute=0, second=0, microsecond=0)
+
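Note: all timestamps are normalized to UTC before bucketing so commits from different timezones land in the same week; naive datetimes are assumed to already be UTC. A quick check with an invented date:

    from datetime import datetime, timezone

    d = datetime(2024, 1, 10, 15, 30, tzinfo=timezone.utc)  # a Wednesday
    # d.weekday() == 2, so subtracting 2 days gives Monday 2024-01-08,
    # truncated to midnight: 2024-01-08 00:00:00+00:00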
+    def _calculate_gini_coefficient(self, values: List[float]) -> float:
+        """Calculate Gini coefficient for measuring inequality."""
+
+        if not values or len(values) == 1:
+            return 0.0
+
+        sorted_values = sorted(values)
+        n = len(values)
+        total = sum(sorted_values)
+
+        if total == 0:
+            # All values are zero: no inequality to measure
+            return 0.0
+
+        # Standard rank-weighted formula:
+        # G = (2 * sum(i * x_i)) / (n * sum(x)) - (n + 1) / n, with i = 1..n over sorted x
+        # (builtin sum avoids np.sum's deprecated handling of generators)
+        weighted_sum = sum((i + 1) * v for i, v in enumerate(sorted_values))
+        return (2 * weighted_sum) / (n * total) - (n + 1) / n
+
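Note: as a sanity check on the rank-weighted formula, two hand-computed cases:

    # Perfect equality: every developer has the same commit count
    # values = [1, 1, 1, 1] -> weighted_sum = 1+2+3+4 = 10
    # G = (2*10)/(4*4) - 5/4 = 1.25 - 1.25 = 0.0
    #
    # Maximum inequality: one developer does everything
    # values = [0, 0, 0, 4] -> weighted_sum = 4*4 = 16
    # G = (2*16)/(4*4) - 5/4 = 2.0 - 1.25 = 0.75, i.e. (n-1)/n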
+    def _anonymize_value(self, value: str, field_type: str) -> str:
+        """Anonymize a value if anonymization is enabled."""
+
+        if not self.anonymize or not value:
+            return value
+
+        if field_type == 'email' and '@' in value:
+            # Keep domain for email
+            local, domain = value.split('@', 1)
+            value = local  # Anonymize only local part
+            suffix = f"@{domain}"
+        else:
+            suffix = ""
+
+        if value not in self._anonymization_map:
+            self._anonymous_counter += 1
+            if field_type == 'name':
+                anonymous = f"Developer{self._anonymous_counter}"
+            elif field_type == 'email':
+                anonymous = f"dev{self._anonymous_counter}"
+            elif field_type == 'id':
+                anonymous = f"ID{self._anonymous_counter:04d}"
+            elif field_type == 'username':
+                anonymous = f"user{self._anonymous_counter}"
+            else:
+                anonymous = f"anon{self._anonymous_counter}"
+
+            self._anonymization_map[value] = anonymous
+
+        return self._anonymization_map[value] + suffix
+
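Note: the mapping is stable within a run, so a given raw value always maps to the same pseudonym and cross-references inside the report stay consistent, while email domains survive for org-level grouping. Illustrative behavior, assuming anonymize is enabled and the map starts empty (exporter is a hypothetical instance):

    # exporter._anonymize_value('Jane Doe', 'name')           -> 'Developer1'
    # exporter._anonymize_value('jane@example.com', 'email')  -> 'dev2@example.com'
    # exporter._anonymize_value('Jane Doe', 'name')           -> 'Developer1'  (reused)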
+    def _serialize_for_json(self, data: Any) -> Any:
+        """Serialize data for JSON output, handling datetime objects."""
+
+        if isinstance(data, datetime):
+            return data.isoformat()
+        elif isinstance(data, dict):
+            return {k: self._serialize_for_json(v) for k, v in data.items()}
+        elif isinstance(data, list):
+            return [self._serialize_for_json(item) for item in data]
+        elif isinstance(data, set):
+            return [self._serialize_for_json(item) for item in data]  # Sets become lists, elements recursively serialized
+        elif isinstance(data, np.integer):
+            return int(data)  # Preserve integer-ness of numpy ints
+        elif isinstance(data, np.floating):
+            return float(data)  # Convert numpy floats to Python floats
+        else:
+            return data
+
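Note: a hypothetical round-trip through the serializer (set ordering is not guaranteed):

    # _serialize_for_json({'when': datetime(2024, 1, 1, tzinfo=timezone.utc),
    #                      'tags': {'a', 'b'}, 'count': np.int64(3)})
    # -> {'when': '2024-01-01T00:00:00+00:00', 'tags': ['a', 'b'], 'count': 3}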
2665
|
+
# Implementation of abstract methods from BaseReportGenerator
|
|
2666
|
+
|
|
2667
|
+
def generate(self, data: ReportData, output_path: Optional[Path] = None) -> ReportOutput:
|
|
2668
|
+
"""Generate comprehensive JSON export from standardized data.
|
|
2669
|
+
|
|
2670
|
+
Args:
|
|
2671
|
+
data: Standardized report data
|
|
2672
|
+
output_path: Optional path to write the JSON to
|
|
2673
|
+
|
|
2674
|
+
Returns:
|
|
2675
|
+
ReportOutput containing the results
|
|
2676
|
+
"""
|
|
2677
|
+
try:
|
|
2678
|
+
# Validate data
|
|
2679
|
+
if not self.validate_data(data):
|
|
2680
|
+
return ReportOutput(
|
|
2681
|
+
success=False,
|
|
2682
|
+
errors=["Invalid or incomplete data provided"]
|
|
2683
|
+
)
|
|
2684
|
+
|
|
2685
|
+
# Pre-process data
|
|
2686
|
+
data = self.pre_process(data)
|
|
2687
|
+
|
|
2688
|
+
# Use the main export method with ReportData fields
|
|
2689
|
+
if output_path:
|
|
2690
|
+
self.export_comprehensive_data(
|
|
2691
|
+
commits=data.commits or [],
|
|
2692
|
+
prs=data.pull_requests or [],
|
|
2693
|
+
developer_stats=data.developer_stats or [],
|
|
2694
|
+
project_metrics=data.config.get("project_metrics", {}),
|
|
2695
|
+
dora_metrics=data.dora_metrics or {},
|
|
2696
|
+
output_path=output_path,
|
|
2697
|
+
weeks=data.metadata.analysis_period_weeks or 12,
|
|
2698
|
+
pm_data=data.pm_data,
|
|
2699
|
+
qualitative_data=data.qualitative_results,
|
|
2700
|
+
enhanced_qualitative_analysis=data.config.get("enhanced_qualitative_analysis")
|
|
2701
|
+
)
|
|
2702
|
+
|
|
2703
|
+
return ReportOutput(
|
|
2704
|
+
success=True,
|
|
2705
|
+
file_path=output_path,
|
|
2706
|
+
format=self.get_format_type(),
|
|
2707
|
+
size_bytes=output_path.stat().st_size if output_path.exists() else 0
|
|
2708
|
+
)
|
|
2709
|
+
else:
|
|
2710
|
+
# Generate in-memory JSON
|
|
2711
|
+
end_date = datetime.now(timezone.utc)
|
|
2712
|
+
start_date = end_date - timedelta(weeks=data.metadata.analysis_period_weeks or 12)
|
|
2713
|
+
|
|
2714
|
+
export_data = {
|
|
2715
|
+
"metadata": self._build_metadata(
|
|
2716
|
+
data.commits or [],
|
|
2717
|
+
data.pull_requests or [],
|
|
2718
|
+
data.developer_stats or [],
|
|
2719
|
+
start_date,
|
|
2720
|
+
end_date
|
|
2721
|
+
),
|
|
2722
|
+
"executive_summary": self._build_executive_summary(
|
|
2723
|
+
data.commits or [],
|
|
2724
|
+
data.pull_requests or [],
|
|
2725
|
+
data.developer_stats or [],
|
|
2726
|
+
data.config.get("project_metrics", {}),
|
|
2727
|
+
data.dora_metrics or {}
|
|
2728
|
+
),
|
|
2729
|
+
"raw_data": self._build_raw_data_summary(
|
|
2730
|
+
data.commits or [],
|
|
2731
|
+
data.pull_requests or [],
|
|
2732
|
+
data.developer_stats or [],
|
|
2733
|
+
data.dora_metrics or {}
|
|
2734
|
+
)
|
|
2735
|
+
}
|
|
2736
|
+
|
|
2737
|
+
serialized_data = self._serialize_for_json(export_data)
|
|
2738
|
+
json_content = json.dumps(serialized_data, indent=2, ensure_ascii=False)
|
|
2739
|
+
|
|
2740
|
+
return ReportOutput(
|
|
2741
|
+
success=True,
|
|
2742
|
+
content=json_content,
|
|
2743
|
+
format=self.get_format_type(),
|
|
2744
|
+
size_bytes=len(json_content)
|
|
2745
|
+
)
|
|
2746
|
+
|
|
2747
|
+
except Exception as e:
|
|
2748
|
+
logger.error(f"Error generating comprehensive JSON export: {e}")
|
|
2749
|
+
return ReportOutput(
|
|
2750
|
+
success=False,
|
|
2751
|
+
errors=[str(e)]
|
|
2752
|
+
)
|
|
2753
|
+
|
|
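Note: a minimal sketch of driving the generator, assuming a populated ReportData instance; the exporter class name below is a placeholder, and construction of report_data is elided:

    from pathlib import Path

    exporter = ComprehensiveJSONExporter()  # hypothetical concrete class name
    result = exporter.generate(report_data, output_path=Path('report.json'))
    if result.success:
        print(f"Wrote {result.size_bytes} bytes of {result.format}")
    else:
        print("Export failed:", result.errors)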
+    def get_required_fields(self) -> List[str]:
+        """Get the list of required data fields for JSON export.
+
+        Returns:
+            List of required field names
+        """
+        # Comprehensive JSON export can work with any combination of data,
+        # but works best with commits and developer_stats
+        return []  # No strict requirements, flexible export
+
+    def get_format_type(self) -> str:
+        """Get the format type this generator produces.
+
+        Returns:
+            Format identifier
+        """
+        return ReportFormat.JSON.value