gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4158 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +905 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +444 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1285 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.11.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.11.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/top_level.txt +0 -0
gitflow_analytics/qualitative/enhanced_analyzer.py (new file)
@@ -0,0 +1,2236 @@
1
+ """Enhanced qualitative analyzer for GitFlow Analytics.
2
+
3
+ This module provides sophisticated qualitative analysis across four key dimensions:
4
+ 1. Executive Summary Analysis - High-level team health and strategic insights
5
+ 2. Project Analysis - Project-specific momentum and health assessment
6
+ 3. Developer Analysis - Individual contribution patterns and career development
7
+ 4. Workflow Analysis - Process effectiveness and Git-PM correlation analysis
8
+
9
+ WHY: Traditional quantitative metrics only tell part of the story. This enhanced analyzer
10
+ combines statistical analysis with pattern recognition to generate actionable insights
11
+ for different stakeholder levels - from executives to individual developers.
12
+
13
+ DESIGN DECISIONS:
14
+ - Confidence-based scoring: All insights include confidence scores for reliability
15
+ - Multi-dimensional analysis: Each section focuses on different aspects of team performance
16
+ - Natural language generation: Produces human-readable insights and recommendations
17
+ - Anomaly detection: Identifies unusual patterns that merit attention
18
+ - Risk assessment: Flags potential issues before they become critical
19
+
20
+ INTEGRATION: Works with existing qualitative pipeline and extends JSON export format
21
+ with structured analysis results that can be consumed by dashboards and reports.
22
+ """
23
+
24
+ import logging
25
+ import statistics
26
+ from collections import defaultdict
27
+ from datetime import datetime, timedelta, timezone
28
+ from typing import Any, Optional
29
+
30
+ import numpy as np
31
+
32
+ from .models.schemas import QualitativeCommitData
33
+ from .utils.metrics import PerformanceMetrics
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+
38
+ class EnhancedQualitativeAnalyzer:
39
+ """Enhanced qualitative analyzer providing specialized analysis across four dimensions.
40
+
41
+ This analyzer processes quantitative commit data and generates qualitative insights
42
+ across executive, project, developer, and workflow dimensions. Each analysis includes
43
+ confidence scores, risk assessments, and actionable recommendations.
44
+ """
45
+
46
+ def __init__(self, config: Optional[dict[str, Any]] = None):
47
+ """Initialize the enhanced analyzer.
48
+
49
+ Args:
50
+ config: Configuration dictionary with analysis thresholds and parameters
51
+ """
52
+ self.config = config or {}
53
+
54
+ # Analysis thresholds and parameters
55
+ self.thresholds = {
56
+ "high_productivity_commits": 50, # Commits per analysis period
57
+ "low_productivity_commits": 5, # Minimum meaningful activity
58
+ "high_collaboration_projects": 3, # Projects for versatility
59
+ "consistent_activity_weeks": 0.7, # Percentage of weeks active
60
+ "large_commit_lines": 300, # Lines changed threshold
61
+ "critical_risk_score": 0.8, # Risk level for critical issues
62
+ "velocity_trend_threshold": 0.2, # 20% change for significant trend
63
+ "health_score_excellent": 80, # Health score thresholds
64
+ "health_score_good": 60,
65
+ "health_score_fair": 40,
66
+ "bus_factor_threshold": 0.7, # Contribution concentration limit
67
+ "ticket_coverage_excellent": 80, # Ticket linking thresholds
68
+ "ticket_coverage_poor": 30,
69
+ }
70
+
71
+ # Update thresholds from config
72
+ if "analysis_thresholds" in self.config:
73
+ self.thresholds.update(self.config["analysis_thresholds"])
74
+
75
+ self.metrics = PerformanceMetrics()
76
+ self.logger = logging.getLogger(__name__)
77
+
78
+ def analyze_comprehensive(
79
+ self,
80
+ commits: list[dict[str, Any]],
81
+ qualitative_data: Optional[list[QualitativeCommitData]] = None,
82
+ developer_stats: Optional[list[dict[str, Any]]] = None,
83
+ project_metrics: Optional[dict[str, Any]] = None,
84
+ pm_data: Optional[dict[str, Any]] = None,
85
+ weeks_analyzed: int = 12,
86
+ ) -> dict[str, Any]:
87
+ """Perform comprehensive enhanced qualitative analysis.
88
+
89
+ Args:
90
+ commits: List of commit data from GitFlow Analytics
91
+ qualitative_data: Optional qualitative commit analysis results
92
+ developer_stats: Optional developer statistics
93
+ project_metrics: Optional project-level metrics
94
+ pm_data: Optional PM platform integration data
95
+ weeks_analyzed: Number of weeks in analysis period
96
+
97
+ Returns:
98
+ Dictionary containing all four analysis dimensions
99
+ """
100
+ self.logger.info(f"Starting enhanced qualitative analysis of {len(commits)} commits")
101
+
102
+ # Prepare unified data structures
103
+ analysis_context = self._prepare_analysis_context(
104
+ commits, qualitative_data, developer_stats, project_metrics, pm_data, weeks_analyzed
105
+ )
106
+
107
+ # Perform four-dimensional analysis
108
+ executive_analysis = self._analyze_executive_summary(analysis_context)
109
+ project_analysis = self._analyze_projects(analysis_context)
110
+ developer_analysis = self._analyze_developers(analysis_context)
111
+ workflow_analysis = self._analyze_workflow(analysis_context)
112
+
113
+ # Cross-reference insights for consistency
114
+ comprehensive_analysis = {
115
+ "metadata": {
116
+ "analysis_timestamp": datetime.now(timezone.utc).isoformat(),
117
+ "commits_analyzed": len(commits),
118
+ "weeks_analyzed": weeks_analyzed,
119
+ "analysis_version": "2.0.0",
120
+ },
121
+ "executive_analysis": executive_analysis,
122
+ "project_analysis": project_analysis,
123
+ "developer_analysis": developer_analysis,
124
+ "workflow_analysis": workflow_analysis,
125
+ "cross_insights": self._generate_cross_insights(
126
+ executive_analysis, project_analysis, developer_analysis, workflow_analysis
127
+ ),
128
+ }
129
+
130
+ self.logger.info("Enhanced qualitative analysis completed")
131
+ return comprehensive_analysis
132
+
133
+ def _prepare_analysis_context(
134
+ self,
135
+ commits: list[dict[str, Any]],
136
+ qualitative_data: Optional[list[QualitativeCommitData]],
137
+ developer_stats: Optional[list[dict[str, Any]]],
138
+ project_metrics: Optional[dict[str, Any]],
139
+ pm_data: Optional[dict[str, Any]],
140
+ weeks_analyzed: int,
141
+ ) -> dict[str, Any]:
142
+ """Prepare unified analysis context with all available data."""
143
+
144
+ # Process commits data
145
+ commits_by_project = defaultdict(list)
146
+ commits_by_developer = defaultdict(list)
147
+
148
+ for commit in commits:
149
+ project_key = commit.get("project_key", "UNKNOWN")
150
+ dev_id = commit.get("canonical_id", commit.get("author_email"))
151
+
152
+ commits_by_project[project_key].append(commit)
153
+ commits_by_developer[dev_id].append(commit)
154
+
155
+ # Calculate time periods
156
+ end_date = datetime.now(timezone.utc)
157
+ start_date = end_date - timedelta(weeks=weeks_analyzed)
158
+
159
+ # Prepare qualitative mapping
160
+ qualitative_by_hash = {}
161
+ if qualitative_data:
162
+ # Handle both QualitativeCommitData objects and dictionaries
163
+ qualitative_by_hash = {}
164
+ for q in qualitative_data:
165
+ if hasattr(q, "hash"):
166
+ # QualitativeCommitData object
167
+ qualitative_by_hash[q.hash] = q
168
+ elif isinstance(q, dict) and "hash" in q:
169
+ # Dictionary format
170
+ qualitative_by_hash[q["hash"]] = q
171
+ else:
172
+ # Skip invalid entries
173
+ self.logger.warning(f"Invalid qualitative data format: {type(q)}")
174
+
175
+ return {
176
+ "commits": commits,
177
+ "commits_by_project": dict(commits_by_project),
178
+ "commits_by_developer": dict(commits_by_developer),
179
+ "qualitative_data": qualitative_by_hash,
180
+ "developer_stats": developer_stats or [],
181
+ "project_metrics": project_metrics or {},
182
+ "pm_data": pm_data or {},
183
+ "weeks_analyzed": weeks_analyzed,
184
+ "analysis_period": {"start_date": start_date, "end_date": end_date},
185
+ "total_commits": len(commits),
186
+ "unique_projects": len(commits_by_project),
187
+ "unique_developers": len(commits_by_developer),
188
+ }
189
+
190
+ def _analyze_executive_summary(self, context: dict[str, Any]) -> dict[str, Any]:
191
+ """Generate executive-level analysis with strategic insights.
192
+
193
+ WHY: Executives need high-level health assessment, trend analysis, and risk indicators
194
+ without getting lost in technical details. This analysis focuses on team productivity,
195
+ velocity trends, and strategic recommendations.
196
+ """
197
+ context["commits"]
198
+ context["total_commits"]
199
+ context["weeks_analyzed"]
200
+
201
+ # Overall team health assessment
202
+ health_assessment, health_confidence = self._assess_team_health(context)
203
+
204
+ # Velocity trend analysis
205
+ velocity_trends = self._analyze_velocity_trends(context)
206
+
207
+ # Key achievements identification
208
+ achievements = self._identify_key_achievements(context)
209
+
210
+ # Major concerns and risks
211
+ concerns = self._identify_major_concerns(context)
212
+
213
+ # Risk indicators
214
+ risk_indicators = self._assess_risk_indicators(context)
215
+
216
+ # Strategic recommendations
217
+ recommendations = self._generate_executive_recommendations(
218
+ health_assessment, velocity_trends, concerns, risk_indicators
219
+ )
220
+
221
+ return {
222
+ "health_assessment": health_assessment,
223
+ "health_confidence": health_confidence,
224
+ "velocity_trends": {
225
+ "overall_trend": velocity_trends["trend_direction"],
226
+ "trend_percentage": velocity_trends["trend_percentage"],
227
+ "weekly_average": velocity_trends["weekly_average"],
228
+ "trend_confidence": velocity_trends["confidence"],
229
+ },
230
+ "key_achievements": achievements,
231
+ "major_concerns": concerns,
232
+ "risk_indicators": risk_indicators,
233
+ "recommendations": recommendations,
234
+ "executive_summary": self._generate_executive_narrative(
235
+ health_assessment, velocity_trends, achievements, concerns
236
+ ),
237
+ }
238
+
239
+ def _analyze_projects(self, context: dict[str, Any]) -> dict[str, Any]:
240
+ """Analyze project-level momentum and health indicators.
241
+
242
+ WHY: Project managers need to understand individual project health, momentum,
243
+ and contributor dynamics to make informed resource allocation decisions.
244
+ """
245
+ projects_analysis = {}
246
+ commits_by_project = context["commits_by_project"]
247
+
248
+ for project_key, project_commits in commits_by_project.items():
249
+ if not project_commits:
250
+ continue
251
+
252
+ # Momentum classification
253
+ momentum = self._classify_project_momentum(project_commits, context)
254
+
255
+ # Health indicators
256
+ health_indicators = self._calculate_project_health_indicators(project_commits, context)
257
+
258
+ # Technical debt signals
259
+ tech_debt_signals = self._detect_technical_debt_signals(project_commits, context)
260
+
261
+ # Delivery predictability
262
+ predictability = self._assess_delivery_predictability(project_commits, context)
263
+
264
+ # Risk assessment
265
+ risk_assessment = self._assess_project_risks(project_commits, context)
266
+
267
+ # Project-specific recommendations
268
+ recommendations = self._generate_project_recommendations(
269
+ momentum, health_indicators, tech_debt_signals, risk_assessment
270
+ )
271
+
272
+ projects_analysis[project_key] = {
273
+ "momentum": momentum,
274
+ "health_indicators": health_indicators,
275
+ "technical_debt_signals": tech_debt_signals,
276
+ "delivery_predictability": predictability,
277
+ "risk_assessment": risk_assessment,
278
+ "recommendations": recommendations,
279
+ "project_narrative": self._generate_project_narrative(
280
+ project_key, momentum, health_indicators, risk_assessment
281
+ ),
282
+ }
283
+
284
+ return projects_analysis
285
+
286
+ def _analyze_developers(self, context: dict[str, Any]) -> dict[str, Any]:
287
+ """Analyze individual developer patterns and career development insights.
288
+
289
+ WHY: Developers and their managers need insights into contribution patterns,
290
+ growth trajectory, and areas for professional development.
291
+ """
292
+ developers_analysis = {}
293
+ commits_by_developer = context["commits_by_developer"]
294
+ developer_stats = context["developer_stats"]
295
+
296
+ # Create developer stats lookup
297
+ dev_stats_by_id = {}
298
+ for dev in developer_stats:
299
+ dev_stats_by_id[dev.get("canonical_id")] = dev
300
+
301
+ for dev_id, dev_commits in commits_by_developer.items():
302
+ if not dev_commits:
303
+ continue
304
+
305
+ dev_stats = dev_stats_by_id.get(dev_id, {})
306
+
307
+ # Contribution pattern analysis
308
+ contribution_pattern = self._analyze_contribution_patterns(dev_commits, context)
309
+
310
+ # Collaboration score
311
+ collaboration_score = self._calculate_collaboration_score(dev_commits, context)
312
+
313
+ # Expertise domains
314
+ expertise_domains = self._identify_expertise_domains(dev_commits, context)
315
+
316
+ # Growth trajectory analysis
317
+ growth_trajectory = self._analyze_growth_trajectory(dev_commits, context)
318
+
319
+ # Burnout indicators
320
+ burnout_indicators = self._detect_burnout_indicators(dev_commits, context)
321
+
322
+ # Career development recommendations
323
+ career_recommendations = self._generate_career_recommendations(
324
+ contribution_pattern,
325
+ collaboration_score,
326
+ expertise_domains,
327
+ growth_trajectory,
328
+ burnout_indicators,
329
+ )
330
+
331
+ developers_analysis[dev_id] = {
332
+ "contribution_pattern": contribution_pattern,
333
+ "collaboration_score": collaboration_score,
334
+ "expertise_domains": expertise_domains,
335
+ "growth_trajectory": growth_trajectory,
336
+ "burnout_indicators": burnout_indicators,
337
+ "career_recommendations": career_recommendations,
338
+ "developer_narrative": self._generate_developer_narrative(
339
+ dev_stats.get("primary_name", f"Developer {dev_id}"),
340
+ contribution_pattern,
341
+ expertise_domains,
342
+ growth_trajectory,
343
+ ),
344
+ }
345
+
346
+ return developers_analysis
347
+
348
+ def _analyze_workflow(self, context: dict[str, Any]) -> dict[str, Any]:
349
+ """Analyze workflow effectiveness and Git-PM correlation.
350
+
351
+ WHY: Team leads need to understand process effectiveness, identify bottlenecks,
352
+ and optimize workflows for better productivity and quality.
353
+ """
354
+ commits = context["commits"]
355
+ pm_data = context["pm_data"]
356
+ project_metrics = context["project_metrics"]
357
+
358
+ # Git-PM correlation effectiveness
359
+ git_pm_effectiveness = self._assess_git_pm_correlation(commits, pm_data, context)
360
+
361
+ # Process bottleneck identification
362
+ bottlenecks = self._identify_process_bottlenecks(commits, context)
363
+
364
+ # Automation opportunities
365
+ automation_opportunities = self._identify_automation_opportunities(commits, context)
366
+
367
+ # Compliance metrics
368
+ compliance_metrics = self._calculate_compliance_metrics(commits, project_metrics, context)
369
+
370
+ # Team collaboration patterns
371
+ collaboration_patterns = self._analyze_team_collaboration_patterns(commits, context)
372
+
373
+ # Process improvement recommendations
374
+ process_recommendations = self._generate_process_recommendations(
375
+ git_pm_effectiveness, bottlenecks, automation_opportunities, compliance_metrics
376
+ )
377
+
378
+ return {
379
+ "git_pm_effectiveness": git_pm_effectiveness,
380
+ "process_bottlenecks": bottlenecks,
381
+ "automation_opportunities": automation_opportunities,
382
+ "compliance_metrics": compliance_metrics,
383
+ "team_collaboration_patterns": collaboration_patterns,
384
+ "process_recommendations": process_recommendations,
385
+ "workflow_narrative": self._generate_workflow_narrative(
386
+ git_pm_effectiveness, bottlenecks, compliance_metrics
387
+ ),
388
+ }
389
+
390
+ # Executive Analysis Helper Methods
391
+
392
+ def _assess_team_health(self, context: dict[str, Any]) -> tuple[str, float]:
393
+ """Assess overall team health with confidence score."""
394
+
395
+ commits = context["commits"]
396
+ developer_stats = context["developer_stats"]
397
+ weeks = context["weeks_analyzed"]
398
+
399
+ health_factors = []
400
+
401
+ # Activity consistency factor
402
+ weekly_commits = self._get_weekly_commit_counts(commits)
403
+ if weekly_commits:
404
+ consistency = 100 - (
405
+ statistics.pstdev(weekly_commits) / max(statistics.mean(weekly_commits), 1) * 100
406
+ )
407
+ health_factors.append(max(0, min(100, consistency)))
408
+
409
+ # Developer engagement factor
410
+ if developer_stats:
411
+ active_developers = sum(
412
+ 1
413
+ for dev in developer_stats
414
+ if dev.get("total_commits", 0) > self.thresholds["low_productivity_commits"]
415
+ )
416
+ engagement_score = (active_developers / len(developer_stats)) * 100
417
+ health_factors.append(engagement_score)
418
+
419
+ # Velocity factor
420
+ avg_weekly_commits = len(commits) / max(weeks, 1)
421
+ velocity_score = min(100, avg_weekly_commits * 10) # Scale appropriately
422
+ health_factors.append(velocity_score)
423
+
424
+ # Overall health score
425
+ if health_factors:
426
+ overall_score = statistics.mean(health_factors)
427
+ confidence = min(0.95, len(health_factors) / 5.0) # More factors = higher confidence
428
+
429
+ if overall_score >= self.thresholds["health_score_excellent"]:
430
+ return "excellent", confidence
431
+ elif overall_score >= self.thresholds["health_score_good"]:
432
+ return "good", confidence
433
+ elif overall_score >= self.thresholds["health_score_fair"]:
434
+ return "fair", confidence
435
+ else:
436
+ return "needs_improvement", confidence
437
+
438
+ return "insufficient_data", 0.2
439
+
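
The consistency and velocity factors above rely on self._get_weekly_commit_counts and (later) self._get_week_start, which are defined further down in enhanced_analyzer.py and do not appear in this hunk. A plausible sketch of those helpers, under that assumption; the behaviour is inferred, and the imports they use (datetime, timedelta, defaultdict, Any) are already present at the top of the module:

    def _get_week_start(self, timestamp: datetime) -> datetime:
        # Assumed helper (method of EnhancedQualitativeAnalyzer): normalize a
        # timestamp to Monday 00:00 of its week.
        start = timestamp - timedelta(days=timestamp.weekday())
        return start.replace(hour=0, minute=0, second=0, microsecond=0)

    def _get_weekly_commit_counts(self, commits: list[dict[str, Any]]) -> list[int]:
        # Assumed helper: commits per calendar week in chronological order,
        # including zero-activity weeks so checks like `if w > 0` are meaningful.
        if not commits:
            return []
        counts: dict[datetime, int] = defaultdict(int)
        for commit in commits:
            counts[self._get_week_start(commit["timestamp"])] += 1
        week, last = min(counts), max(counts)
        series = []
        while week <= last:
            series.append(counts[week])
            week += timedelta(weeks=1)
        return series
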
440
+ def _analyze_velocity_trends(self, context: dict[str, Any]) -> dict[str, Any]:
441
+ """Analyze velocity trends over the analysis period."""
442
+
443
+ commits = context["commits"]
444
+ weekly_commits = self._get_weekly_commit_counts(commits)
445
+
446
+ if len(weekly_commits) < 4:
447
+ return {
448
+ "trend_direction": "insufficient_data",
449
+ "trend_percentage": 0,
450
+ "weekly_average": 0,
451
+ "confidence": 0.1,
452
+ }
453
+
454
+ # Compare first quarter vs last quarter
455
+ quarter_size = len(weekly_commits) // 4
456
+ first_quarter = weekly_commits[:quarter_size] or [0]
457
+ last_quarter = weekly_commits[-quarter_size:] or [0]
458
+
459
+ first_avg = statistics.mean(first_quarter)
460
+ last_avg = statistics.mean(last_quarter)
461
+
462
+ trend_percentage = (last_avg - first_avg) / first_avg * 100 if first_avg > 0 else 0
463
+
464
+ # Determine trend direction
465
+ if abs(trend_percentage) < self.thresholds["velocity_trend_threshold"] * 100:
466
+ trend_direction = "stable"
467
+ elif trend_percentage > 0:
468
+ trend_direction = "improving"
469
+ else:
470
+ trend_direction = "declining"
471
+
472
+ # Calculate confidence based on data consistency
473
+ weekly_std = statistics.pstdev(weekly_commits) if len(weekly_commits) > 1 else 0.1
474
+ weekly_mean = statistics.mean(weekly_commits)
475
+ consistency = max(0, 1 - (weekly_std / max(weekly_mean, 0.1)))
476
+ confidence = min(0.95, consistency * 0.8 + 0.2) # Base confidence + consistency bonus
477
+
478
+ return {
479
+ "trend_direction": trend_direction,
480
+ "trend_percentage": round(trend_percentage, 1),
481
+ "weekly_average": round(statistics.mean(weekly_commits), 1),
482
+ "confidence": round(confidence, 2),
483
+ }
484
+
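
A small worked example of the quarter comparison above, with invented numbers:

    weekly = [4, 5, 3, 4, 6, 7, 8, 9]                     # hypothetical weekly counts
    quarter = len(weekly) // 4                             # 2
    first_avg = statistics.mean(weekly[:quarter])          # 4.5
    last_avg = statistics.mean(weekly[-quarter:])          # 8.5
    trend_pct = (last_avg - first_avg) / first_avg * 100   # ~88.9% > 20% -> "improving"
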
485
+ def _identify_key_achievements(self, context: dict[str, Any]) -> list[dict[str, Any]]:
486
+ """Identify key achievements during the analysis period."""
487
+
488
+ achievements = []
489
+ commits = context["commits"]
490
+ context["developer_stats"]
491
+ project_metrics = context["project_metrics"]
492
+
493
+ # High productivity achievement
494
+ total_commits = len(commits)
495
+ if (
496
+ total_commits
497
+ > self.thresholds["high_productivity_commits"] * context["weeks_analyzed"] / 12
498
+ ):
499
+ achievements.append(
500
+ {
501
+ "category": "productivity",
502
+ "title": "High Team Productivity",
503
+ "description": f"Team delivered {total_commits} commits across {context['unique_projects']} projects",
504
+ "impact": "high",
505
+ "confidence": 0.9,
506
+ }
507
+ )
508
+
509
+ # Consistent delivery achievement
510
+ weekly_commits = self._get_weekly_commit_counts(commits)
511
+ if weekly_commits:
512
+ active_weeks = sum(1 for w in weekly_commits if w > 0)
513
+ consistency_rate = active_weeks / len(weekly_commits)
514
+
515
+ if consistency_rate >= self.thresholds["consistent_activity_weeks"]:
516
+ achievements.append(
517
+ {
518
+ "category": "consistency",
519
+ "title": "Consistent Delivery Rhythm",
520
+ "description": f"Team maintained activity in {active_weeks} of {len(weekly_commits)} weeks",
521
+ "impact": "medium",
522
+ "confidence": 0.8,
523
+ }
524
+ )
525
+
526
+ # Cross-project collaboration achievement
527
+ if context["unique_developers"] > 1 and context["unique_projects"] > 2:
528
+ cross_project_devs = 0
529
+ for dev_commits in context["commits_by_developer"].values():
530
+ projects = set(c.get("project_key", "UNKNOWN") for c in dev_commits)
531
+ if len(projects) > 1:
532
+ cross_project_devs += 1
533
+
534
+ if cross_project_devs > context["unique_developers"] * 0.5:
535
+ achievements.append(
536
+ {
537
+ "category": "collaboration",
538
+ "title": "Strong Cross-Project Collaboration",
539
+ "description": f"{cross_project_devs} developers contributed to multiple projects",
540
+ "impact": "medium",
541
+ "confidence": 0.7,
542
+ }
543
+ )
544
+
545
+ # Ticket coverage achievement
546
+ ticket_analysis = project_metrics.get("ticket_analysis", {})
547
+ ticket_coverage = ticket_analysis.get("commit_coverage_pct", 0)
548
+ if ticket_coverage >= self.thresholds["ticket_coverage_excellent"]:
549
+ achievements.append(
550
+ {
551
+ "category": "process",
552
+ "title": "Excellent Process Adherence",
553
+ "description": f"{ticket_coverage:.1f}% of commits properly linked to tickets",
554
+ "impact": "high",
555
+ "confidence": 0.9,
556
+ }
557
+ )
558
+
559
+ return achievements
560
+
561
+ def _identify_major_concerns(self, context: dict[str, Any]) -> list[dict[str, Any]]:
562
+ """Identify major concerns that need executive attention."""
563
+
564
+ concerns = []
565
+ context["commits"]
566
+ developer_stats = context["developer_stats"]
567
+ project_metrics = context["project_metrics"]
568
+
569
+ # Bus factor concern (contribution concentration)
570
+ if developer_stats and len(developer_stats) > 1:
571
+ commit_counts = [dev.get("total_commits", 0) for dev in developer_stats]
572
+ gini_coefficient = self._calculate_gini_coefficient(commit_counts)
573
+
574
+ if gini_coefficient > self.thresholds["bus_factor_threshold"]:
575
+ top_contributor = max(developer_stats, key=lambda x: x.get("total_commits", 0))
576
+ top_percentage = (
577
+ top_contributor.get("total_commits", 0) / sum(commit_counts)
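
Since __init__ merges any "analysis_thresholds" mapping from the config into these defaults, individual values can be tuned without restating the whole table. A minimal sketch; the override values are illustrative, not recommendations:

    config = {
        "analysis_thresholds": {
            "high_productivity_commits": 80,   # raise the productivity bar
            "ticket_coverage_poor": 40,        # flag weak ticket linking sooner
        }
    }
    analyzer = EnhancedQualitativeAnalyzer(config=config)
    # analyzer.thresholds now holds the defaults with these two keys overridden.
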
578
+ ) * 100
579
+
580
+ concerns.append(
581
+ {
582
+ "category": "risk",
583
+ "title": "High Bus Factor Risk",
584
+ "description": f"Work highly concentrated: top contributor handles {top_percentage:.1f}% of commits",
585
+ "severity": "high",
586
+ "impact": "critical",
587
+ "confidence": 0.9,
588
+ "recommendation": "Distribute knowledge and responsibilities more evenly across team",
589
+ }
590
+ )
591
+
592
+ # Declining velocity concern
593
+ velocity_trends = self._analyze_velocity_trends(context)
594
+ if (
595
+ velocity_trends["trend_direction"] == "declining"
596
+ and velocity_trends["trend_percentage"] < -20
597
+ ):
598
+ concerns.append(
599
+ {
600
+ "category": "productivity",
601
+ "title": "Declining Team Velocity",
602
+ "description": f"Commit velocity declined by {abs(velocity_trends['trend_percentage']):.1f}% over analysis period",
603
+ "severity": "high",
604
+ "impact": "high",
605
+ "confidence": velocity_trends["confidence"],
606
+ "recommendation": "Investigate productivity bottlenecks and team capacity issues",
607
+ }
608
+ )
609
+
610
+ # Poor ticket coverage concern
611
+ ticket_analysis = project_metrics.get("ticket_analysis", {})
612
+ ticket_coverage = ticket_analysis.get("commit_coverage_pct", 0)
613
+ if ticket_coverage < self.thresholds["ticket_coverage_poor"]:
614
+ concerns.append(
615
+ {
616
+ "category": "process",
617
+ "title": "Poor Process Adherence",
618
+ "description": f"Only {ticket_coverage:.1f}% of commits linked to tickets",
619
+ "severity": "medium",
620
+ "impact": "medium",
621
+ "confidence": 0.8,
622
+ "recommendation": "Implement better ticket referencing practices and training",
623
+ }
624
+ )
625
+
626
+ # Inactive developer concern
627
+ if developer_stats:
628
+ inactive_devs = sum(
629
+ 1
630
+ for dev in developer_stats
631
+ if dev.get("total_commits", 0) < self.thresholds["low_productivity_commits"]
632
+ )
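
A rough usage sketch for the entry point above. The commit keys (project_key, canonical_id, timestamp, insertions, deletions, message, ticket_references) are the ones the helper methods below read; the concrete values are invented:

    from datetime import datetime, timezone

    analyzer = EnhancedQualitativeAnalyzer()
    commits = [
        {
            "project_key": "FRONTEND",
            "canonical_id": "dev-1",
            "author_email": "dev1@example.com",
            "timestamp": datetime(2024, 5, 6, 10, 0, tzinfo=timezone.utc),
            "insertions": 120,
            "deletions": 15,
            "message": "feat: add login form validation",
            "ticket_references": ["PROJ-101"],
        },
        # ... more commit dicts covering the analysis window ...
    ]
    result = analyzer.analyze_comprehensive(commits=commits, weeks_analyzed=12)
    print(result["executive_analysis"]["health_assessment"])
    print(result["metadata"]["commits_analyzed"])
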
633
+
634
+ if inactive_devs > len(developer_stats) * 0.3:
635
+ concerns.append(
636
+ {
637
+ "category": "team",
638
+ "title": "Team Engagement Issues",
639
+ "description": f"{inactive_devs} of {len(developer_stats)} developers have minimal activity",
640
+ "severity": "medium",
641
+ "impact": "medium",
642
+ "confidence": 0.7,
643
+ "recommendation": "Review individual workloads and engagement levels",
644
+ }
645
+ )
646
+
647
+ return concerns
648
+
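
The bus-factor concern above calls self._calculate_gini_coefficient, which is defined later in the module and not shown in this hunk. A minimal sketch of the standard Gini computation it presumably performs (0 means work is spread evenly; values near 1 mean one contributor does nearly everything):

    def _calculate_gini_coefficient(self, values: list[int]) -> float:
        # Assumed shape of the helper; the published definition may differ slightly.
        total = sum(values)
        if not values or total == 0:
            return 0.0
        sorted_vals = sorted(values)
        n = len(sorted_vals)
        weighted = sum((rank + 1) * v for rank, v in enumerate(sorted_vals))
        return (2 * weighted) / (n * total) - (n + 1) / n
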
649
+ def _assess_risk_indicators(self, context: dict[str, Any]) -> list[dict[str, Any]]:
650
+ """Assess various risk indicators for the team and projects."""
651
+
652
+ risk_indicators = []
653
+ commits = context["commits"]
654
+
655
+ # Large commit size risk
656
+ large_commits = sum(
657
+ 1
658
+ for c in commits
659
+ if (
660
+ c.get("filtered_insertions", c.get("insertions", 0))
661
+ + c.get("filtered_deletions", c.get("deletions", 0))
662
+ )
663
+ > self.thresholds["large_commit_lines"]
664
+ )
665
+
666
+ if large_commits > len(commits) * 0.2: # More than 20% large commits
667
+ risk_indicators.append(
668
+ {
669
+ "type": "code_quality",
670
+ "title": "Large Commit Pattern",
671
+ "description": f"{large_commits} commits exceed {self.thresholds['large_commit_lines']} lines",
672
+ "risk_level": "medium",
673
+ "impact": "Code review difficulty, potential bugs",
674
+ "confidence": 0.8,
675
+ }
676
+ )
677
+
678
+ # Weekend work pattern risk
679
+ weekend_commits = 0
680
+ for commit in commits:
681
+ if hasattr(commit.get("timestamp"), "weekday") and commit["timestamp"].weekday() >= 5:
682
+ weekend_commits += 1
683
+
684
+ weekend_percentage = (weekend_commits / len(commits)) * 100 if commits else 0
685
+ if weekend_percentage > 30: # More than 30% weekend work
686
+ risk_indicators.append(
687
+ {
688
+ "type": "work_life_balance",
689
+ "title": "High Weekend Activity",
690
+ "description": f"{weekend_percentage:.1f}% of commits made on weekends",
691
+ "risk_level": "medium",
692
+ "impact": "Potential burnout, work-life balance issues",
693
+ "confidence": 0.7,
694
+ }
695
+ )
696
+
697
+ return risk_indicators
698
+
699
+ def _generate_executive_recommendations(
700
+ self,
701
+ health_assessment: str,
702
+ velocity_trends: dict[str, Any],
703
+ concerns: list[dict[str, Any]],
704
+ risk_indicators: list[dict[str, Any]],
705
+ ) -> list[dict[str, Any]]:
706
+ """Generate strategic recommendations for executive leadership."""
707
+
708
+ recommendations = []
709
+
710
+ # Health-based recommendations
711
+ if health_assessment in ["needs_improvement", "fair"]:
712
+ recommendations.append(
713
+ {
714
+ "priority": "high",
715
+ "category": "team_health",
716
+ "title": "Improve Team Health Metrics",
717
+ "action": "Focus on consistency, engagement, and velocity improvements",
718
+ "timeline": "1-2 quarters",
719
+ "expected_impact": "Improved productivity and team morale",
720
+ }
721
+ )
722
+
723
+ # Velocity-based recommendations
724
+ if velocity_trends["trend_direction"] == "declining":
725
+ recommendations.append(
726
+ {
727
+ "priority": "high",
728
+ "category": "productivity",
729
+ "title": "Address Velocity Decline",
730
+ "action": "Investigate bottlenecks and optimize development processes",
731
+ "timeline": "4-6 weeks",
732
+ "expected_impact": "Restored or improved delivery velocity",
733
+ }
734
+ )
735
+
736
+ # Risk-based recommendations
737
+ high_severity_concerns = [c for c in concerns if c.get("severity") == "high"]
738
+ if high_severity_concerns:
739
+ recommendations.append(
740
+ {
741
+ "priority": "critical",
742
+ "category": "risk_mitigation",
743
+ "title": "Address Critical Risk Factors",
744
+ "action": f"Immediate attention needed for {len(high_severity_concerns)} high-severity issues",
745
+ "timeline": "2-4 weeks",
746
+ "expected_impact": "Reduced project risk and improved stability",
747
+ }
748
+ )
749
+
750
+ # Process improvement recommendation
751
+ if any(c.get("category") == "process" for c in concerns):
752
+ recommendations.append(
753
+ {
754
+ "priority": "medium",
755
+ "category": "process",
756
+ "title": "Strengthen Development Processes",
757
+ "action": "Implement better tracking, documentation, and compliance practices",
758
+ "timeline": "6-8 weeks",
759
+ "expected_impact": "Improved visibility and process adherence",
760
+ }
761
+ )
762
+
763
+ return recommendations[:5] # Top 5 recommendations
764
+
765
+ def _generate_executive_narrative(
766
+ self,
767
+ health_assessment: str,
768
+ velocity_trends: dict[str, Any],
769
+ achievements: list[dict[str, Any]],
770
+ concerns: list[dict[str, Any]],
771
+ ) -> str:
772
+ """Generate executive narrative summary."""
773
+
774
+ narrative_parts = []
775
+
776
+ # Health assessment
777
+ health_descriptions = {
778
+ "excellent": "operating at peak performance with strong metrics across all dimensions",
779
+ "good": "performing well with room for targeted improvements",
780
+ "fair": "showing mixed results requiring focused attention",
781
+ "needs_improvement": "facing significant challenges requiring immediate intervention",
782
+ }
783
+
784
+ narrative_parts.append(
785
+ f"The development team is currently {health_descriptions.get(health_assessment, 'in an unclear state')}."
786
+ )
787
+
788
+ # Velocity trends
789
+ if velocity_trends["trend_direction"] == "improving":
790
+ narrative_parts.append(
791
+ f"Team velocity is trending upward with a {velocity_trends['trend_percentage']:.1f}% improvement, averaging {velocity_trends['weekly_average']} commits per week."
792
+ )
793
+ elif velocity_trends["trend_direction"] == "declining":
794
+ narrative_parts.append(
795
+ f"Team velocity shows concerning decline of {abs(velocity_trends['trend_percentage']):.1f}%, requiring immediate attention to restore productivity."
796
+ )
797
+ else:
798
+ narrative_parts.append(
799
+ f"Team velocity remains stable at {velocity_trends['weekly_average']} commits per week, providing consistent delivery rhythm."
800
+ )
801
+
802
+ # Key achievements
803
+ if achievements:
804
+ high_impact_achievements = [a for a in achievements if a.get("impact") == "high"]
805
+ if high_impact_achievements:
806
+ narrative_parts.append(
807
+ f"Notable achievements include {', '.join([a['title'].lower() for a in high_impact_achievements[:2]])}."
808
+ )
809
+
810
+ # Major concerns
811
+ critical_concerns = [c for c in concerns if c.get("severity") == "high"]
812
+ if critical_concerns:
813
+ narrative_parts.append(
814
+ f"Critical attention needed for {critical_concerns[0]['title'].lower()} and other high-priority issues."
815
+ )
816
+ elif concerns:
817
+ narrative_parts.append(
818
+ f"Some areas require monitoring, particularly {concerns[0]['category']} aspects."
819
+ )
820
+
821
+ return " ".join(narrative_parts)
822
+
823
+ # Project Analysis Helper Methods
824
+
825
+ def _classify_project_momentum(
826
+ self, project_commits: list[dict[str, Any]], context: dict[str, Any]
827
+ ) -> dict[str, Any]:
828
+ """Classify project momentum as growing, stable, or declining."""
829
+
830
+ if len(project_commits) < 4:
831
+ return {
832
+ "classification": "insufficient_data",
833
+ "confidence": 0.1,
834
+ "trend_percentage": 0,
835
+ "description": "Not enough data for momentum analysis",
836
+ }
837
+
838
+ # Analyze commit trends over time
839
+ sorted_commits = sorted(project_commits, key=lambda x: x["timestamp"])
840
+ midpoint = len(sorted_commits) // 2
841
+
842
+ first_half = sorted_commits[:midpoint]
843
+ second_half = sorted_commits[midpoint:]
844
+
845
+ first_count = len(first_half)
846
+ second_count = len(second_half)
847
+
848
+ if first_count > 0:
849
+ trend_percentage = ((second_count - first_count) / first_count) * 100
850
+ else:
851
+ trend_percentage = 0
852
+
853
+ # Classification logic
854
+ if trend_percentage > 20:
855
+ classification = "growing"
856
+ description = (
857
+ f"Strong upward momentum with {trend_percentage:.1f}% increase in activity"
858
+ )
859
+ elif trend_percentage < -20:
860
+ classification = "declining"
861
+ description = (
862
+ f"Concerning decline with {abs(trend_percentage):.1f}% decrease in activity"
863
+ )
864
+ else:
865
+ classification = "stable"
866
+ description = f"Consistent activity with {abs(trend_percentage):.1f}% variance"
867
+
868
+ # Confidence based on data quality
869
+ time_span = (sorted_commits[-1]["timestamp"] - sorted_commits[0]["timestamp"]).days
870
+ confidence = min(0.9, time_span / (context["weeks_analyzed"] * 7))
871
+
872
+ return {
873
+ "classification": classification,
874
+ "confidence": confidence,
875
+ "trend_percentage": round(trend_percentage, 1),
876
+ "description": description,
877
+ }
878
+
879
+ def _calculate_project_health_indicators(
880
+ self, project_commits: list[dict[str, Any]], context: dict[str, Any]
881
+ ) -> dict[str, Any]:
882
+ """Calculate various health indicators for a project."""
883
+
884
+ # Activity level
885
+ weekly_commits = len(project_commits) / max(context["weeks_analyzed"], 1)
886
+ activity_score = min(100, weekly_commits * 15) # Scale appropriately
887
+
888
+ # Contributor diversity
889
+ contributors = set(c.get("canonical_id", c.get("author_email")) for c in project_commits)
890
+ diversity_score = min(100, len(contributors) * 25) # Max score with 4+ contributors
891
+
892
+ # PR velocity (if available)
893
+ pr_velocity_score = 75 # Default neutral score when PR data not available
894
+
895
+ # Ticket coverage
896
+ commits_with_tickets = sum(1 for c in project_commits if c.get("ticket_references"))
897
+ ticket_coverage = (
898
+ (commits_with_tickets / len(project_commits)) * 100 if project_commits else 0
899
+ )
900
+
901
+ # Overall health calculation
902
+ indicators = {
903
+ "activity_level": {
904
+ "score": round(activity_score, 1),
905
+ "description": f"{weekly_commits:.1f} commits per week",
906
+ "status": (
907
+ "excellent"
908
+ if activity_score >= 80
909
+ else "good" if activity_score >= 60 else "needs_improvement"
910
+ ),
911
+ },
912
+ "contributor_diversity": {
913
+ "score": round(diversity_score, 1),
914
+ "description": f"{len(contributors)} active contributors",
915
+ "status": (
916
+ "excellent"
917
+ if len(contributors) >= 4
918
+ else "good" if len(contributors) >= 2 else "concerning"
919
+ ),
920
+ },
921
+ "pr_velocity": {
922
+ "score": pr_velocity_score,
923
+ "description": "PR data not available",
924
+ "status": "unknown",
925
+ },
926
+ "ticket_coverage": {
927
+ "score": round(ticket_coverage, 1),
928
+ "description": f"{ticket_coverage:.1f}% commits linked to tickets",
929
+ "status": (
930
+ "excellent"
931
+ if ticket_coverage >= 80
932
+ else "good" if ticket_coverage >= 60 else "needs_improvement"
933
+ ),
934
+ },
935
+ }
936
+
937
+ # Calculate overall health score
938
+ overall_score = statistics.mean(
939
+ [
940
+ indicators["activity_level"]["score"],
941
+ indicators["contributor_diversity"]["score"],
942
+ indicators["ticket_coverage"]["score"],
943
+ ]
944
+ )
945
+
946
+ indicators["overall_health"] = {
947
+ "score": round(overall_score, 1),
948
+ "status": (
949
+ "excellent"
950
+ if overall_score >= 80
951
+ else "good" if overall_score >= 60 else "needs_improvement"
952
+ ),
953
+ }
954
+
955
+ return indicators
956
+
957
+ def _detect_technical_debt_signals(
958
+ self, project_commits: list[dict[str, Any]], context: dict[str, Any]
959
+ ) -> list[dict[str, Any]]:
960
+ """Detect signals of technical debt accumulation."""
961
+
962
+ signals = []
963
+
964
+ # Large commit pattern (potential code quality issue)
965
+ large_commits = []
966
+ for commit in project_commits:
967
+ lines_changed = commit.get(
968
+ "filtered_insertions", commit.get("insertions", 0)
969
+ ) + commit.get("filtered_deletions", commit.get("deletions", 0))
970
+ if lines_changed > self.thresholds["large_commit_lines"]:
971
+ large_commits.append(commit)
972
+
973
+ if len(large_commits) > len(project_commits) * 0.2:
974
+ signals.append(
975
+ {
976
+ "type": "large_commits",
977
+ "severity": "medium",
978
+ "description": f"{len(large_commits)} commits exceed {self.thresholds['large_commit_lines']} lines",
979
+ "impact": "Difficult code review, potential quality issues",
980
+ "recommendation": "Break down changes into smaller, focused commits",
981
+ }
982
+ )
983
+
984
+ # Fix-heavy pattern analysis
985
+ fix_commits = []
986
+ for commit in project_commits:
987
+ message = commit.get("message", "").lower()
988
+ if any(keyword in message for keyword in ["fix", "bug", "hotfix", "patch"]):
989
+ fix_commits.append(commit)
990
+
991
+ fix_percentage = (len(fix_commits) / len(project_commits)) * 100 if project_commits else 0
992
+ if fix_percentage > 30: # More than 30% fix commits
993
+ signals.append(
994
+ {
995
+ "type": "high_fix_ratio",
996
+ "severity": "high",
997
+ "description": f"{fix_percentage:.1f}% of commits are fixes",
998
+ "impact": "Indicates quality issues in initial development",
999
+ "recommendation": "Improve testing and code review processes",
1000
+ }
1001
+ )
1002
+
1003
+ return signals
1004
+
1005
+ def _assess_delivery_predictability(
1006
+ self, project_commits: list[dict[str, Any]], context: dict[str, Any]
1007
+ ) -> dict[str, Any]:
1008
+ """Assess how predictable project delivery patterns are."""
1009
+
1010
+ if len(project_commits) < 7:
1011
+ return {
1012
+ "score": 0,
1013
+ "status": "insufficient_data",
1014
+ "description": "Not enough data for predictability analysis",
1015
+ }
1016
+
1017
+ # Calculate weekly commit consistency
1018
+ weekly_counts = defaultdict(int)
1019
+ for commit in project_commits:
1020
+ week_key = self._get_week_start(commit["timestamp"]).strftime("%Y-%m-%d")
1021
+ weekly_counts[week_key] += 1
1022
+
1023
+ weekly_values = list(weekly_counts.values())
1024
+
1025
+ if len(weekly_values) < 2:
1026
+ predictability_score = 50 # Neutral score
1027
+ else:
1028
+ mean_weekly = statistics.mean(weekly_values)
1029
+ std_weekly = statistics.pstdev(weekly_values)
1030
+
1031
+ # Lower standard deviation = higher predictability
1032
+ consistency = max(0, 100 - (std_weekly / max(mean_weekly, 1) * 100))
1033
+ predictability_score = min(100, consistency)
1034
+
1035
+ # Determine status
1036
+ if predictability_score >= 80:
1037
+ status = "highly_predictable"
1038
+ elif predictability_score >= 60:
1039
+ status = "moderately_predictable"
1040
+ else:
1041
+ status = "unpredictable"
1042
+
1043
+ return {
1044
+ "score": round(predictability_score, 1),
1045
+ "status": status,
1046
+ "description": f"Delivery shows {status.replace('_', ' ')} patterns",
1047
+ }
1048
+
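
A quick numeric illustration of the consistency scoring above, with invented weekly counts:

    weekly_values = [5, 7, 6, 6]                   # hypothetical weekly counts
    mean_weekly = statistics.mean(weekly_values)   # 6
    std_weekly = statistics.pstdev(weekly_values)  # ~0.707
    score = max(0, 100 - (std_weekly / max(mean_weekly, 1) * 100))  # ~88.2 -> "highly_predictable"
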
1049
+ def _assess_project_risks(
1050
+ self, project_commits: list[dict[str, Any]], context: dict[str, Any]
1051
+ ) -> list[dict[str, Any]]:
1052
+ """Assess various risks for the project."""
1053
+
1054
+ risks = []
1055
+
1056
+ # Single contributor dependency risk
1057
+ contributors = defaultdict(int)
1058
+ for commit in project_commits:
1059
+ dev_id = commit.get("canonical_id", commit.get("author_email"))
1060
+ contributors[dev_id] += 1
1061
+
1062
+ if len(contributors) == 1:
1063
+ risks.append(
1064
+ {
1065
+ "type": "single_contributor",
1066
+ "severity": "high",
1067
+ "description": "Project depends on single contributor",
1068
+ "probability": "high",
1069
+ "impact": "Project abandonment risk if contributor leaves",
1070
+ "mitigation": "Involve additional team members in project",
1071
+ }
1072
+ )
1073
+ elif len(contributors) > 1:
1074
+ top_contributor_pct = (max(contributors.values()) / sum(contributors.values())) * 100
1075
+ if top_contributor_pct > 80:
1076
+ risks.append(
1077
+ {
1078
+ "type": "contributor_concentration",
1079
+ "severity": "medium",
1080
+ "description": f"Top contributor handles {top_contributor_pct:.1f}% of work",
1081
+ "probability": "medium",
1082
+ "impact": "Knowledge concentration risk",
1083
+ "mitigation": "Distribute knowledge and responsibilities",
1084
+ }
1085
+ )
1086
+
1087
+ # Activity decline risk
1088
+ recent_commits = [
1089
+ c for c in project_commits if (datetime.now(timezone.utc) - c["timestamp"]).days <= 14
1090
+ ]
1091
+
1092
+ if len(recent_commits) == 0 and len(project_commits) > 5:
1093
+ risks.append(
1094
+ {
1095
+ "type": "abandonment_risk",
1096
+ "severity": "high",
1097
+ "description": "No activity in past 2 weeks",
1098
+ "probability": "medium",
1099
+ "impact": "Project may be abandoned",
1100
+ "mitigation": "Review project status and resource allocation",
1101
+ }
1102
+ )
1103
+
1104
+ return risks
1105
+
1106
+ def _generate_project_recommendations(
1107
+ self,
1108
+ momentum: dict[str, Any],
1109
+ health_indicators: dict[str, Any],
1110
+ tech_debt_signals: list[dict[str, Any]],
1111
+ risk_assessment: list[dict[str, Any]],
1112
+ ) -> list[dict[str, Any]]:
1113
+ """Generate project-specific recommendations."""
1114
+
1115
+ recommendations = []
1116
+
1117
+ # Momentum-based recommendations
1118
+ if momentum["classification"] == "declining":
1119
+ recommendations.append(
1120
+ {
1121
+ "priority": "high",
1122
+ "category": "momentum",
1123
+ "title": "Address Declining Activity",
1124
+ "action": "Investigate causes of reduced activity and reallocate resources",
1125
+ "expected_outcome": "Restored project momentum",
1126
+ }
1127
+ )
1128
+
1129
+ # Health-based recommendations
1130
+ overall_health = health_indicators.get("overall_health", {})
1131
+ if overall_health.get("status") == "needs_improvement":
1132
+ recommendations.append(
1133
+ {
1134
+ "priority": "medium",
1135
+ "category": "health",
1136
+ "title": "Improve Project Health Metrics",
1137
+ "action": "Focus on activity consistency and contributor engagement",
1138
+ "expected_outcome": "Better project sustainability",
1139
+ }
1140
+ )
1141
+
1142
+ # Technical debt recommendations
1143
+ high_severity_debt = [s for s in tech_debt_signals if s.get("severity") == "high"]
1144
+ if high_severity_debt:
1145
+ recommendations.append(
1146
+ {
1147
+ "priority": "high",
1148
+ "category": "quality",
1149
+ "title": "Address Technical Debt",
1150
+ "action": high_severity_debt[0].get(
1151
+ "recommendation", "Improve code quality practices"
1152
+ ),
1153
+ "expected_outcome": "Reduced maintenance burden",
1154
+ }
1155
+ )
1156
+
1157
+ # Risk-based recommendations
1158
+ high_severity_risks = [r for r in risk_assessment if r.get("severity") == "high"]
1159
+ if high_severity_risks:
1160
+ recommendations.append(
1161
+ {
1162
+ "priority": "critical",
1163
+ "category": "risk",
1164
+ "title": "Mitigate Critical Risks",
1165
+ "action": high_severity_risks[0].get(
1166
+ "mitigation", "Address identified risk factors"
1167
+ ),
1168
+ "expected_outcome": "Improved project stability",
1169
+ }
1170
+ )
1171
+
1172
+ return recommendations[:3] # Top 3 recommendations per project
1173
+
1174
+ def _generate_project_narrative(
1175
+ self,
1176
+ project_key: str,
1177
+ momentum: dict[str, Any],
1178
+ health_indicators: dict[str, Any],
1179
+ risk_assessment: list[dict[str, Any]],
1180
+ ) -> str:
1181
+ """Generate narrative summary for a project."""
1182
+
1183
+ narrative_parts = []
1184
+
1185
+ # Project momentum
1186
+ momentum_descriptions = {
1187
+ "growing": "showing strong growth momentum",
1188
+ "stable": "maintaining steady progress",
1189
+ "declining": "experiencing declining activity",
1190
+ "insufficient_data": "lacking sufficient activity data",
1191
+ }
1192
+
1193
+ momentum_desc = momentum_descriptions.get(momentum["classification"], "in unclear state")
1194
+ narrative_parts.append(f"Project {project_key} is {momentum_desc}.")
1195
+
1196
+ # Health status
1197
+ overall_health = health_indicators.get("overall_health", {})
1198
+ health_score = overall_health.get("score", 0)
1199
+ narrative_parts.append(f"Overall project health scores {health_score:.1f}/100.")
1200
+
1201
+ # Key strengths or concerns
1202
+ activity = health_indicators.get("activity_level", {})
1203
+ contributors = health_indicators.get("contributor_diversity", {})
1204
+
1205
+ if contributors.get("status") == "concerning":
1206
+ narrative_parts.append("Single-contributor dependency presents sustainability risk.")
1207
+ elif activity.get("status") == "excellent":
1208
+ narrative_parts.append("Strong activity levels indicate healthy development pace.")
1209
+
1210
+ # Risk highlights
1211
+ high_risks = [r for r in risk_assessment if r.get("severity") == "high"]
1212
+ if high_risks:
1213
+ narrative_parts.append(
1214
+ f"Critical attention needed for {high_risks[0]['type'].replace('_', ' ')} risk."
1215
+ )
1216
+
1217
+ return " ".join(narrative_parts)
1218
+
1219
+ # Developer Analysis Helper Methods
1220
+
1221
+ def _analyze_contribution_patterns(
1222
+ self, dev_commits: list[dict[str, Any]], context: dict[str, Any]
1223
+ ) -> dict[str, Any]:
1224
+ """Analyze individual developer contribution patterns."""
1225
+
1226
+ if not dev_commits:
1227
+ return {"pattern": "no_activity", "confidence": 0.0}
1228
+
1229
+ # Temporal consistency analysis
1230
+ weekly_commits = self._get_weekly_commit_counts(dev_commits)
1231
+ active_weeks = sum(1 for w in weekly_commits if w > 0)
1232
+ total_weeks = len(weekly_commits) if weekly_commits else 1
1233
+ consistency_rate = active_weeks / total_weeks
1234
+
1235
+ # Commit size consistency
1236
+ commit_sizes = []
1237
+ for commit in dev_commits:
1238
+ lines = commit.get("filtered_insertions", commit.get("insertions", 0)) + commit.get(
1239
+ "filtered_deletions", commit.get("deletions", 0)
1240
+ )
1241
+ commit_sizes.append(lines)
1242
+
1243
+ avg_commit_size = statistics.mean(commit_sizes) if commit_sizes else 0
1244
+ size_consistency = (
1245
+ 100 - (statistics.pstdev(commit_sizes) / max(avg_commit_size, 1) * 100)
1246
+ if len(commit_sizes) > 1
1247
+ else 50
1248
+ )
1249
+
1250
+ # Pattern classification
1251
+ total_commits = len(dev_commits)
1252
+
1253
+ if (
1254
+ total_commits >= self.thresholds["high_productivity_commits"]
1255
+ and consistency_rate >= 0.7
1256
+ ):
1257
+ pattern = "consistent_high_performer"
1258
+ confidence = 0.9
1259
+ elif total_commits >= self.thresholds["high_productivity_commits"]:
1260
+ pattern = "high_volume_irregular"
1261
+ confidence = 0.8
1262
+ elif consistency_rate >= 0.7:
1263
+ pattern = "consistent_steady"
1264
+ confidence = 0.8
1265
+ elif consistency_rate < 0.3:
1266
+ pattern = "sporadic"
1267
+ confidence = 0.7
1268
+ else:
1269
+ pattern = "moderate_irregular"
1270
+ confidence = 0.6
1271
+
1272
+ return {
1273
+ "pattern": pattern,
1274
+ "confidence": confidence,
1275
+ "consistency_rate": round(consistency_rate, 2),
1276
+ "avg_commit_size": round(avg_commit_size, 1),
1277
+ "size_consistency_score": round(max(0, size_consistency), 1),
1278
+ "total_commits": total_commits,
1279
+ "active_weeks": active_weeks,
1280
+ "description": self._get_pattern_description(pattern, consistency_rate, total_commits),
1281
+ }
1282
+
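
For instance, with the default thresholds a developer with 60 commits spread over 9 of 12 weeks is classified as a consistent high performer (invented numbers):

    total_commits = 60         # >= high_productivity_commits (50)
    consistency_rate = 9 / 12  # 0.75 >= 0.7
    # -> pattern "consistent_high_performer" with confidence 0.9
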
1283
+ def _get_pattern_description(
1284
+ self, pattern: str, consistency_rate: float, total_commits: int
1285
+ ) -> str:
1286
+ """Get human-readable description of contribution pattern."""
1287
+
1288
+ descriptions = {
1289
+ "consistent_high_performer": f"Highly productive with {consistency_rate:.0%} week consistency",
1290
+ "high_volume_irregular": f"High output ({total_commits} commits) but irregular timing",
1291
+ "consistent_steady": f"Steady contributor active {consistency_rate:.0%} of weeks",
1292
+ "moderate_irregular": "Moderate activity with irregular patterns",
1293
+ "sporadic": f"Sporadic activity in {consistency_rate:.0%} of weeks",
1294
+ "no_activity": "No significant activity in analysis period",
1295
+ }
1296
+
1297
+ return descriptions.get(pattern, "Unknown contribution pattern")
1298
+
1299
+    def _calculate_collaboration_score(
+        self, dev_commits: list[dict[str, Any]], context: dict[str, Any]
+    ) -> dict[str, Any]:
+        """Calculate collaboration metrics for a developer."""
+
+        # Project diversity
+        projects_worked = set(c.get("project_key", "UNKNOWN") for c in dev_commits)
+        project_diversity_score = min(100, len(projects_worked) * 25)
+
+        # Cross-project contribution consistency
+        project_commit_counts = defaultdict(int)
+        for commit in dev_commits:
+            project_key = commit.get("project_key", "UNKNOWN")
+            project_commit_counts[project_key] += 1
+
+        if len(project_commit_counts) > 1:
+            # Calculate how evenly distributed work is across projects
+            commit_values = list(project_commit_counts.values())
+            gini = self._calculate_gini_coefficient(commit_values)
+            distribution_score = (1 - gini) * 100  # Lower Gini = more even distribution
+        else:
+            distribution_score = 50  # Neutral for single project
+
+        # Overall collaboration score
+        collaboration_score = project_diversity_score * 0.6 + distribution_score * 0.4
+
+        # Collaboration level classification
+        if collaboration_score >= 80:
+            level = "highly_collaborative"
+        elif collaboration_score >= 60:
+            level = "moderately_collaborative"
+        elif collaboration_score >= 40:
+            level = "focused_contributor"
+        else:
+            level = "single_focus"
+
+        return {
+            "score": round(collaboration_score, 1),
+            "level": level,
+            "projects_count": len(projects_worked),
+            "project_diversity_score": round(project_diversity_score, 1),
+            "work_distribution_score": round(distribution_score, 1),
+            "projects_list": sorted(list(projects_worked)),
+            "description": f"{level.replace('_', ' ').title()} - active in {len(projects_worked)} projects",
+        }
+
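A worked example of the 60/40 weighting above, using invented numbers (a developer active in 3 projects whose commits split 10/8/2, Gini roughly 0.27):

    project_diversity_score = min(100, 3 * 25)        # 3 projects -> 75
    distribution_score = (1 - 0.2667) * 100           # Gini of [10, 8, 2] -> ~73.3
    collaboration_score = project_diversity_score * 0.6 + distribution_score * 0.4
    print(round(collaboration_score, 1))              # ~74.3 -> "moderately_collaborative"
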
+    def _identify_expertise_domains(
+        self, dev_commits: list[dict[str, Any]], context: dict[str, Any]
+    ) -> list[dict[str, Any]]:
+        """Identify developer expertise domains based on file patterns and projects."""
+
+        domains = []
+
+        # Analyze file patterns (simplified - in real implementation would use file extensions)
+        total_commits = len(dev_commits)
+        project_contributions = defaultdict(int)
+
+        for commit in dev_commits:
+            project_key = commit.get("project_key", "UNKNOWN")
+            project_contributions[project_key] += 1
+
+        # Create expertise domains based on project contributions
+        for project, commit_count in project_contributions.items():
+            contribution_percentage = (commit_count / total_commits) * 100
+
+            if contribution_percentage >= 30:
+                expertise_level = "expert"
+            elif contribution_percentage >= 15:
+                expertise_level = "proficient"
+            else:
+                expertise_level = "familiar"
+
+            domains.append(
+                {
+                    "domain": project,
+                    "expertise_level": expertise_level,
+                    "contribution_percentage": round(contribution_percentage, 1),
+                    "commit_count": commit_count,
+                    "confidence": min(
+                        0.9, commit_count / 20
+                    ),  # Higher confidence with more commits
+                }
+            )
+
+        # Sort by contribution percentage
+        domains.sort(key=lambda x: x["contribution_percentage"], reverse=True)
+
+        return domains[:5]  # Top 5 domains
+
+    def _analyze_growth_trajectory(
+        self, dev_commits: list[dict[str, Any]], context: dict[str, Any]
+    ) -> dict[str, Any]:
+        """Analyze developer growth trajectory over time."""
+
+        if len(dev_commits) < 4:
+            return {
+                "trajectory": "insufficient_data",
+                "confidence": 0.1,
+                "description": "Not enough data for growth analysis",
+            }
+
+        # Sort commits chronologically
+        sorted_commits = sorted(dev_commits, key=lambda x: x["timestamp"])
+
+        # Split into quarters for trend analysis
+        quarter_size = len(sorted_commits) // 4
+        if quarter_size == 0:
+            return {
+                "trajectory": "insufficient_data",
+                "confidence": 0.2,
+                "description": "Insufficient commit history for growth analysis",
+            }
+
+        quarters = []
+        for i in range(4):
+            start_idx = i * quarter_size
+            end_idx = (i + 1) * quarter_size if i < 3 else len(sorted_commits)
+            quarters.append(sorted_commits[start_idx:end_idx])
+
+        # Analyze complexity trends (using commit size as proxy)
+        quarter_complexities = []
+        for quarter in quarters:
+            if not quarter:
+                continue
+            quarter_complexity = statistics.mean(
+                [
+                    commit.get("filtered_insertions", commit.get("insertions", 0))
+                    + commit.get("filtered_deletions", commit.get("deletions", 0))
+                    for commit in quarter
+                ]
+            )
+            quarter_complexities.append(quarter_complexity)
+
+        # Analyze project diversity trends
+        quarter_projects = []
+        for quarter in quarters:
+            projects = set(c.get("project_key", "UNKNOWN") for c in quarter)
+            quarter_projects.append(len(projects))
+
+        # Determine trajectory
+        if len(quarter_complexities) >= 2 and len(quarter_projects) >= 2:
+            complexity_trend = (quarter_complexities[-1] - quarter_complexities[0]) / max(
+                quarter_complexities[0], 1
+            )
+            project_trend = quarter_projects[-1] - quarter_projects[0]
+
+            if complexity_trend > 0.2 or project_trend > 0:
+                trajectory = "growing"
+                description = "Increasing complexity and scope of contributions"
+            elif complexity_trend < -0.2 and project_trend < 0:
+                trajectory = "declining"
+                description = "Decreasing complexity and scope of work"
+            else:
+                trajectory = "stable"
+                description = "Consistent level of contribution complexity"
+
+            confidence = min(0.8, len(sorted_commits) / 50)  # Higher confidence with more data
+        else:
+            trajectory = "stable"
+            description = "Stable contribution pattern"
+            confidence = 0.5
+
+        return {
+            "trajectory": trajectory,
+            "confidence": confidence,
+            "description": description,
+            "complexity_trend": (
+                round(complexity_trend * 100, 1) if "complexity_trend" in locals() else 0
+            ),
+            "project_expansion": project_trend if "project_trend" in locals() else 0,
+        }
+
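A small sketch of the quarter-over-quarter trend test above, with made-up per-quarter averages:

    # Assumed per-quarter mean commit sizes (lines changed) and project counts, for illustration.
    quarter_complexities = [80.0, 95.0, 110.0, 120.0]
    quarter_projects = [1, 1, 2, 2]

    complexity_trend = (quarter_complexities[-1] - quarter_complexities[0]) / max(
        quarter_complexities[0], 1
    )
    project_trend = quarter_projects[-1] - quarter_projects[0]

    # 0.5 > 0.2 and one extra project -> classified as "growing".
    print(round(complexity_trend, 2), project_trend)  # 0.5 1
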
+    def _detect_burnout_indicators(
+        self, dev_commits: list[dict[str, Any]], context: dict[str, Any]
+    ) -> list[dict[str, Any]]:
+        """Detect potential burnout indicators for a developer."""
+
+        indicators = []
+
+        # Weekend work pattern
+        weekend_commits = sum(
+            1
+            for c in dev_commits
+            if hasattr(c.get("timestamp"), "weekday") and c["timestamp"].weekday() >= 5
+        )
+        weekend_percentage = (weekend_commits / len(dev_commits)) * 100 if dev_commits else 0
+
+        if weekend_percentage > 40:  # More than 40% weekend work
+            indicators.append(
+                {
+                    "type": "excessive_weekend_work",
+                    "severity": "medium",
+                    "description": f"{weekend_percentage:.1f}% of commits made on weekends",
+                    "risk_level": "work_life_balance",
+                    "confidence": 0.7,
+                }
+            )
+
+        # Late night commits (if timezone info available)
+        late_night_commits = 0
+        for commit in dev_commits:
+            timestamp = commit.get("timestamp")
+            if hasattr(timestamp, "hour") and (timestamp.hour >= 22 or timestamp.hour <= 5):
+                # 10 PM to 5 AM
+                late_night_commits += 1
+
+        late_night_percentage = (late_night_commits / len(dev_commits)) * 100 if dev_commits else 0
+        if late_night_percentage > 30:
+            indicators.append(
+                {
+                    "type": "late_night_activity",
+                    "severity": "medium",
+                    "description": f"{late_night_percentage:.1f}% of commits made late night/early morning",
+                    "risk_level": "work_life_balance",
+                    "confidence": 0.6,
+                }
+            )
+
+        # Declining commit quality (increasing size without proportional impact)
+        recent_commits = sorted(dev_commits, key=lambda x: x["timestamp"])[-10:]  # Last 10 commits
+        if len(recent_commits) >= 5:
+            recent_sizes = [
+                c.get("filtered_insertions", c.get("insertions", 0))
+                + c.get("filtered_deletions", c.get("deletions", 0))
+                for c in recent_commits
+            ]
+            avg_recent_size = statistics.mean(recent_sizes)
+
+            if avg_recent_size > self.thresholds["large_commit_lines"]:
+                indicators.append(
+                    {
+                        "type": "increasing_commit_sizes",
+                        "severity": "low",
+                        "description": f"Recent commits average {avg_recent_size:.0f} lines",
+                        "risk_level": "productivity",
+                        "confidence": 0.5,
+                    }
+                )
+
+        return indicators
+
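A self-contained check of the weekend heuristic above, using invented timestamps:

    from datetime import datetime, timezone

    commits = [
        {"timestamp": datetime(2024, 5, 11, 14, 0, tzinfo=timezone.utc)},  # Saturday
        {"timestamp": datetime(2024, 5, 12, 9, 30, tzinfo=timezone.utc)},  # Sunday
        {"timestamp": datetime(2024, 5, 13, 10, 0, tzinfo=timezone.utc)},  # Monday
    ]
    weekend = sum(1 for c in commits if c["timestamp"].weekday() >= 5)
    weekend_percentage = weekend / len(commits) * 100
    print(round(weekend_percentage, 1))  # 66.7 -> above the 40% flag used above
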
+    def _generate_career_recommendations(
+        self,
+        contribution_pattern: dict[str, Any],
+        collaboration_score: dict[str, Any],
+        expertise_domains: list[dict[str, Any]],
+        growth_trajectory: dict[str, Any],
+        burnout_indicators: list[dict[str, Any]],
+    ) -> list[dict[str, Any]]:
+        """Generate career development recommendations for a developer."""
+
+        recommendations = []
+
+        # Pattern-based recommendations
+        pattern = contribution_pattern.get("pattern", "")
+        if pattern == "sporadic":
+            recommendations.append(
+                {
+                    "category": "consistency",
+                    "title": "Improve Contribution Consistency",
+                    "action": "Establish regular development schedule and focus on smaller, frequent commits",
+                    "priority": "medium",
+                    "expected_benefit": "Better project integration and skill development",
+                }
+            )
+        elif pattern == "high_volume_irregular":
+            recommendations.append(
+                {
+                    "category": "work_balance",
+                    "title": "Balance Workload Distribution",
+                    "action": "Spread work more evenly across time periods to improve sustainability",
+                    "priority": "medium",
+                    "expected_benefit": "Reduced burnout risk and more consistent output",
+                }
+            )
+
+        # Collaboration recommendations
+        collab_level = collaboration_score.get("level", "")
+        if collab_level == "single_focus":
+            recommendations.append(
+                {
+                    "category": "growth",
+                    "title": "Expand Project Involvement",
+                    "action": "Contribute to additional projects to broaden experience and impact",
+                    "priority": "low",
+                    "expected_benefit": "Increased versatility and cross-team collaboration",
+                }
+            )
+        elif collab_level == "highly_collaborative":
+            recommendations.append(
+                {
+                    "category": "leadership",
+                    "title": "Consider Technical Leadership Role",
+                    "action": "Leverage cross-project experience to mentor others and guide architecture decisions",
+                    "priority": "low",
+                    "expected_benefit": "Career advancement and increased impact",
+                }
+            )
+
+        # Growth trajectory recommendations
+        trajectory = growth_trajectory.get("trajectory", "")
+        if trajectory == "declining":
+            recommendations.append(
+                {
+                    "category": "engagement",
+                    "title": "Address Declining Engagement",
+                    "action": "Discuss career goals and explore new challenges or responsibilities",
+                    "priority": "high",
+                    "expected_benefit": "Renewed motivation and career development",
+                }
+            )
+        elif trajectory == "stable":
+            recommendations.append(
+                {
+                    "category": "development",
+                    "title": "Seek New Challenges",
+                    "action": "Take on more complex tasks or explore new technology areas",
+                    "priority": "medium",
+                    "expected_benefit": "Continued skill development and career growth",
+                }
+            )
+
+        # Burnout prevention recommendations
+        if burnout_indicators:
+            high_severity = [i for i in burnout_indicators if i.get("severity") == "high"]
+            if high_severity or len(burnout_indicators) >= 2:
+                recommendations.append(
+                    {
+                        "category": "wellbeing",
+                        "title": "Address Work-Life Balance",
+                        "action": "Review working patterns and implement better time boundaries",
+                        "priority": "high",
+                        "expected_benefit": "Improved wellbeing and sustainable productivity",
+                    }
+                )
+
+        return recommendations[:4]  # Top 4 recommendations
+
+    def _generate_developer_narrative(
+        self,
+        developer_name: str,
+        contribution_pattern: dict[str, Any],
+        expertise_domains: list[dict[str, Any]],
+        growth_trajectory: dict[str, Any],
+    ) -> str:
+        """Generate narrative summary for a developer."""
+
+        narrative_parts = []
+
+        # Developer introduction with pattern
+        pattern_desc = contribution_pattern.get("description", "shows mixed activity patterns")
+        narrative_parts.append(f"{developer_name} {pattern_desc}.")
+
+        # Expertise areas
+        if expertise_domains and len(expertise_domains) > 0:
+            primary_domain = expertise_domains[0]
+            if len(expertise_domains) == 1:
+                narrative_parts.append(
+                    f"Primary expertise in {primary_domain['domain']} with {primary_domain['expertise_level']} level proficiency."
+                )
+            else:
+                narrative_parts.append(
+                    f"Multi-domain contributor with {primary_domain['expertise_level']} expertise in {primary_domain['domain']} and experience across {len(expertise_domains)} areas."
+                )
+
+        # Growth trajectory
+        trajectory = growth_trajectory.get("trajectory", "stable")
+        trajectory_desc = growth_trajectory.get("description", "")
+        if trajectory == "growing":
+            narrative_parts.append(
+                f"Shows positive growth trajectory with {trajectory_desc.lower()}."
+            )
+        elif trajectory == "declining":
+            narrative_parts.append(f"Attention needed: {trajectory_desc.lower()}.")
+        else:
+            narrative_parts.append(f"Maintains {trajectory_desc.lower()}.")
+
+        return " ".join(narrative_parts)
+
+    # Workflow Analysis Helper Methods
+
+    def _assess_git_pm_correlation(
+        self, commits: list[dict[str, Any]], pm_data: dict[str, Any], context: dict[str, Any]
+    ) -> dict[str, Any]:
+        """Assess effectiveness of Git-PM platform correlation."""
+
+        if not pm_data or not pm_data.get("correlations"):
+            return {
+                "effectiveness": "no_integration",
+                "description": "No PM platform integration detected",
+                "score": 0,
+                "confidence": 0.9,
+            }
+
+        correlations = pm_data.get("correlations", [])
+        total_correlations = len(correlations)
+
+        # Analyze correlation quality
+        high_confidence = sum(1 for c in correlations if c.get("confidence", 0) > 0.8)
+        medium_confidence = sum(1 for c in correlations if 0.5 <= c.get("confidence", 0) <= 0.8)
+
+        # Calculate effectiveness score
+        if total_correlations > 0:
+            quality_score = (
+                (high_confidence * 1.0 + medium_confidence * 0.6) / total_correlations * 100
+            )
+        else:
+            quality_score = 0
+
+        # Determine effectiveness level
+        if quality_score >= 80:
+            effectiveness = "highly_effective"
+        elif quality_score >= 60:
+            effectiveness = "moderately_effective"
+        elif quality_score >= 40:
+            effectiveness = "partially_effective"
+        else:
+            effectiveness = "ineffective"
+
+        return {
+            "effectiveness": effectiveness,
+            "description": f"{effectiveness.replace('_', ' ').title()} with {quality_score:.1f}% correlation quality",
+            "score": round(quality_score, 1),
+            "confidence": 0.8,
+            "correlation_breakdown": {
+                "total": total_correlations,
+                "high_confidence": high_confidence,
+                "medium_confidence": medium_confidence,
+                "low_confidence": total_correlations - high_confidence - medium_confidence,
+            },
+        }
+
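For example, 10 correlations with 6 high-confidence and 2 medium-confidence matches would score as follows (numbers invented):

    high_confidence, medium_confidence, total_correlations = 6, 2, 10
    quality_score = (high_confidence * 1.0 + medium_confidence * 0.6) / total_correlations * 100
    print(quality_score)  # 72.0 -> "moderately_effective"
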
+    def _identify_process_bottlenecks(
+        self, commits: list[dict[str, Any]], context: dict[str, Any]
+    ) -> list[dict[str, Any]]:
+        """Identify potential process bottlenecks."""
+
+        bottlenecks = []
+
+        # Large commit bottleneck
+        large_commits = sum(
+            1
+            for c in commits
+            if (
+                c.get("filtered_insertions", c.get("insertions", 0))
+                + c.get("filtered_deletions", c.get("deletions", 0))
+            )
+            > self.thresholds["large_commit_lines"]
+        )
+
+        if large_commits > len(commits) * 0.25:  # More than 25% large commits
+            bottlenecks.append(
+                {
+                    "type": "large_commit_pattern",
+                    "severity": "medium",
+                    "description": f"{large_commits} commits exceed {self.thresholds['large_commit_lines']} lines",
+                    "impact": "Slower code reviews, increased merge conflicts",
+                    "recommendation": "Implement commit size guidelines and encourage smaller, focused changes",
+                }
+            )
+
+        # Irregular commit timing bottleneck
+        daily_commits = defaultdict(int)
+        for commit in commits:
+            day_key = commit["timestamp"].strftime("%Y-%m-%d")
+            daily_commits[day_key] += 1
+
+        daily_values = list(daily_commits.values())
+        if daily_values and len(daily_values) > 7:
+            daily_std = statistics.pstdev(daily_values)
+            daily_mean = statistics.mean(daily_values)
+
+            if daily_std > daily_mean:  # High variability
+                bottlenecks.append(
+                    {
+                        "type": "irregular_development_rhythm",
+                        "severity": "low",
+                        "description": "Highly variable daily commit patterns",
+                        "impact": "Unpredictable integration and review workload",
+                        "recommendation": "Encourage more consistent development and integration practices",
+                    }
+                )
+
+        # Ticket linking bottleneck
+        commits_with_tickets = sum(1 for c in commits if c.get("ticket_references"))
+        ticket_coverage = (commits_with_tickets / len(commits)) * 100 if commits else 0
+
+        if ticket_coverage < self.thresholds["ticket_coverage_poor"]:
+            bottlenecks.append(
+                {
+                    "type": "poor_ticket_linking",
+                    "severity": "medium",
+                    "description": f"Only {ticket_coverage:.1f}% of commits reference tickets",
+                    "impact": "Poor traceability and project management visibility",
+                    "recommendation": "Implement mandatory ticket referencing and provide training",
+                }
+            )
+
+        return bottlenecks
+
+    def _identify_automation_opportunities(
+        self, commits: list[dict[str, Any]], context: dict[str, Any]
+    ) -> list[dict[str, Any]]:
+        """Identify opportunities for process automation."""
+
+        opportunities = []
+
+        # Repetitive commit message patterns
+        message_patterns = defaultdict(int)
+        for commit in commits:
+            # Simplified pattern detection - look for common prefixes
+            message = commit.get("message", "").lower()
+            words = message.split()
+            if words:
+                first_word = words[0]
+                if first_word in ["fix", "update", "add", "remove", "refactor"]:
+                    message_patterns[first_word] += 1
+
+        total_commits = len(commits)
+        for pattern, count in message_patterns.items():
+            percentage = (count / total_commits) * 100
+            if percentage > 30:  # More than 30% of commits follow this pattern
+                opportunities.append(
+                    {
+                        "type": "commit_message_templates",
+                        "description": f"{percentage:.1f}% of commits start with '{pattern}'",
+                        "potential": "Implement commit message templates and validation",
+                        "effort": "low",
+                        "impact": "medium",
+                    }
+                )
+
+        # Regular fix patterns suggesting test automation needs
+        fix_commits = sum(
+            1
+            for c in commits
+            if any(keyword in c.get("message", "").lower() for keyword in ["fix", "bug", "hotfix"])
+        )
+        fix_percentage = (fix_commits / total_commits) * 100 if total_commits else 0
+
+        if fix_percentage > 25:
+            opportunities.append(
+                {
+                    "type": "automated_testing",
+                    "description": f"{fix_percentage:.1f}% of commits are fixes",
+                    "potential": "Implement comprehensive automated testing to catch issues earlier",
+                    "effort": "high",
+                    "impact": "high",
+                }
+            )
+
+        # Deployment frequency analysis
+        deploy_keywords = ["deploy", "release", "version"]
+        deploy_commits = sum(
+            1
+            for c in commits
+            if any(keyword in c.get("message", "").lower() for keyword in deploy_keywords)
+        )
+
+        weeks_analyzed = context["weeks_analyzed"]
+        deploy_frequency = deploy_commits / max(weeks_analyzed, 1)
+
+        if deploy_frequency < 0.5:  # Less than 0.5 deploys per week
+            opportunities.append(
+                {
+                    "type": "continuous_deployment",
+                    "description": f"Low deployment frequency: {deploy_frequency:.1f} per week",
+                    "potential": "Implement continuous deployment pipeline",
+                    "effort": "high",
+                    "impact": "high",
+                }
+            )
+
+        return opportunities
+
+    def _calculate_compliance_metrics(
+        self,
+        commits: list[dict[str, Any]],
+        project_metrics: dict[str, Any],
+        context: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Calculate various compliance and process adherence metrics."""
+
+        total_commits = len(commits)
+
+        # Ticket coverage compliance
+        commits_with_tickets = sum(1 for c in commits if c.get("ticket_references"))
+        ticket_coverage = (commits_with_tickets / total_commits) * 100 if total_commits else 0
+
+        # Commit message quality compliance
+        descriptive_messages = sum(
+            1 for c in commits if len(c.get("message", "").split()) >= 3
+        )  # At least 3 words
+        message_quality = (descriptive_messages / total_commits) * 100 if total_commits else 0
+
+        # Size compliance (reasonable commit sizes)
+        appropriate_size_commits = sum(
+            1
+            for c in commits
+            if 10
+            <= (
+                c.get("filtered_insertions", c.get("insertions", 0))
+                + c.get("filtered_deletions", c.get("deletions", 0))
+            )
+            <= 300
+        )
+        size_compliance = (appropriate_size_commits / total_commits) * 100 if total_commits else 0
+
+        # PR approval compliance (if PR data available - placeholder)
+        pr_approval_rate = 75  # Default assumption when PR data not available
+
+        # Overall compliance score
+        compliance_factors = [ticket_coverage, message_quality, size_compliance, pr_approval_rate]
+        overall_compliance = statistics.mean(compliance_factors)
+
+        return {
+            "overall_score": round(overall_compliance, 1),
+            "ticket_coverage": {
+                "score": round(ticket_coverage, 1),
+                "status": (
+                    "excellent"
+                    if ticket_coverage >= 80
+                    else "good" if ticket_coverage >= 60 else "needs_improvement"
+                ),
+            },
+            "message_quality": {
+                "score": round(message_quality, 1),
+                "status": (
+                    "excellent"
+                    if message_quality >= 80
+                    else "good" if message_quality >= 60 else "needs_improvement"
+                ),
+            },
+            "commit_size_compliance": {
+                "score": round(size_compliance, 1),
+                "status": (
+                    "excellent"
+                    if size_compliance >= 80
+                    else "good" if size_compliance >= 60 else "needs_improvement"
+                ),
+            },
+            "pr_approval_rate": {"score": pr_approval_rate, "status": "good"},  # Placeholder
+        }
+
+    def _analyze_team_collaboration_patterns(
+        self, commits: list[dict[str, Any]], context: dict[str, Any]
+    ) -> dict[str, Any]:
+        """Analyze team collaboration patterns."""
+
+        # Cross-project collaboration analysis
+        developer_projects = defaultdict(set)
+        for commit in commits:
+            dev_id = commit.get("canonical_id", commit.get("author_email"))
+            project_key = commit.get("project_key", "UNKNOWN")
+            developer_projects[dev_id].add(project_key)
+
+        # Count cross-project contributors
+        cross_project_devs = sum(1 for projects in developer_projects.values() if len(projects) > 1)
+        total_devs = len(developer_projects)
+        cross_collaboration_rate = (cross_project_devs / total_devs) * 100 if total_devs else 0
+
+        # Project contributor diversity
+        project_contributors = defaultdict(set)
+        for commit in commits:
+            dev_id = commit.get("canonical_id", commit.get("author_email"))
+            project_key = commit.get("project_key", "UNKNOWN")
+            project_contributors[project_key].add(dev_id)
+
+        avg_contributors_per_project = (
+            statistics.mean([len(contributors) for contributors in project_contributors.values()])
+            if project_contributors
+            else 0
+        )
+
+        # Collaboration score calculation
+        collaboration_factors = [
+            min(100, cross_collaboration_rate * 2),  # Cross-project work
+            min(100, avg_contributors_per_project * 25),  # Multi-contributor projects
+        ]
+
+        collaboration_score = statistics.mean(collaboration_factors)
+
+        return {
+            "collaboration_score": round(collaboration_score, 1),
+            "cross_project_contributors": cross_project_devs,
+            "cross_collaboration_rate": round(cross_collaboration_rate, 1),
+            "avg_contributors_per_project": round(avg_contributors_per_project, 1),
+            "collaboration_level": (
+                "high"
+                if collaboration_score >= 70
+                else "medium" if collaboration_score >= 40 else "low"
+            ),
+            "patterns": {
+                "multi_project_engagement": cross_collaboration_rate >= 50,
+                "team_project_distribution": avg_contributors_per_project >= 2,
+                "siloed_development": cross_collaboration_rate < 20,
+            },
+        }
+
+    def _generate_process_recommendations(
+        self,
+        git_pm_effectiveness: dict[str, Any],
+        bottlenecks: list[dict[str, Any]],
+        automation_opportunities: list[dict[str, Any]],
+        compliance_metrics: dict[str, Any],
+    ) -> list[dict[str, Any]]:
+        """Generate process improvement recommendations."""
+
+        recommendations = []
+
+        # Git-PM integration recommendations
+        effectiveness = git_pm_effectiveness.get("effectiveness", "")
+        if effectiveness in ["ineffective", "partially_effective"]:
+            recommendations.append(
+                {
+                    "priority": "high",
+                    "category": "integration",
+                    "title": "Improve Git-PM Integration",
+                    "action": "Enhance ticket referencing and correlation accuracy",
+                    "timeline": "4-6 weeks",
+                    "expected_impact": "Better project visibility and tracking",
+                }
+            )
+
+        # Bottleneck recommendations
+        high_severity_bottlenecks = [b for b in bottlenecks if b.get("severity") == "high"]
+        if high_severity_bottlenecks:
+            bottleneck = high_severity_bottlenecks[0]
+            recommendations.append(
+                {
+                    "priority": "high",
+                    "category": "process_optimization",
+                    "title": f"Address {bottleneck['type'].replace('_', ' ').title()}",
+                    "action": bottleneck.get("recommendation", "Address identified bottleneck"),
+                    "timeline": "2-4 weeks",
+                    "expected_impact": bottleneck.get("impact", "Improved process efficiency"),
+                }
+            )
+
+        # Automation recommendations
+        high_impact_automation = [a for a in automation_opportunities if a.get("impact") == "high"]
+        if high_impact_automation:
+            automation = high_impact_automation[0]
+            recommendations.append(
+                {
+                    "priority": "medium",
+                    "category": "automation",
+                    "title": f"Implement {automation['type'].replace('_', ' ').title()}",
+                    "action": automation["potential"],
+                    "timeline": "6-12 weeks" if automation.get("effort") == "high" else "2-4 weeks",
+                    "expected_impact": "Reduced manual effort and improved consistency",
+                }
+            )
+
+        # Compliance recommendations
+        overall_compliance = compliance_metrics.get("overall_score", 0)
+        if overall_compliance < 70:
+            recommendations.append(
+                {
+                    "priority": "medium",
+                    "category": "compliance",
+                    "title": "Improve Process Compliance",
+                    "action": "Focus on ticket linking, commit message quality, and size guidelines",
+                    "timeline": "4-8 weeks",
+                    "expected_impact": "Better process adherence and project visibility",
+                }
+            )
+
+        return recommendations[:4]  # Top 4 recommendations
+
+    def _generate_workflow_narrative(
+        self,
+        git_pm_effectiveness: dict[str, Any],
+        bottlenecks: list[dict[str, Any]],
+        compliance_metrics: dict[str, Any],
+    ) -> str:
+        """Generate workflow analysis narrative."""
+
+        narrative_parts = []
+
+        # Git-PM effectiveness
+        effectiveness_desc = git_pm_effectiveness.get("description", "integration status unclear")
+        narrative_parts.append(f"Git-PM platform integration is {effectiveness_desc.lower()}.")
+
+        # Process health
+        compliance_score = compliance_metrics.get("overall_score", 0)
+        if compliance_score >= 80:
+            narrative_parts.append("Development processes show strong compliance and adherence.")
+        elif compliance_score >= 60:
+            narrative_parts.append(
+                "Development processes are generally well-followed with room for improvement."
+            )
+        else:
+            narrative_parts.append(
+                "Development processes need attention to improve compliance and effectiveness."
+            )
+
+        # Bottleneck highlights
+        high_severity_bottlenecks = [b for b in bottlenecks if b.get("severity") == "high"]
+        if high_severity_bottlenecks:
+            narrative_parts.append(
+                f"Critical bottleneck identified: {high_severity_bottlenecks[0]['type'].replace('_', ' ')}."
+            )
+        elif bottlenecks:
+            narrative_parts.append(
+                f"Some process inefficiencies detected, particularly in {bottlenecks[0]['type'].replace('_', ' ')}."
+            )
+
+        return " ".join(narrative_parts)
+
+    # Cross-Analysis Helper Methods
+
+    def _generate_cross_insights(
+        self,
+        executive_analysis: dict[str, Any],
+        project_analysis: dict[str, Any],
+        developer_analysis: dict[str, Any],
+        workflow_analysis: dict[str, Any],
+    ) -> list[dict[str, Any]]:
+        """Generate insights that span multiple analysis dimensions."""
+
+        cross_insights = []
+
+        # Executive-Project alignment insight
+        exec_health = executive_analysis.get("health_assessment", "unknown")
+        project_health_scores = []
+        for project_data in project_analysis.values():
+            health_indicators = project_data.get("health_indicators", {})
+            overall_health = health_indicators.get("overall_health", {})
+            score = overall_health.get("score", 0)
+            project_health_scores.append(score)
+
+        if project_health_scores:
+            avg_project_health = statistics.mean(project_health_scores)
+            if exec_health == "excellent" and avg_project_health < 60:
+                cross_insights.append(
+                    {
+                        "type": "alignment_mismatch",
+                        "title": "Executive-Project Health Disconnect",
+                        "description": "Overall team health excellent but individual projects show concerns",
+                        "priority": "medium",
+                        "recommendation": "Investigate project-specific issues that may not be visible at team level",
+                    }
+                )
+
+        # Developer-Workflow correlation insight
+        high_burnout_devs = 0
+        for dev_data in developer_analysis.values():
+            burnout_indicators = dev_data.get("burnout_indicators", [])
+            if len(burnout_indicators) >= 2:
+                high_burnout_devs += 1
+
+        workflow_bottlenecks = workflow_analysis.get("process_bottlenecks", [])
+        if high_burnout_devs > 0 and len(workflow_bottlenecks) > 2:
+            cross_insights.append(
+                {
+                    "type": "systemic_issue",
+                    "title": "Process Issues Contributing to Developer Stress",
+                    "description": f"{high_burnout_devs} developers show burnout indicators alongside {len(workflow_bottlenecks)} process bottlenecks",
+                    "priority": "high",
+                    "recommendation": "Address workflow inefficiencies to reduce developer burden",
+                }
+            )
+
+        # Project-Developer resource allocation insight
+        declining_projects = sum(
+            1
+            for p in project_analysis.values()
+            if p.get("momentum", {}).get("classification") == "declining"
+        )
+        declining_developers = sum(
+            1
+            for d in developer_analysis.values()
+            if d.get("growth_trajectory", {}).get("trajectory") == "declining"
+        )
+
+        if declining_projects > 0 and declining_developers > 0:
+            cross_insights.append(
+                {
+                    "type": "resource_allocation",
+                    "title": "Coordinated Decline Pattern",
+                    "description": f"{declining_projects} projects and {declining_developers} developers showing decline",
+                    "priority": "high",
+                    "recommendation": "Review resource allocation and team capacity planning",
+                }
+            )
+
+        return cross_insights
+
+    # Utility Helper Methods
+
+    def _get_weekly_commit_counts(self, commits: list[dict[str, Any]]) -> list[int]:
+        """Get commit counts grouped by week."""
+
+        if not commits:
+            return []
+
+        weekly_counts = defaultdict(int)
+        for commit in commits:
+            week_start = self._get_week_start(commit["timestamp"])
+            week_key = week_start.strftime("%Y-%m-%d")
+            weekly_counts[week_key] += 1
+
+        # Return counts in chronological order
+        sorted_weeks = sorted(weekly_counts.keys())
+        return [weekly_counts[week] for week in sorted_weeks]
+
+    def _get_week_start(self, date: datetime) -> datetime:
+        """Get Monday of the week for a given date."""
+
+        # Ensure timezone consistency
+        if hasattr(date, "tzinfo") and date.tzinfo is not None:
+            if date.tzinfo != timezone.utc:
+                date = date.astimezone(timezone.utc)
+        else:
+            date = date.replace(tzinfo=timezone.utc)
+
+        days_since_monday = date.weekday()
+        monday = date - timedelta(days=days_since_monday)
+        return monday.replace(hour=0, minute=0, second=0, microsecond=0)
+
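The same Monday-anchoring logic, run standalone on a sample UTC timestamp:

    from datetime import datetime, timedelta, timezone

    d = datetime(2024, 5, 15, 17, 42, tzinfo=timezone.utc)  # a Wednesday
    monday = (d - timedelta(days=d.weekday())).replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    print(monday.date())  # 2024-05-13
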
+    def _calculate_gini_coefficient(self, values: list[float]) -> float:
+        """Calculate Gini coefficient for measuring inequality."""
+
+        if not values or len(values) == 1:
+            return 0.0
+
+        sorted_values = sorted([v for v in values if v > 0])  # Filter out zeros
+        if not sorted_values:
+            return 0.0
+
+        n = len(sorted_values)
+        cumsum = np.cumsum(sorted_values)
+
+        # Rank-weighted sum of the sorted values (ranks 1..n)
+        weighted_sum = np.sum(np.arange(1, n + 1) * np.asarray(sorted_values))
+
+        return (2 * weighted_sum) / (n * cumsum[-1]) - (n + 1) / n
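This is the standard rank-based closed form, G = 2 * sum(i * x_i) / (n * sum(x_i)) - (n + 1) / n over the sorted positive values. A quick sanity check on toy data (a standalone sketch, not the package's own API):

    import numpy as np

    def gini(values):
        xs = np.sort(np.asarray([v for v in values if v > 0], dtype=float))
        n = xs.size
        if n <= 1:
            return 0.0
        ranks = np.arange(1, n + 1)
        return float(2 * np.sum(ranks * xs) / (n * xs.sum()) - (n + 1) / n)

    print(gini([5, 5, 5, 5]))  # 0.0  (perfectly even split)
    print(gini([1, 1, 1, 7]))  # ~0.45 (most of the work in one bucket)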