gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Files changed (116)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4108 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +904 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +441 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1193 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
gitflow_analytics/integrations/orchestrator.py
@@ -5,6 +5,7 @@ from datetime import datetime
 from typing import Any, Union
 
 from ..core.cache import GitAnalysisCache
+from ..pm_framework.orchestrator import PMFrameworkOrchestrator
 from .github_integration import GitHubIntegration
 from .jira_integration import JIRAIntegration
 
@@ -14,6 +15,7 @@ class IntegrationOrchestrator:
 
     def __init__(self, config: Any, cache: GitAnalysisCache):
         """Initialize integration orchestrator."""
+        print(" 🔍 IntegrationOrchestrator.__init__ called")
         self.config = config
         self.cache = cache
         self.integrations: dict[str, Union[GitHubIntegration, JIRAIntegration]] = {}
@@ -35,19 +37,104 @@ class IntegrationOrchestrator:
         if hasattr(jira_settings, "enabled") and jira_settings.enabled:
             base_url = getattr(config.jira, "base_url", None)
             if base_url:
+                # Extract network and proxy settings from jira_settings
                 self.integrations["jira"] = JIRAIntegration(
                     base_url,
                     config.jira.access_user,
                     config.jira.access_token,
                     cache,
                     story_point_fields=getattr(jira_settings, "story_point_fields", None),
+                    dns_timeout=getattr(jira_settings, "dns_timeout", 10),
+                    connection_timeout=getattr(jira_settings, "connection_timeout", 30),
+                    max_retries=getattr(jira_settings, "max_retries", 3),
+                    backoff_factor=getattr(jira_settings, "backoff_factor", 1.0),
+                    enable_proxy=getattr(jira_settings, "enable_proxy", False),
+                    proxy_url=getattr(jira_settings, "proxy_url", None),
                 )
 
+        # Initialize PM framework orchestrator
+        self.pm_orchestrator = None
+        if (
+            hasattr(config, "pm_integration")
+            and config.pm_integration
+            and config.pm_integration.enabled
+        ):
+            print(" 🔍 PM Integration detected - building configuration...")
+            try:
+                # Create PM platform configuration for the orchestrator
+                pm_config = {
+                    "pm_platforms": {},
+                    "analysis": {
+                        "pm_integration": {
+                            "enabled": config.pm_integration.enabled,
+                            "primary_platform": config.pm_integration.primary_platform,
+                            "correlation": config.pm_integration.correlation,
+                        }
+                    },
+                }
+
+                # Convert PM platform configs to expected format
+                platforms_dict = config.pm_integration.platforms
+                if hasattr(platforms_dict, "__dict__"):
+                    # It's an AttrDict, convert to regular dict
+                    platforms_dict = dict(platforms_dict)
+
+                for platform_name, platform_config in platforms_dict.items():
+                    if hasattr(platform_config, "enabled") and platform_config.enabled:
+                        # Convert AttrDict to regular dict
+                        platform_config_dict = (
+                            dict(platform_config.config)
+                            if hasattr(platform_config.config, "__dict__")
+                            else platform_config.config
+                        )
+                        platform_settings = {
+                            "enabled": True,
+                            **platform_config_dict,
+                        }
+
+                        # Special handling for JIRA - use credentials from top-level JIRA config
+                        if platform_name == "jira" and hasattr(config, "jira"):
+                            platform_settings["username"] = config.jira.access_user
+                            platform_settings["api_token"] = config.jira.access_token
+                            # Also ensure base_url matches if not set
+                            if (
+                                not platform_settings.get("base_url")
+                                or platform_settings["base_url"] == "will_be_set_at_runtime"
+                            ):
+                                platform_settings["base_url"] = config.jira.base_url
+                            # Add cache directory for ticket caching (config file directory)
+                            if hasattr(config, "cache") and hasattr(config.cache, "directory"):
+                                platform_settings["cache_dir"] = config.cache.directory
+                            # Debug output to check credentials
+                            print(
+                                f" 🔍 JIRA config: username={platform_settings['username']}, has_token={bool(platform_settings['api_token'])}, base_url={platform_settings['base_url']}, cache_dir={platform_settings.get('cache_dir', 'not_set')}"
+                            )
+
+                        pm_config["pm_platforms"][platform_name] = platform_settings
+
+                # Debug output - show final PM config
+                print(
+                    f" 🔍 Final PM config platforms: {list(pm_config.get('pm_platforms', {}).keys())}"
+                )
+                for plat_name, plat_config in pm_config.get("pm_platforms", {}).items():
+                    print(
+                        f" 🔍 {plat_name}: enabled={plat_config.get('enabled')}, has_username={bool(plat_config.get('username'))}, has_token={bool(plat_config.get('api_token'))}"
+                    )
+
+                self.pm_orchestrator = PMFrameworkOrchestrator(pm_config)
+                print(
+                    f"📋 PM Framework initialized with {len(self.pm_orchestrator.get_active_platforms())} platforms"
+                )
+
+            except Exception as e:
+                print(f"⚠️ Failed to initialize PM framework: {e}")
+                self.pm_orchestrator = None
+
     def enrich_repository_data(
         self, repo_config: Any, commits: list[dict[str, Any]], since: datetime
     ) -> dict[str, Any]:
         """Enrich repository data from all available integrations."""
-        enrichment: dict[str, Any] = {"prs": [], "issues": [], "pr_metrics": {}}
+        enrichment: dict[str, Any] = {"prs": [], "issues": [], "pr_metrics": {}, "pm_data": {}}
 
         # GitHub enrichment
         if "github" in self.integrations and repo_config.github_repo:
@@ -65,7 +152,10 @@ class IntegrationOrchestrator:
                 enrichment["pr_metrics"] = github_integration.calculate_pr_metrics(prs)
 
             except Exception as e:
+                import traceback
+
                 print(f" ⚠️ GitHub enrichment failed: {e}")
+                print(f" Debug traceback: {traceback.format_exc()}")
 
         # JIRA enrichment for story points
         if "jira" in self.integrations:
@@ -82,6 +172,39 @@ class IntegrationOrchestrator:
             except Exception as e:
                 print(f" ⚠️ JIRA enrichment failed: {e}")
 
+        # PM Framework enrichment
+        if self.pm_orchestrator and self.pm_orchestrator.is_enabled():
+            try:
+                print(" 📋 Collecting PM platform data...")
+
+                # Get all issues from PM platforms
+                pm_issues = self.pm_orchestrator.get_all_issues(since=since)
+                enrichment["pm_data"]["issues"] = pm_issues
+
+                # Correlate issues with commits
+                correlations = self.pm_orchestrator.correlate_issues_with_commits(
+                    pm_issues, commits
+                )
+                enrichment["pm_data"]["correlations"] = correlations
+
+                # Calculate enhanced metrics
+                enhanced_metrics = self.pm_orchestrator.calculate_enhanced_metrics(
+                    commits, enrichment["prs"], pm_issues, correlations
+                )
+                enrichment["pm_data"]["metrics"] = enhanced_metrics
+
+                # Only show correlations message if there are any correlations found
+                if correlations:
+                    print(
+                        f" ✅ PM correlations found: {len(correlations)} commits linked to issues"
+                    )
+                else:
+                    print(" 📋 PM data processed (no correlations found)")
+
+            except Exception as e:
+                print(f" ⚠️ PM framework enrichment failed: {e}")
+                enrichment["pm_data"] = {"error": str(e)}
+
         return enrichment
 
     def get_platform_issues(self, project_key: str, since: datetime) -> list[dict[str, Any]]:
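For orientation: the __init__ changes above assemble a plain pm_config dict and hand it to PMFrameworkOrchestrator. A minimal sketch of that dict's shape, inferred from the construction code in the hunk above (the key layout mirrors the diff; every concrete value below is an illustrative placeholder, not taken from the package):

# Shape of the pm_config dict built in IntegrationOrchestrator.__init__ above.
# All values here are placeholders; only the structure comes from the diff.
pm_config = {
    "pm_platforms": {
        "jira": {
            "enabled": True,
            "base_url": "https://example.atlassian.net",  # falls back to config.jira.base_url
            "username": "bot@example.com",                # copied from config.jira.access_user
            "api_token": "<token>",                       # copied from config.jira.access_token
            "cache_dir": "/path/to/cache",                # from config.cache.directory, when present
        },
    },
    "analysis": {
        "pm_integration": {
            "enabled": True,
            "primary_platform": "jira",
            "correlation": {},  # passed through from config.pm_integration.correlation
        }
    },
}

Note that the top-level JIRA credentials are deliberately reused for the "jira" platform entry, so the PM framework does not need a second set of secrets.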
gitflow_analytics/metrics/activity_scoring.py
@@ -0,0 +1,322 @@
+"""Developer activity scoring module using balanced metrics.
+
+Based on research and best practices for measuring developer productivity in 2024,
+this module implements a balanced scoring approach that considers:
+- Commits (baseline activity)
+- Pull Requests (collaboration and review)
+- Lines of Code (impact, with diminishing returns)
+- Code churn (deletions valued for refactoring)
+"""
+
+import math
+from typing import Any
+
+
+class ActivityScorer:
+    """Calculate balanced developer activity scores based on multiple metrics."""
+
+    # Weights based on research indicating balanced approach
+    WEIGHTS = {
+        "commits": 0.25,  # Each commit represents baseline effort
+        "prs": 0.30,  # PRs indicate collaboration and review effort
+        "code_impact": 0.30,  # Lines changed with diminishing returns
+        "complexity": 0.15,  # File changes and complexity
+    }
+
+    # Scaling factors based on research
+    COMMIT_BASE_SCORE = 10  # Each commit worth base 10 points
+    PR_BASE_SCORE = 50  # Each PR worth base 50 points (5x commit)
+    OPTIMAL_PR_SIZE = 200  # Research shows PRs under 200 lines are optimal
+
+    def calculate_activity_score(self, metrics: dict[str, Any]) -> dict[str, Any]:
+        """Calculate balanced activity score for a developer.
+
+        Args:
+            metrics: Dictionary containing:
+                - commits: Number of commits
+                - prs_involved: Number of PRs
+                - lines_added: Lines added
+                - lines_removed: Lines removed
+                - files_changed: Number of files changed
+                - complexity_delta: Complexity change
+
+        Returns:
+            Dictionary with:
+                - raw_score: Unscaled activity score
+                - normalized_score: Score normalized to 0-100
+                - components: Breakdown of score components
+        """
+        # Extract metrics with defaults
+        commits = metrics.get("commits", 0)
+        prs = metrics.get("prs_involved", 0)
+        lines_added = metrics.get("lines_added", 0)
+        lines_removed = metrics.get("lines_removed", 0)
+        files_changed = metrics.get(
+            "files_changed_count",
+            (
+                metrics.get("files_changed", 0)
+                if isinstance(metrics.get("files_changed"), int)
+                else len(metrics.get("files_changed", []))
+            ),
+        )
+        complexity = metrics.get("complexity_delta", 0)
+
+        # Calculate component scores
+        commit_score = self._calculate_commit_score(commits)
+        pr_score = self._calculate_pr_score(prs, lines_added + lines_removed)
+        code_score = self._calculate_code_impact_score(lines_added, lines_removed)
+        complexity_score = self._calculate_complexity_score(files_changed, complexity)
+
+        # Weighted total
+        components = {
+            "commit_score": commit_score,
+            "pr_score": pr_score,
+            "code_impact_score": code_score,
+            "complexity_score": complexity_score,
+        }
+
+        raw_score = (
+            commit_score * self.WEIGHTS["commits"]
+            + pr_score * self.WEIGHTS["prs"]
+            + code_score * self.WEIGHTS["code_impact"]
+            + complexity_score * self.WEIGHTS["complexity"]
+        )
+
+        return {
+            "raw_score": raw_score,
+            "normalized_score": self._normalize_score(raw_score),
+            "components": components,
+            "activity_level": self._get_activity_level(raw_score),
+        }
+
+    def _calculate_commit_score(self, commits: int) -> float:
+        """Calculate score from commit count with diminishing returns."""
+        if commits == 0:
+            return 0
+
+        # Use logarithmic scaling for diminishing returns
+        # First 10 commits worth full value, then diminishing
+        if commits <= 10:
+            return commits * self.COMMIT_BASE_SCORE
+        else:
+            base = 10 * self.COMMIT_BASE_SCORE
+            extra = math.log10(commits - 9) * self.COMMIT_BASE_SCORE * 5
+            return base + extra
+
+    def _calculate_pr_score(self, prs: int, total_lines: int) -> float:
+        """Calculate PR score considering optimal PR sizes."""
+        if prs == 0:
+            return 0
+
+        base_score = prs * self.PR_BASE_SCORE
+
+        # Bonus for maintaining optimal PR size
+        avg_pr_size = total_lines / prs if prs > 0 else 0
+        if avg_pr_size <= self.OPTIMAL_PR_SIZE:
+            size_bonus = 1.2  # 20% bonus for optimal size
+        else:
+            # Penalty for oversized PRs
+            size_bonus = max(0.7, 1 - (avg_pr_size - self.OPTIMAL_PR_SIZE) / 1000)
+
+        return base_score * size_bonus
+
+    def _calculate_code_impact_score(self, lines_added: int, lines_removed: int) -> float:
+        """Calculate code impact score with balanced add/remove consideration and enhanced diminishing returns.
+
+        WHY: Massive single commits can unfairly inflate scores. This implementation
+        uses stronger diminishing returns to prevent score inflation from extremely
+        large commits while still rewarding meaningful contributions.
+        """
+        # Research shows deletions are valuable (refactoring, cleanup)
+        # Weight deletions at 70% of additions
+        effective_lines = lines_added + (lines_removed * 0.7)
+
+        if effective_lines == 0:
+            return 0
+
+        # Logarithmic scaling to prevent gaming with massive changes
+        # First 500 lines worth full value
+        if effective_lines <= 500:
+            return effective_lines * 0.2
+        else:
+            base = 500 * 0.2
+            # Enhanced diminishing returns for massive commits
+            if effective_lines <= 2000:
+                extra = math.log10(effective_lines - 499) * 15  # Reduced multiplier
+            else:
+                # Very large commits get even more aggressive diminishing returns
+                medium_extra = math.log10(2000 - 499) * 15
+                large_extra = math.log10(effective_lines - 1999) * 8  # Much smaller multiplier
+                extra = medium_extra + large_extra
+            return base + extra
+
+    def _calculate_complexity_score(self, files_changed: int, complexity_delta: float) -> float:
+        """Calculate score based on breadth and complexity of changes."""
+        if files_changed == 0:
+            return 0
+
+        # Base score from files touched (breadth of impact)
+        file_score = min(files_changed * 5, 50)  # Cap at 50 points
+
+        # Complexity factor (can be negative for simplification)
+        # Reward simplification (negative complexity delta)
+        if complexity_delta < 0:
+            complexity_bonus = abs(complexity_delta) * 0.5  # Reward simplification
+        else:
+            complexity_bonus = -min(
+                complexity_delta * 0.2, 10
+            )  # Small penalty for added complexity
+
+        return max(0, file_score + complexity_bonus)
+
+    def _normalize_score(self, raw_score: float) -> float:
+        """Normalize score to 0-100 range."""
+        # Based on research, a highly productive week might have:
+        # - 15 commits (150 points after scaling)
+        # - 3 PRs of optimal size (180 points)
+        # - 1000 effective lines (120 points)
+        # - 20 files changed (50 points)
+        # Total: ~500 points = 100 normalized
+
+        normalized = (raw_score / 500) * 100
+        return min(100, normalized)  # Cap at 100
+
+    def _get_activity_level(self, raw_score: float) -> str:
+        """Categorize activity level based on score."""
+        normalized = self._normalize_score(raw_score)
+
+        if normalized >= 80:
+            return "exceptional"
+        elif normalized >= 60:
+            return "high"
+        elif normalized >= 40:
+            return "moderate"
+        elif normalized >= 20:
+            return "low"
+        else:
+            return "minimal"
+
+    def calculate_team_relative_score(
+        self, individual_score: float, team_scores: list[float]
+    ) -> dict[str, Any]:
+        """Calculate relative performance within team context.
+
+        Args:
+            individual_score: Individual's raw activity score
+            team_scores: List of all team members' raw scores
+
+        Returns:
+            Dictionary with percentile and relative metrics
+        """
+        if not team_scores:
+            return {"percentile": 50, "relative_score": 1.0, "team_position": "average"}
+
+        # Calculate percentile
+        scores_below = sum(1 for score in team_scores if score < individual_score)
+        percentile = (scores_below / len(team_scores)) * 100
+
+        # Calculate relative to team average
+        team_avg = sum(team_scores) / len(team_scores)
+        relative_score = individual_score / team_avg if team_avg > 0 else 1.0
+
+        # Determine position
+        if percentile >= 90:
+            position = "top_performer"
+        elif percentile >= 75:
+            position = "above_average"
+        elif percentile >= 25:
+            position = "average"
+        else:
+            position = "below_average"
+
+        return {
+            "percentile": round(percentile, 1),
+            "relative_score": round(relative_score, 2),
+            "team_position": position,
+            "team_average": round(team_avg, 1),
+        }
+
+    def normalize_scores_on_curve(
+        self, developer_scores: dict[str, float], curve_mean: float = 50.0, curve_std: float = 15.0
+    ) -> dict[str, dict[str, Any]]:
+        """Normalize activity scores on a bell curve with quintile grouping.
+
+        Args:
+            developer_scores: Dictionary mapping developer IDs to raw scores
+            curve_mean: Target mean for the normalized distribution (default: 50)
+            curve_std: Target standard deviation for the distribution (default: 15)
+
+        Returns:
+            Dictionary with normalized scores and quintile groupings
+        """
+        if not developer_scores:
+            return {}
+
+        # Get all scores
+        scores = list(developer_scores.values())
+
+        # Calculate current statistics
+        current_mean = sum(scores) / len(scores)
+        variance = sum((x - current_mean) ** 2 for x in scores) / len(scores)
+        current_std = math.sqrt(variance) if variance > 0 else 1.0
+
+        # Normalize to bell curve
+        normalized_scores = {}
+        for dev_id, raw_score in developer_scores.items():
+            # Z-score normalization
+            z_score = (raw_score - current_mean) / current_std if current_std > 0 else 0
+
+            # Transform to target distribution
+            curved_score = curve_mean + (z_score * curve_std)
+
+            # Ensure scores stay in reasonable range (0-100)
+            curved_score = max(0, min(100, curved_score))
+
+            normalized_scores[dev_id] = curved_score
+
+        # Sort developers by normalized score for quintile assignment
+        sorted_devs = sorted(normalized_scores.items(), key=lambda x: x[1])
+
+        # Assign quintiles
+        results = {}
+        quintile_size = len(sorted_devs) / 5
+
+        for idx, (dev_id, curved_score) in enumerate(sorted_devs):
+            # Determine quintile (1-5)
+            quintile = min(5, int(idx / quintile_size) + 1)
+
+            # Determine activity level based on quintile
+            if quintile == 5:
+                activity_level = "exceptional"
+                level_description = "Top 20%"
+            elif quintile == 4:
+                activity_level = "high"
+                level_description = "60-80th percentile"
+            elif quintile == 3:
+                activity_level = "moderate"
+                level_description = "40-60th percentile"
+            elif quintile == 2:
+                activity_level = "low"
+                level_description = "20-40th percentile"
+            else:  # quintile == 1
+                activity_level = "minimal"
+                level_description = "Bottom 20%"
+
+            # Calculate exact percentile
+            percentile = ((idx + 0.5) / len(sorted_devs)) * 100
+
+            results[dev_id] = {
+                "raw_score": developer_scores[dev_id],
+                "curved_score": round(curved_score, 1),
+                "quintile": quintile,
+                "activity_level": activity_level,
+                "level_description": level_description,
+                "percentile": round(percentile, 0),
+                "z_score": (
+                    round((developer_scores[dev_id] - current_mean) / current_std, 2)
+                    if current_std > 0
+                    else 0
+                ),
+            }
+
+        return results
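To make the scoring concrete, here is a short usage sketch of the new module. The metric values are invented, and the expected numbers in the comments simply follow the formulas above:

# Usage sketch for ActivityScorer; all input values are hypothetical.
from gitflow_analytics.metrics.activity_scoring import ActivityScorer

scorer = ActivityScorer()

# One developer-week of invented metrics.
result = scorer.calculate_activity_score(
    {
        "commits": 8,               # 8 * COMMIT_BASE_SCORE = 80 points
        "prs_involved": 2,          # avg PR size 400/2 = 200 lines -> 1.2x bonus -> 120 points
        "lines_added": 300,
        "lines_removed": 100,       # effective lines = 300 + 0.7*100 = 370 -> 74 points
        "files_changed_count": 12,  # min(12*5, 50) = 50, +1.5 for the negative complexity delta
        "complexity_delta": -3,
    }
)
# Weighted: 0.25*80 + 0.30*120 + 0.30*74 + 0.15*51.5 ≈ 85.9 raw,
# normalized ≈ 85.9/500*100 ≈ 17.2, i.e. activity_level "minimal".
print(result["raw_score"], result["normalized_score"], result["activity_level"])

# Curving a (tiny, invented) team onto the 50/15 bell curve with quintiles:
curved = scorer.normalize_scores_on_curve({"alice": 86.0, "bob": 240.0, "carol": 150.0})
for dev, r in curved.items():
    print(dev, r["curved_score"], r["quintile"], r["activity_level"])

Note the two normalization paths: _normalize_score pins 100 to a fixed ~500-point week, while normalize_scores_on_curve grades developers relative to each other, so the same raw score can land in very different quintiles depending on the team.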