gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- gitflow_analytics/__init__.py +11 -11
- gitflow_analytics/_version.py +2 -2
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4490 -378
- gitflow_analytics/cli_rich.py +503 -0
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +904 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +441 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -398
- gitflow_analytics/core/analyzer.py +1320 -172
- gitflow_analytics/core/branch_mapper.py +132 -132
- gitflow_analytics/core/cache.py +1554 -175
- gitflow_analytics/core/data_fetcher.py +1193 -0
- gitflow_analytics/core/identity.py +571 -185
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/base.py +13 -11
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +77 -59
- gitflow_analytics/extractors/tickets.py +841 -89
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +258 -87
- gitflow_analytics/integrations/jira_integration.py +572 -123
- gitflow_analytics/integrations/orchestrator.py +206 -82
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +542 -179
- gitflow_analytics/models/database.py +986 -59
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +29 -0
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
- gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
- gitflow_analytics/qualitative/core/__init__.py +13 -0
- gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
- gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
- gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
- gitflow_analytics/qualitative/core/processor.py +673 -0
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +25 -0
- gitflow_analytics/qualitative/models/schemas.py +306 -0
- gitflow_analytics/qualitative/utils/__init__.py +13 -0
- gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
- gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
- gitflow_analytics/qualitative/utils/metrics.py +361 -0
- gitflow_analytics/qualitative/utils/text_processing.py +285 -0
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +550 -18
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1700 -216
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2289 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +5 -0
- gitflow_analytics/tui/app.py +724 -0
- gitflow_analytics/tui/screens/__init__.py +8 -0
- gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
- gitflow_analytics/tui/screens/configuration_screen.py +523 -0
- gitflow_analytics/tui/screens/loading_screen.py +348 -0
- gitflow_analytics/tui/screens/main_screen.py +321 -0
- gitflow_analytics/tui/screens/results_screen.py +722 -0
- gitflow_analytics/tui/widgets/__init__.py +7 -0
- gitflow_analytics/tui/widgets/data_table.py +255 -0
- gitflow_analytics/tui/widgets/export_modal.py +301 -0
- gitflow_analytics/tui/widgets/progress_widget.py +187 -0
- gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
- gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
gitflow_analytics/integrations/orchestrator.py
@@ -1,119 +1,243 @@
 """Integration orchestrator for multiple platforms."""
+
 import json
 from datetime import datetime
-from typing import Any,
+from typing import Any, Union
 
 from ..core.cache import GitAnalysisCache
+from ..pm_framework.orchestrator import PMFrameworkOrchestrator
 from .github_integration import GitHubIntegration
 from .jira_integration import JIRAIntegration
 
 
 class IntegrationOrchestrator:
     """Orchestrate integrations with multiple platforms."""
-
+
     def __init__(self, config: Any, cache: GitAnalysisCache):
         """Initialize integration orchestrator."""
+        print(" 🔍 IntegrationOrchestrator.__init__ called")
         self.config = config
         self.cache = cache
-        self.integrations = {}
-
+        self.integrations: dict[str, Union[GitHubIntegration, JIRAIntegration]] = {}
+
         # Initialize available integrations
         if config.github and config.github.token:
-            self.integrations[
+            self.integrations["github"] = GitHubIntegration(
                 config.github.token,
                 cache,
                 config.github.max_retries,
                 config.github.backoff_factor,
-                allowed_ticket_platforms=getattr(config.analysis,
+                allowed_ticket_platforms=getattr(config.analysis, "ticket_platforms", None),
             )
-
+
         # Initialize JIRA integration if configured
         if config.jira and config.jira.access_user and config.jira.access_token:
             # Get JIRA specific settings if available
-            jira_settings = getattr(config,
-            if hasattr(jira_settings,
-            base_url = getattr(config.jira,
+            jira_settings = getattr(config, "jira_integration", {})
+            if hasattr(jira_settings, "enabled") and jira_settings.enabled:
+                base_url = getattr(config.jira, "base_url", None)
                 if base_url:
-
+                    # Extract network and proxy settings from jira_settings
+                    self.integrations["jira"] = JIRAIntegration(
                         base_url,
                         config.jira.access_user,
                         config.jira.access_token,
                         cache,
-                        story_point_fields=getattr(jira_settings,
+                        story_point_fields=getattr(jira_settings, "story_point_fields", None),
+                        dns_timeout=getattr(jira_settings, "dns_timeout", 10),
+                        connection_timeout=getattr(jira_settings, "connection_timeout", 30),
+                        max_retries=getattr(jira_settings, "max_retries", 3),
+                        backoff_factor=getattr(jira_settings, "backoff_factor", 1.0),
+                        enable_proxy=getattr(jira_settings, "enable_proxy", False),
+                        proxy_url=getattr(jira_settings, "proxy_url", None),
                     )
-
-
-
-
-
-
-
-
-
-
-        # GitHub enrichment
-        if 'github' in self.integrations and repo_config.github_repo:
-            github = self.integrations['github']
-
+
+        # Initialize PM framework orchestrator
+        self.pm_orchestrator = None
+        if (
+            hasattr(config, "pm_integration")
+            and config.pm_integration
+            and config.pm_integration.enabled
+        ):
+            print(" 🔍 PM Integration detected - building configuration...")
             try:
-                #
-
-
+                # Create PM platform configuration for the orchestrator
+                pm_config = {
+                    "pm_platforms": {},
+                    "analysis": {
+                        "pm_integration": {
+                            "enabled": config.pm_integration.enabled,
+                            "primary_platform": config.pm_integration.primary_platform,
+                            "correlation": config.pm_integration.correlation,
+                        }
+                    },
+                }
+
+                # Convert PM platform configs to expected format
+                platforms_dict = config.pm_integration.platforms
+                if hasattr(platforms_dict, "__dict__"):
+                    # It's an AttrDict, convert to regular dict
+                    platforms_dict = dict(platforms_dict)
+
+                for platform_name, platform_config in platforms_dict.items():
+                    if hasattr(platform_config, "enabled") and platform_config.enabled:
+                        # Convert AttrDict to regular dict
+                        platform_config_dict = (
+                            dict(platform_config.config)
+                            if hasattr(platform_config.config, "__dict__")
+                            else platform_config.config
+                        )
+                        platform_settings = {
+                            "enabled": True,
+                            **platform_config_dict,
+                        }
+
+                        # Special handling for JIRA - use credentials from top-level JIRA config
+                        if platform_name == "jira" and hasattr(config, "jira"):
+                            platform_settings["username"] = config.jira.access_user
+                            platform_settings["api_token"] = config.jira.access_token
+                            # Also ensure base_url matches if not set
+                            if (
+                                not platform_settings.get("base_url")
+                                or platform_settings["base_url"] == "will_be_set_at_runtime"
+                            ):
+                                platform_settings["base_url"] = config.jira.base_url
+                            # Add cache directory for ticket caching (config file directory)
+                            if hasattr(config, "cache") and hasattr(config.cache, "directory"):
+                                platform_settings["cache_dir"] = config.cache.directory
+                            # Debug output to check credentials
+                            print(
+                                f" 🔍 JIRA config: username={platform_settings['username']}, has_token={bool(platform_settings['api_token'])}, base_url={platform_settings['base_url']}, cache_dir={platform_settings.get('cache_dir', 'not_set')}"
+                            )
+
+                        pm_config["pm_platforms"][platform_name] = platform_settings
+
+                # Debug output - show final PM config
+                print(
+                    f" 🔍 Final PM config platforms: {list(pm_config.get('pm_platforms', {}).keys())}"
+                )
+                for plat_name, plat_config in pm_config.get("pm_platforms", {}).items():
+                    print(
+                        f" 🔍 {plat_name}: enabled={plat_config.get('enabled')}, has_username={bool(plat_config.get('username'))}, has_token={bool(plat_config.get('api_token'))}"
+                    )
+
+                self.pm_orchestrator = PMFrameworkOrchestrator(pm_config)
+                print(
+                    f"📋 PM Framework initialized with {len(self.pm_orchestrator.get_active_platforms())} platforms"
                 )
-
-
-                # Calculate PR metrics
-                if prs:
-                    enrichment['pr_metrics'] = github.calculate_pr_metrics(prs)
-
+
             except Exception as e:
-                print(f"
-
+                print(f"⚠️ Failed to initialize PM framework: {e}")
+                self.pm_orchestrator = None
+
+    def enrich_repository_data(
+        self, repo_config: Any, commits: list[dict[str, Any]], since: datetime
+    ) -> dict[str, Any]:
+        """Enrich repository data from all available integrations."""
+        enrichment: dict[str, Any] = {"prs": [], "issues": [], "pr_metrics": {}, "pm_data": {}}
+
+        # GitHub enrichment
+        if "github" in self.integrations and repo_config.github_repo:
+            github_integration = self.integrations["github"]
+            if isinstance(github_integration, GitHubIntegration):
+                try:
+                    # Get PR data
+                    prs = github_integration.enrich_repository_with_prs(
+                        repo_config.github_repo, commits, since
+                    )
+                    enrichment["prs"] = prs
+
+                    # Calculate PR metrics
+                    if prs:
+                        enrichment["pr_metrics"] = github_integration.calculate_pr_metrics(prs)
+
+                except Exception as e:
+                    import traceback
+
+                    print(f" ⚠️ GitHub enrichment failed: {e}")
+                    print(f" Debug traceback: {traceback.format_exc()}")
+
         # JIRA enrichment for story points
-        if
-
+        if "jira" in self.integrations:
+            jira_integration = self.integrations["jira"]
+            if isinstance(jira_integration, JIRAIntegration):
+                try:
+                    # Enrich commits with JIRA story points
+                    jira_integration.enrich_commits_with_jira_data(commits)
+
+                    # Enrich PRs with JIRA story points
+                    if enrichment["prs"]:
+                        jira_integration.enrich_prs_with_jira_data(enrichment["prs"])
+
+                except Exception as e:
+                    print(f" ⚠️ JIRA enrichment failed: {e}")
+
+        # PM Framework enrichment
+        if self.pm_orchestrator and self.pm_orchestrator.is_enabled():
             try:
-
-
-
-
-
-
-
+                print(" 📋 Collecting PM platform data...")
+
+                # Get all issues from PM platforms
+                pm_issues = self.pm_orchestrator.get_all_issues(since=since)
+                enrichment["pm_data"]["issues"] = pm_issues
+
+                # Correlate issues with commits
+                correlations = self.pm_orchestrator.correlate_issues_with_commits(
+                    pm_issues, commits
+                )
+                enrichment["pm_data"]["correlations"] = correlations
+
+                # Calculate enhanced metrics
+                enhanced_metrics = self.pm_orchestrator.calculate_enhanced_metrics(
+                    commits, enrichment["prs"], pm_issues, correlations
+                )
+                enrichment["pm_data"]["metrics"] = enhanced_metrics
+
+                # Only show correlations message if there are any correlations found
+                if correlations:
+                    print(
+                        f" ✅ PM correlations found: {len(correlations)} commits linked to issues"
+                    )
+                else:
+                    print(" 📋 PM data processed (no correlations found)")
+
             except Exception as e:
-                print(f" ⚠️
-
+                print(f" ⚠️ PM framework enrichment failed: {e}")
+                enrichment["pm_data"] = {"error": str(e)}
+
         return enrichment
-
-    def get_platform_issues(self, project_key: str, since: datetime) ->
+
+    def get_platform_issues(self, project_key: str, since: datetime) -> list[dict[str, Any]]:
         """Get issues from all configured platforms."""
-        all_issues = []
-
+        all_issues: list[dict[str, Any]] = []
+
         # Check cache first
         cached_issues = []
-        for platform in [
+        for platform in ["github", "jira", "clickup", "linear"]:
             cached = self.cache.get_cached_issues(platform, project_key)
             cached_issues.extend(cached)
-
+
         if cached_issues:
             return cached_issues
-
+
         # Future: Fetch from APIs if not cached
         # This is where we'd add actual API calls to each platform
-
+
         return all_issues
-
-    def export_to_json(
-
-
-
-
-
-
+
+    def export_to_json(
+        self,
+        commits: list[dict[str, Any]],
+        prs: list[dict[str, Any]],
+        developer_stats: list[dict[str, Any]],
+        project_metrics: dict[str, Any],
+        dora_metrics: dict[str, Any],
+        output_path: str,
+    ) -> str:
         """Export all data to JSON format for API consumption."""
-
+
         # Prepare data for JSON serialization
-        def serialize_dates(obj):
+        def serialize_dates(obj: Any) -> Any:
             """Convert datetime objects to ISO format strings."""
             if isinstance(obj, datetime):
                 return obj.isoformat()
@@ -122,24 +246,24 @@ class IntegrationOrchestrator:
             elif isinstance(obj, list):
                 return [serialize_dates(item) for item in obj]
             return obj
-
+
         export_data = {
-
-
-
-
-
-
+            "metadata": {
+                "generated_at": datetime.now().isoformat(),
+                "version": "1.0",
+                "total_commits": len(commits),
+                "total_prs": len(prs),
+                "total_developers": len(developer_stats),
             },
-
-
-
-
-
+            "commits": serialize_dates(commits),
+            "pull_requests": serialize_dates(prs),
+            "developers": serialize_dates(developer_stats),
+            "project_metrics": serialize_dates(project_metrics),
+            "dora_metrics": serialize_dates(dora_metrics),
         }
-
+
         # Write JSON file
-        with open(output_path,
+        with open(output_path, "w") as f:
            json.dump(export_data, f, indent=2)
-
-        return output_path
+
+        return output_path
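
For context on the new export_to_json path above: it recursively converts datetime values before dumping and wraps everything in a metadata block. The standalone sketch below mirrors that helper outside the class to show the resulting JSON shape; the sample commit record and its field names are invented for illustration and are not taken from the package.

    import json
    from datetime import datetime
    from typing import Any

    def serialize_dates(obj: Any) -> Any:
        """Recursively convert datetime values to ISO-format strings."""
        if isinstance(obj, datetime):
            return obj.isoformat()
        if isinstance(obj, dict):
            return {key: serialize_dates(value) for key, value in obj.items()}
        if isinstance(obj, list):
            return [serialize_dates(item) for item in obj]
        return obj

    # Hypothetical commit record; these field names are illustrative only.
    commits = [{"hash": "abc123", "timestamp": datetime(2024, 1, 15, 9, 30), "files": ["cli.py"]}]

    export_data = {
        "metadata": {
            "generated_at": datetime.now().isoformat(),
            "version": "1.0",
            "total_commits": len(commits),
        },
        "commits": serialize_dates(commits),
    }
    print(json.dumps(export_data, indent=2))

The real method serializes pull requests, developer stats, project metrics, and DORA metrics the same way before writing the file.
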
gitflow_analytics/metrics/activity_scoring.py (new file)
@@ -0,0 +1,322 @@
+"""Developer activity scoring module using balanced metrics.
+
+Based on research and best practices for measuring developer productivity in 2024,
+this module implements a balanced scoring approach that considers:
+- Commits (baseline activity)
+- Pull Requests (collaboration and review)
+- Lines of Code (impact, with diminishing returns)
+- Code churn (deletions valued for refactoring)
+"""
+
+import math
+from typing import Any
+
+
+class ActivityScorer:
+    """Calculate balanced developer activity scores based on multiple metrics."""
+
+    # Weights based on research indicating balanced approach
+    WEIGHTS = {
+        "commits": 0.25,  # Each commit represents baseline effort
+        "prs": 0.30,  # PRs indicate collaboration and review effort
+        "code_impact": 0.30,  # Lines changed with diminishing returns
+        "complexity": 0.15,  # File changes and complexity
+    }
+
+    # Scaling factors based on research
+    COMMIT_BASE_SCORE = 10  # Each commit worth base 10 points
+    PR_BASE_SCORE = 50  # Each PR worth base 50 points (5x commit)
+    OPTIMAL_PR_SIZE = 200  # Research shows PRs under 200 lines are optimal
+
+    def calculate_activity_score(self, metrics: dict[str, Any]) -> dict[str, Any]:
+        """Calculate balanced activity score for a developer.
+
+        Args:
+            metrics: Dictionary containing:
+                - commits: Number of commits
+                - prs_involved: Number of PRs
+                - lines_added: Lines added
+                - lines_removed: Lines removed
+                - files_changed: Number of files changed
+                - complexity_delta: Complexity change
+
+        Returns:
+            Dictionary with:
+                - raw_score: Unscaled activity score
+                - normalized_score: Score normalized to 0-100
+                - components: Breakdown of score components
+        """
+        # Extract metrics with defaults
+        commits = metrics.get("commits", 0)
+        prs = metrics.get("prs_involved", 0)
+        lines_added = metrics.get("lines_added", 0)
+        lines_removed = metrics.get("lines_removed", 0)
+        files_changed = metrics.get(
+            "files_changed_count",
+            (
+                metrics.get("files_changed", 0)
+                if isinstance(metrics.get("files_changed"), int)
+                else len(metrics.get("files_changed", []))
+            ),
+        )
+        complexity = metrics.get("complexity_delta", 0)
+
+        # Calculate component scores
+        commit_score = self._calculate_commit_score(commits)
+        pr_score = self._calculate_pr_score(prs, lines_added + lines_removed)
+        code_score = self._calculate_code_impact_score(lines_added, lines_removed)
+        complexity_score = self._calculate_complexity_score(files_changed, complexity)
+
+        # Weighted total
+        components = {
+            "commit_score": commit_score,
+            "pr_score": pr_score,
+            "code_impact_score": code_score,
+            "complexity_score": complexity_score,
+        }
+
+        raw_score = (
+            commit_score * self.WEIGHTS["commits"]
+            + pr_score * self.WEIGHTS["prs"]
+            + code_score * self.WEIGHTS["code_impact"]
+            + complexity_score * self.WEIGHTS["complexity"]
+        )
+
+        return {
+            "raw_score": raw_score,
+            "normalized_score": self._normalize_score(raw_score),
+            "components": components,
+            "activity_level": self._get_activity_level(raw_score),
+        }
+
+    def _calculate_commit_score(self, commits: int) -> float:
+        """Calculate score from commit count with diminishing returns."""
+        if commits == 0:
+            return 0
+
+        # Use logarithmic scaling for diminishing returns
+        # First 10 commits worth full value, then diminishing
+        if commits <= 10:
+            return commits * self.COMMIT_BASE_SCORE
+        else:
+            base = 10 * self.COMMIT_BASE_SCORE
+            extra = math.log10(commits - 9) * self.COMMIT_BASE_SCORE * 5
+            return base + extra
+
+    def _calculate_pr_score(self, prs: int, total_lines: int) -> float:
+        """Calculate PR score considering optimal PR sizes."""
+        if prs == 0:
+            return 0
+
+        base_score = prs * self.PR_BASE_SCORE
+
+        # Bonus for maintaining optimal PR size
+        avg_pr_size = total_lines / prs if prs > 0 else 0
+        if avg_pr_size <= self.OPTIMAL_PR_SIZE:
+            size_bonus = 1.2  # 20% bonus for optimal size
+        else:
+            # Penalty for oversized PRs
+            size_bonus = max(0.7, 1 - (avg_pr_size - self.OPTIMAL_PR_SIZE) / 1000)
+
+        return base_score * size_bonus
+
+    def _calculate_code_impact_score(self, lines_added: int, lines_removed: int) -> float:
+        """Calculate code impact score with balanced add/remove consideration and enhanced diminishing returns.
+
+        WHY: Massive single commits can unfairly inflate scores. This implementation
+        uses stronger diminishing returns to prevent score inflation from extremely
+        large commits while still rewarding meaningful contributions.
+        """
+        # Research shows deletions are valuable (refactoring, cleanup)
+        # Weight deletions at 70% of additions
+        effective_lines = lines_added + (lines_removed * 0.7)
+
+        if effective_lines == 0:
+            return 0
+
+        # Logarithmic scaling to prevent gaming with massive changes
+        # First 500 lines worth full value
+        if effective_lines <= 500:
+            return effective_lines * 0.2
+        else:
+            base = 500 * 0.2
+            # Enhanced diminishing returns for massive commits
+            if effective_lines <= 2000:
+                extra = math.log10(effective_lines - 499) * 15  # Reduced multiplier
+            else:
+                # Very large commits get even more aggressive diminishing returns
+                medium_extra = math.log10(2000 - 499) * 15
+                large_extra = math.log10(effective_lines - 1999) * 8  # Much smaller multiplier
+                extra = medium_extra + large_extra
+            return base + extra
+
+    def _calculate_complexity_score(self, files_changed: int, complexity_delta: float) -> float:
+        """Calculate score based on breadth and complexity of changes."""
+        if files_changed == 0:
+            return 0
+
+        # Base score from files touched (breadth of impact)
+        file_score = min(files_changed * 5, 50)  # Cap at 50 points
+
+        # Complexity factor (can be negative for simplification)
+        # Reward simplification (negative complexity delta)
+        if complexity_delta < 0:
+            complexity_bonus = abs(complexity_delta) * 0.5  # Reward simplification
+        else:
+            complexity_bonus = -min(
+                complexity_delta * 0.2, 10
+            )  # Small penalty for added complexity
+
+        return max(0, file_score + complexity_bonus)
+
+    def _normalize_score(self, raw_score: float) -> float:
+        """Normalize score to 0-100 range."""
+        # Based on research, a highly productive week might have:
+        # - 15 commits (150 points after scaling)
+        # - 3 PRs of optimal size (180 points)
+        # - 1000 effective lines (120 points)
+        # - 20 files changed (50 points)
+        # Total: ~500 points = 100 normalized
+
+        normalized = (raw_score / 500) * 100
+        return min(100, normalized)  # Cap at 100
+
+    def _get_activity_level(self, raw_score: float) -> str:
+        """Categorize activity level based on score."""
+        normalized = self._normalize_score(raw_score)
+
+        if normalized >= 80:
+            return "exceptional"
+        elif normalized >= 60:
+            return "high"
+        elif normalized >= 40:
+            return "moderate"
+        elif normalized >= 20:
+            return "low"
+        else:
+            return "minimal"
+
+    def calculate_team_relative_score(
+        self, individual_score: float, team_scores: list[float]
+    ) -> dict[str, Any]:
+        """Calculate relative performance within team context.
+
+        Args:
+            individual_score: Individual's raw activity score
+            team_scores: List of all team members' raw scores
+
+        Returns:
+            Dictionary with percentile and relative metrics
+        """
+        if not team_scores:
+            return {"percentile": 50, "relative_score": 1.0, "team_position": "average"}
+
+        # Calculate percentile
+        scores_below = sum(1 for score in team_scores if score < individual_score)
+        percentile = (scores_below / len(team_scores)) * 100
+
+        # Calculate relative to team average
+        team_avg = sum(team_scores) / len(team_scores)
+        relative_score = individual_score / team_avg if team_avg > 0 else 1.0
+
+        # Determine position
+        if percentile >= 90:
+            position = "top_performer"
+        elif percentile >= 75:
+            position = "above_average"
+        elif percentile >= 25:
+            position = "average"
+        else:
+            position = "below_average"
+
+        return {
+            "percentile": round(percentile, 1),
+            "relative_score": round(relative_score, 2),
+            "team_position": position,
+            "team_average": round(team_avg, 1),
+        }
+
+    def normalize_scores_on_curve(
+        self, developer_scores: dict[str, float], curve_mean: float = 50.0, curve_std: float = 15.0
+    ) -> dict[str, dict[str, Any]]:
+        """Normalize activity scores on a bell curve with quintile grouping.
+
+        Args:
+            developer_scores: Dictionary mapping developer IDs to raw scores
+            curve_mean: Target mean for the normalized distribution (default: 50)
+            curve_std: Target standard deviation for the distribution (default: 15)
+
+        Returns:
+            Dictionary with normalized scores and quintile groupings
+        """
+        if not developer_scores:
+            return {}
+
+        # Get all scores
+        scores = list(developer_scores.values())
+
+        # Calculate current statistics
+        current_mean = sum(scores) / len(scores)
+        variance = sum((x - current_mean) ** 2 for x in scores) / len(scores)
+        current_std = math.sqrt(variance) if variance > 0 else 1.0
+
+        # Normalize to bell curve
+        normalized_scores = {}
+        for dev_id, raw_score in developer_scores.items():
+            # Z-score normalization
+            z_score = (raw_score - current_mean) / current_std if current_std > 0 else 0
+
+            # Transform to target distribution
+            curved_score = curve_mean + (z_score * curve_std)
+
+            # Ensure scores stay in reasonable range (0-100)
+            curved_score = max(0, min(100, curved_score))
+
+            normalized_scores[dev_id] = curved_score
+
+        # Sort developers by normalized score for quintile assignment
+        sorted_devs = sorted(normalized_scores.items(), key=lambda x: x[1])
+
+        # Assign quintiles
+        results = {}
+        quintile_size = len(sorted_devs) / 5
+
+        for idx, (dev_id, curved_score) in enumerate(sorted_devs):
+            # Determine quintile (1-5)
+            quintile = min(5, int(idx / quintile_size) + 1)
+
+            # Determine activity level based on quintile
+            if quintile == 5:
+                activity_level = "exceptional"
+                level_description = "Top 20%"
+            elif quintile == 4:
+                activity_level = "high"
+                level_description = "60-80th percentile"
+            elif quintile == 3:
+                activity_level = "moderate"
+                level_description = "40-60th percentile"
+            elif quintile == 2:
+                activity_level = "low"
+                level_description = "20-40th percentile"
+            else:  # quintile == 1
+                activity_level = "minimal"
+                level_description = "Bottom 20%"
+
+            # Calculate exact percentile
+            percentile = ((idx + 0.5) / len(sorted_devs)) * 100
+
+            results[dev_id] = {
+                "raw_score": developer_scores[dev_id],
+                "curved_score": round(curved_score, 1),
+                "quintile": quintile,
+                "activity_level": activity_level,
+                "level_description": level_description,
+                "percentile": round(percentile, 0),
+                "z_score": (
+                    round((developer_scores[dev_id] - current_mean) / current_std, 2)
+                    if current_std > 0
+                    else 0
+                ),
+            }
+
+        return results