gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4108 -350
- gitflow_analytics/cli_rich.py +198 -48
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +904 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +441 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -508
- gitflow_analytics/core/analyzer.py +1209 -98
- gitflow_analytics/core/cache.py +1337 -29
- gitflow_analytics/core/data_fetcher.py +1193 -0
- gitflow_analytics/core/identity.py +363 -14
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +8 -1
- gitflow_analytics/extractors/tickets.py +749 -11
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +175 -11
- gitflow_analytics/integrations/jira_integration.py +461 -24
- gitflow_analytics/integrations/orchestrator.py +124 -1
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +379 -20
- gitflow_analytics/models/database.py +843 -53
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +9 -10
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
- gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
- gitflow_analytics/qualitative/core/__init__.py +4 -4
- gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
- gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
- gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
- gitflow_analytics/qualitative/core/processor.py +381 -248
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +7 -7
- gitflow_analytics/qualitative/models/schemas.py +155 -121
- gitflow_analytics/qualitative/utils/__init__.py +4 -4
- gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
- gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
- gitflow_analytics/qualitative/utils/metrics.py +172 -158
- gitflow_analytics/qualitative/utils/text_processing.py +146 -104
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +539 -14
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1676 -212
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2287 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +1 -1
- gitflow_analytics/tui/app.py +129 -126
- gitflow_analytics/tui/screens/__init__.py +3 -3
- gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
- gitflow_analytics/tui/screens/configuration_screen.py +154 -178
- gitflow_analytics/tui/screens/loading_screen.py +100 -110
- gitflow_analytics/tui/screens/main_screen.py +89 -72
- gitflow_analytics/tui/screens/results_screen.py +305 -281
- gitflow_analytics/tui/widgets/__init__.py +2 -2
- gitflow_analytics/tui/widgets/data_table.py +67 -69
- gitflow_analytics/tui/widgets/export_modal.py +76 -76
- gitflow_analytics/tui/widgets/progress_widget.py +41 -46
- gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
- gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1144 @@
|
|
|
1
|
+
"""Story point correlation analysis for GitFlow Analytics.
|
|
2
|
+
|
|
3
|
+
This module provides comprehensive analysis of story point estimation accuracy and
|
|
4
|
+
correlation with actual development work metrics including commits, lines of code,
|
|
5
|
+
and time spent. It tracks velocity trends and generates actionable insights for
|
|
6
|
+
process improvement and team calibration.
|
|
7
|
+
|
|
8
|
+
WHY: Story point estimation is a critical part of agile development, but accuracy
|
|
9
|
+
varies significantly across teams and individuals. This analysis helps identify
|
|
10
|
+
which teams/developers have accurate estimates vs which need calibration training.
|
|
11
|
+
|
|
12
|
+
DESIGN DECISION: Week-based aggregation using Monday-Sunday boundaries to align
|
|
13
|
+
with sprint planning cycles and provide consistent reporting periods. All metrics
|
|
14
|
+
are calculated both at individual and team levels for targeted improvements.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
from collections import defaultdict
|
|
19
|
+
from datetime import datetime, timedelta, timezone
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Any, Optional
|
|
22
|
+
|
|
23
|
+
import numpy as np
|
|
24
|
+
import pandas as pd
|
|
25
|
+
from scipy import stats
|
|
26
|
+
|
|
27
|
+
# Get logger for this module
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class StoryPointCorrelationAnalyzer:
|
|
32
|
+
"""Analyzes story point estimation accuracy and correlations with actual work."""
|
|
33
|
+
|
|
34
|
+
def __init__(self, anonymize: bool = False, identity_resolver=None):
|
|
35
|
+
"""Initialize the correlation analyzer.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
anonymize: Whether to anonymize developer names in reports
|
|
39
|
+
identity_resolver: Identity resolver for canonical developer names
|
|
40
|
+
"""
|
|
41
|
+
self.anonymize = anonymize
|
|
42
|
+
self.identity_resolver = identity_resolver
|
|
43
|
+
self._anonymization_map: dict[str, str] = {}
|
|
44
|
+
self._anonymous_counter = 0
|
|
45
|
+
|
|
46
|
+
def calculate_weekly_correlations(
    self,
    commits: list[dict[str, Any]],
    prs: list[dict[str, Any]],
    pm_data: Optional[dict[str, Any]] = None,
    weeks: int = 12
) -> dict[str, Any]:
    """Compute week-by-week correlations between story points and work metrics.

    Weekly aggregation lines up with typical sprint cadences, so the
    resulting numbers feed directly into planning and retrospectives.

    Args:
        commits: Commit records carrying story points and change metrics.
        prs: Pull request records with story points.
        pm_data: Optional PM-platform payload with issue correlations.
        weeks: Size of the trailing analysis window, in weeks.

    Returns:
        Dictionary with per-week correlations plus summary, trend,
        per-developer accuracy, and recommendation sections.
    """
    logger.debug(f"Starting weekly correlation analysis for {weeks} weeks")

    # Trailing window ending "now" (UTC).
    end_date = datetime.now(timezone.utc)
    start_date = end_date - timedelta(weeks=weeks)
    logger.debug(f"Analysis period: {start_date} to {end_date}")

    # Bucket raw activity into (week, developer) cells before correlating.
    weekly_metrics = self._aggregate_weekly_metrics(
        commits, prs, pm_data, start_date, end_date
    )

    # One correlation payload per analyzed week.
    correlation_results = {
        week_start: self._calculate_week_correlations(week_data)
        for week_start, week_data in weekly_metrics.items()
    }
    logger.debug(f"Calculated correlations for {len(correlation_results)} weeks")

    return {
        "weekly_correlations": correlation_results,
        "summary_stats": self._calculate_correlation_summary(correlation_results),
        "trend_analysis": self._analyze_correlation_trends(correlation_results),
        "developer_accuracy": self._analyze_developer_accuracy(weekly_metrics),
        "recommendations": self._generate_correlation_recommendations(
            correlation_results, weekly_metrics
        ),
    }
|
|
95
|
+
|
|
96
|
+
def analyze_estimation_accuracy(
    self,
    commits: list[dict[str, Any]],
    pm_data: Optional[dict[str, Any]] = None,
    weeks: int = 12
) -> dict[str, Any]:
    """Compare estimated story points against what was actually delivered.

    Surfacing systematic over/under-estimation gives teams concrete,
    targeted feedback for improving planning accuracy. Multiple accuracy
    views (overall, per developer, per story size) are produced so the
    feedback can be aimed precisely.

    Args:
        commits: Commit records carrying story points.
        pm_data: PM-platform payload holding the original estimates.
        weeks: Size of the trailing analysis window, in weeks.

    Returns:
        Dictionary with overall, per-developer, and size-based accuracy
        metrics plus improvement suggestions.
    """
    logger.debug("Starting estimation accuracy analysis")

    # PM data supplies the original estimates; without it there is
    # nothing to compare against.
    if not pm_data or "correlations" not in pm_data:
        logger.warning("No PM data available for estimation accuracy analysis")
        return self._empty_accuracy_analysis()

    estimation_pairs = self._extract_estimation_pairs(commits, pm_data, weeks)
    if not estimation_pairs:
        logger.warning("No estimation pairs found for accuracy analysis")
        return self._empty_accuracy_analysis()

    overall = self._calculate_accuracy_metrics(estimation_pairs)
    per_developer = self._analyze_developer_estimation_accuracy(estimation_pairs)
    per_size = self._analyze_size_based_accuracy(estimation_pairs)

    return {
        "overall_accuracy": overall,
        "developer_accuracy": per_developer,
        "size_based_accuracy": per_size,
        "improvement_suggestions": self._generate_accuracy_recommendations(
            overall, per_developer, per_size
        ),
    }
|
|
148
|
+
|
|
149
|
+
def calculate_velocity_metrics(
    self,
    commits: list[dict[str, Any]],
    prs: list[dict[str, Any]],
    pm_data: Optional[dict[str, Any]] = None,
    weeks: int = 12
) -> dict[str, Any]:
    """Summarize delivery velocity over a trailing window of weeks.

    Tracks team throughput over time and how predictable the cadence is,
    which feeds sprint planning and capacity management.

    Args:
        commits: Commit records carrying story points.
        prs: Pull request records.
        pm_data: Optional PM-platform context (not consumed in this
            method; kept for interface symmetry with the other analyses).
        weeks: Size of the trailing analysis window, in weeks.

    Returns:
        Dictionary with weekly velocity, trends, per-developer velocity,
        predictability metrics, and a team capacity analysis.
    """
    logger.debug(f"Calculating velocity metrics for {weeks} weeks")

    # Trailing window ending "now" (UTC).
    end_date = datetime.now(timezone.utc)
    start_date = end_date - timedelta(weeks=weeks)

    weekly_velocity = self._aggregate_weekly_velocity(commits, prs, start_date, end_date)
    velocity_trends = self._calculate_velocity_trends(weekly_velocity)
    developer_velocity = self._analyze_developer_velocity(commits, start_date, end_date)
    predictability = self._calculate_velocity_predictability(weekly_velocity)

    return {
        "weekly_velocity": weekly_velocity,
        "trends": velocity_trends,
        "developer_velocity": developer_velocity,
        "predictability": predictability,
        "capacity_analysis": self._analyze_team_capacity(weekly_velocity, developer_velocity)
    }
|
|
196
|
+
|
|
197
|
+
def generate_correlation_report(
    self,
    commits: list[dict[str, Any]],
    prs: list[dict[str, Any]],
    pm_data: Optional[dict[str, Any]],
    output_path: Path,
    weeks: int = 12
) -> Path:
    """Write a CSV report of story point correlation metrics.

    CSV keeps the data easy to load into spreadsheet tools and to share
    with stakeholders who want the raw correlation numbers.

    Args:
        commits: Commit records carrying story points.
        prs: Pull request records.
        pm_data: PM-platform payload with correlations.
        output_path: Destination for the CSV file.
        weeks: Size of the trailing analysis window, in weeks.

    Returns:
        Path to the generated CSV report.

    Raises:
        Exception: Re-raises any analysis/serialization failure after
            writing an empty placeholder report.
    """
    logger.debug(f"Generating story point correlation report: {output_path}")

    try:
        # Run the three analyses that feed the report.
        weekly = self.calculate_weekly_correlations(commits, prs, pm_data, weeks)
        accuracy = self.analyze_estimation_accuracy(commits, pm_data, weeks)
        velocity = self.calculate_velocity_metrics(commits, prs, pm_data, weeks)

        rows = self._build_correlation_csv_rows(weekly, accuracy, velocity)

        if not rows:
            # No data: still emit a header-only CSV so downstream tooling
            # always finds a file at output_path.
            self._write_empty_correlation_csv(output_path)
            logger.debug("Generated empty correlation report (no data)")
        else:
            pd.DataFrame(rows).to_csv(output_path, index=False)
            logger.debug(f"Generated correlation report with {len(rows)} rows")

        return output_path

    except Exception as e:
        logger.error(f"Error generating story point correlation report: {e}")
        # Leave a well-formed (empty) report behind before propagating.
        self._write_empty_correlation_csv(output_path)
        raise
|
|
249
|
+
|
|
250
|
+
def _aggregate_weekly_metrics(
|
|
251
|
+
self,
|
|
252
|
+
commits: list[dict[str, Any]],
|
|
253
|
+
prs: list[dict[str, Any]],
|
|
254
|
+
pm_data: Optional[dict[str, Any]],
|
|
255
|
+
start_date: datetime,
|
|
256
|
+
end_date: datetime
|
|
257
|
+
) -> dict[datetime, dict[str, dict[str, Any]]]:
|
|
258
|
+
"""Aggregate metrics by week and developer for correlation analysis."""
|
|
259
|
+
weekly_metrics = defaultdict(lambda: defaultdict(lambda: {
|
|
260
|
+
"story_points": 0,
|
|
261
|
+
"commits": 0,
|
|
262
|
+
"lines_added": 0,
|
|
263
|
+
"lines_removed": 0,
|
|
264
|
+
"files_changed": 0,
|
|
265
|
+
"prs": 0,
|
|
266
|
+
"complexity_delta": 0.0,
|
|
267
|
+
"time_spent_hours": 0.0, # Estimated from commit frequency
|
|
268
|
+
"estimated_story_points": 0, # From PM platform
|
|
269
|
+
"actual_story_points": 0 # From commits
|
|
270
|
+
}))
|
|
271
|
+
|
|
272
|
+
# Process commits
|
|
273
|
+
for commit in commits:
|
|
274
|
+
timestamp = self._ensure_timezone_aware(commit.get("timestamp"))
|
|
275
|
+
if not timestamp or timestamp < start_date or timestamp > end_date:
|
|
276
|
+
continue
|
|
277
|
+
|
|
278
|
+
week_start = self._get_week_start(timestamp)
|
|
279
|
+
developer_id = commit.get("canonical_id", commit.get("author_email", "unknown"))
|
|
280
|
+
|
|
281
|
+
metrics = weekly_metrics[week_start][developer_id]
|
|
282
|
+
|
|
283
|
+
# Aggregate commit metrics
|
|
284
|
+
metrics["commits"] += 1
|
|
285
|
+
metrics["story_points"] += commit.get("story_points", 0) or 0
|
|
286
|
+
metrics["actual_story_points"] += commit.get("story_points", 0) or 0
|
|
287
|
+
metrics["lines_added"] += commit.get("insertions", 0) or 0
|
|
288
|
+
metrics["lines_removed"] += commit.get("deletions", 0) or 0
|
|
289
|
+
metrics["files_changed"] += commit.get("files_changed", 0) or 0
|
|
290
|
+
metrics["complexity_delta"] += commit.get("complexity_delta", 0.0) or 0.0
|
|
291
|
+
|
|
292
|
+
# Process PRs
|
|
293
|
+
for pr in prs:
|
|
294
|
+
created_at = self._ensure_timezone_aware(pr.get("created_at"))
|
|
295
|
+
if not created_at or created_at < start_date or created_at > end_date:
|
|
296
|
+
continue
|
|
297
|
+
|
|
298
|
+
week_start = self._get_week_start(created_at)
|
|
299
|
+
developer_id = pr.get("canonical_id", pr.get("author", "unknown"))
|
|
300
|
+
|
|
301
|
+
if developer_id in weekly_metrics[week_start]:
|
|
302
|
+
weekly_metrics[week_start][developer_id]["prs"] += 1
|
|
303
|
+
|
|
304
|
+
# Add PM platform data if available
|
|
305
|
+
if pm_data and "correlations" in pm_data:
|
|
306
|
+
for correlation in pm_data["correlations"]:
|
|
307
|
+
commit_date = correlation.get("commit_date")
|
|
308
|
+
if not commit_date:
|
|
309
|
+
continue
|
|
310
|
+
|
|
311
|
+
timestamp = self._ensure_timezone_aware(
|
|
312
|
+
datetime.fromisoformat(commit_date.replace("Z", "+00:00"))
|
|
313
|
+
if isinstance(commit_date, str) else commit_date
|
|
314
|
+
)
|
|
315
|
+
|
|
316
|
+
if timestamp < start_date or timestamp > end_date:
|
|
317
|
+
continue
|
|
318
|
+
|
|
319
|
+
week_start = self._get_week_start(timestamp)
|
|
320
|
+
developer_id = correlation.get("commit_author", "unknown")
|
|
321
|
+
|
|
322
|
+
if developer_id in weekly_metrics[week_start]:
|
|
323
|
+
estimated_sp = correlation.get("story_points", 0) or 0
|
|
324
|
+
weekly_metrics[week_start][developer_id]["estimated_story_points"] += estimated_sp
|
|
325
|
+
|
|
326
|
+
# Convert defaultdicts to regular dicts for JSON serialization
|
|
327
|
+
return {week: dict(developers) for week, developers in weekly_metrics.items()}
|
|
328
|
+
|
|
329
|
+
def _calculate_week_correlations(self, week_data: dict[str, dict[str, Any]]) -> dict[str, Any]:
    """Correlate story points with activity metrics for one week.

    Pearson correlations are computed across the developers active in
    the week; with fewer than two developers (or zero variance in story
    points) every correlation is reported as 0.0.
    """
    if len(week_data) < 2:
        return self._empty_week_correlations()

    corr_keys = ["sp_commits", "sp_lines", "sp_files", "sp_prs", "sp_complexity"]

    # Build parallel per-developer series for the correlation inputs.
    developers: list[str] = []
    story_points: list[Any] = []
    commits: list[Any] = []
    lines_changed: list[Any] = []
    files_changed: list[Any] = []
    prs: list[Any] = []
    complexity: list[Any] = []

    for dev_id, metrics in week_data.items():
        developers.append(dev_id)
        story_points.append(metrics["story_points"])
        commits.append(metrics["commits"])
        lines_changed.append(metrics["lines_added"] + metrics["lines_removed"])
        files_changed.append(metrics["files_changed"])
        prs.append(metrics["prs"])
        complexity.append(metrics["complexity_delta"])

    correlations: dict[str, float] = {}
    try:
        if len(story_points) < 2:
            # Safety net; normally unreachable given the guard above.
            logger.debug(f"Insufficient data for correlation: only {len(story_points)} data points")
            correlations = dict.fromkeys(corr_keys, 0.0)
        elif np.std(story_points) == 0:
            # Zero variance makes Pearson correlation undefined.
            logger.debug("All story points are the same value - no variance for correlation")
            correlations = dict.fromkeys(corr_keys, 0.0)
        else:
            series = {
                "sp_commits": commits,
                "sp_lines": lines_changed,
                "sp_files": files_changed,
                "sp_prs": prs,
                "sp_complexity": complexity,
            }
            for key, values in series.items():
                correlations[key] = float(stats.pearsonr(story_points, values)[0])
            logger.debug(f"Calculated correlations with {len(story_points)} data points")

    except Exception as e:
        # Fall back to all-zero correlations on any numerical failure.
        logger.warning(f"Error calculating correlations: {e}")
        correlations = dict.fromkeys(corr_keys, 0.0)

    return {
        "correlations": correlations,
        "sample_size": len(developers),
        "total_story_points": sum(story_points),
        "total_commits": sum(commits),
        "total_lines_changed": sum(lines_changed)
    }
|
|
383
|
+
|
|
384
|
+
def _calculate_correlation_summary(self, correlation_results: dict[datetime, dict[str, Any]]) -> dict[str, Any]:
|
|
385
|
+
"""Calculate summary statistics across all weeks."""
|
|
386
|
+
if not correlation_results:
|
|
387
|
+
return {"avg_correlations": {}, "trend_direction": "stable", "strength": "weak"}
|
|
388
|
+
|
|
389
|
+
# Aggregate correlations across weeks
|
|
390
|
+
all_correlations = defaultdict(list)
|
|
391
|
+
|
|
392
|
+
for week_data in correlation_results.values():
|
|
393
|
+
correlations = week_data.get("correlations", {})
|
|
394
|
+
for metric, value in correlations.items():
|
|
395
|
+
if not np.isnan(value): # Filter out NaN values
|
|
396
|
+
all_correlations[metric].append(value)
|
|
397
|
+
|
|
398
|
+
# Calculate averages
|
|
399
|
+
avg_correlations = {}
|
|
400
|
+
for metric, values in all_correlations.items():
|
|
401
|
+
if values:
|
|
402
|
+
avg_correlations[metric] = float(np.mean(values))
|
|
403
|
+
else:
|
|
404
|
+
avg_correlations[metric] = 0.0
|
|
405
|
+
|
|
406
|
+
# Determine overall correlation strength
|
|
407
|
+
avg_strength = np.mean(list(avg_correlations.values()))
|
|
408
|
+
if avg_strength > 0.7:
|
|
409
|
+
strength = "strong"
|
|
410
|
+
elif avg_strength > 0.4:
|
|
411
|
+
strength = "moderate"
|
|
412
|
+
else:
|
|
413
|
+
strength = "weak"
|
|
414
|
+
|
|
415
|
+
return {
|
|
416
|
+
"avg_correlations": avg_correlations,
|
|
417
|
+
"strength": strength,
|
|
418
|
+
"weeks_analyzed": len(correlation_results),
|
|
419
|
+
"max_correlation": max(avg_correlations.values()) if avg_correlations else 0.0,
|
|
420
|
+
"min_correlation": min(avg_correlations.values()) if avg_correlations else 0.0
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
def _analyze_correlation_trends(self, correlation_results: dict[datetime, dict[str, Any]]) -> dict[str, Any]:
|
|
424
|
+
"""Analyze trends in correlations over time."""
|
|
425
|
+
if len(correlation_results) < 3:
|
|
426
|
+
return {"trend_direction": "insufficient_data", "trend_strength": 0.0}
|
|
427
|
+
|
|
428
|
+
# Sort by week for trend analysis
|
|
429
|
+
sorted_weeks = sorted(correlation_results.keys())
|
|
430
|
+
|
|
431
|
+
# Calculate trend for each correlation metric
|
|
432
|
+
trends = {}
|
|
433
|
+
|
|
434
|
+
for metric in ["sp_commits", "sp_lines", "sp_files", "sp_prs", "sp_complexity"]:
|
|
435
|
+
values = []
|
|
436
|
+
weeks = []
|
|
437
|
+
|
|
438
|
+
for week in sorted_weeks:
|
|
439
|
+
week_correlations = correlation_results[week].get("correlations", {})
|
|
440
|
+
if metric in week_correlations and not np.isnan(week_correlations[metric]):
|
|
441
|
+
values.append(week_correlations[metric])
|
|
442
|
+
weeks.append(len(weeks)) # Use index as x-value
|
|
443
|
+
|
|
444
|
+
if len(values) >= 3: # Need at least 3 points for trend
|
|
445
|
+
slope, intercept, r_value, p_value, std_err = stats.linregress(weeks, values)
|
|
446
|
+
trends[metric] = {
|
|
447
|
+
"slope": float(slope),
|
|
448
|
+
"r_squared": float(r_value ** 2),
|
|
449
|
+
"p_value": float(p_value),
|
|
450
|
+
"direction": "improving" if slope > 0.01 else "declining" if slope < -0.01 else "stable"
|
|
451
|
+
}
|
|
452
|
+
else:
|
|
453
|
+
trends[metric] = {"slope": 0.0, "direction": "insufficient_data"}
|
|
454
|
+
|
|
455
|
+
return trends
|
|
456
|
+
|
|
457
|
+
def _analyze_developer_accuracy(self, weekly_metrics: dict[datetime, dict[str, dict[str, Any]]]) -> dict[str, Any]:
|
|
458
|
+
"""Analyze story point estimation accuracy by developer."""
|
|
459
|
+
developer_totals = defaultdict(lambda: {
|
|
460
|
+
"estimated_total": 0,
|
|
461
|
+
"actual_total": 0,
|
|
462
|
+
"weeks_active": 0,
|
|
463
|
+
"accuracy_scores": []
|
|
464
|
+
})
|
|
465
|
+
|
|
466
|
+
for week_data in weekly_metrics.values():
|
|
467
|
+
for dev_id, metrics in week_data.items():
|
|
468
|
+
estimated = metrics.get("estimated_story_points", 0)
|
|
469
|
+
actual = metrics.get("actual_story_points", 0)
|
|
470
|
+
|
|
471
|
+
if estimated > 0 or actual > 0: # Developer was active
|
|
472
|
+
dev_stats = developer_totals[dev_id]
|
|
473
|
+
dev_stats["estimated_total"] += estimated
|
|
474
|
+
dev_stats["actual_total"] += actual
|
|
475
|
+
dev_stats["weeks_active"] += 1
|
|
476
|
+
|
|
477
|
+
# Calculate weekly accuracy if both values exist
|
|
478
|
+
if estimated > 0 and actual > 0:
|
|
479
|
+
accuracy = 1.0 - abs(estimated - actual) / max(estimated, actual)
|
|
480
|
+
dev_stats["accuracy_scores"].append(accuracy)
|
|
481
|
+
|
|
482
|
+
# Calculate final accuracy metrics for each developer
|
|
483
|
+
developer_accuracy = {}
|
|
484
|
+
|
|
485
|
+
for dev_id, dev_stats in developer_totals.items():
|
|
486
|
+
if dev_stats["weeks_active"] > 0:
|
|
487
|
+
# Overall accuracy based on totals
|
|
488
|
+
if dev_stats["estimated_total"] > 0 and dev_stats["actual_total"] > 0:
|
|
489
|
+
overall_accuracy = 1.0 - abs(dev_stats["estimated_total"] - dev_stats["actual_total"]) / max(dev_stats["estimated_total"], dev_stats["actual_total"])
|
|
490
|
+
else:
|
|
491
|
+
overall_accuracy = 0.0
|
|
492
|
+
|
|
493
|
+
# Average weekly accuracy
|
|
494
|
+
if dev_stats["accuracy_scores"]:
|
|
495
|
+
avg_weekly_accuracy = float(np.mean(dev_stats["accuracy_scores"]))
|
|
496
|
+
consistency = 1.0 - float(np.std(dev_stats["accuracy_scores"]))
|
|
497
|
+
else:
|
|
498
|
+
avg_weekly_accuracy = 0.0
|
|
499
|
+
consistency = 0.0
|
|
500
|
+
|
|
501
|
+
developer_accuracy[self._anonymize_value(dev_id, "name")] = {
|
|
502
|
+
"overall_accuracy": float(overall_accuracy),
|
|
503
|
+
"avg_weekly_accuracy": avg_weekly_accuracy,
|
|
504
|
+
"consistency": consistency,
|
|
505
|
+
"weeks_active": dev_stats["weeks_active"],
|
|
506
|
+
"total_estimated": dev_stats["estimated_total"],
|
|
507
|
+
"total_actual": dev_stats["actual_total"],
|
|
508
|
+
"estimation_ratio": dev_stats["actual_total"] / max(dev_stats["estimated_total"], 1)
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
return developer_accuracy
|
|
512
|
+
|
|
513
|
+
def _generate_correlation_recommendations(
|
|
514
|
+
self, correlation_results: dict[datetime, dict[str, Any]], weekly_metrics: dict[datetime, dict[str, dict[str, Any]]]
|
|
515
|
+
) -> list[dict[str, str]]:
|
|
516
|
+
"""Generate actionable recommendations based on correlation analysis."""
|
|
517
|
+
recommendations = []
|
|
518
|
+
|
|
519
|
+
summary = self._calculate_correlation_summary(correlation_results)
|
|
520
|
+
avg_correlations = summary.get("avg_correlations", {})
|
|
521
|
+
|
|
522
|
+
# Check story points to commits correlation
|
|
523
|
+
sp_commits_corr = avg_correlations.get("sp_commits", 0)
|
|
524
|
+
if sp_commits_corr < 0.3:
|
|
525
|
+
recommendations.append({
|
|
526
|
+
"type": "process_improvement",
|
|
527
|
+
"priority": "high",
|
|
528
|
+
"title": "Weak Story Points to Commits Correlation",
|
|
529
|
+
"description": f"Story points show weak correlation with commit count ({sp_commits_corr:.2f}). Consider story point training or breaking down large stories.",
|
|
530
|
+
"action": "Review story point estimation guidelines and provide team training"
|
|
531
|
+
})
|
|
532
|
+
|
|
533
|
+
# Check story points to lines of code correlation
|
|
534
|
+
sp_lines_corr = avg_correlations.get("sp_lines", 0)
|
|
535
|
+
if sp_lines_corr < 0.4:
|
|
536
|
+
recommendations.append({
|
|
537
|
+
"type": "estimation_calibration",
|
|
538
|
+
"priority": "medium",
|
|
539
|
+
"title": "Story Points Don't Correlate with Code Changes",
|
|
540
|
+
"description": f"Story points show weak correlation with lines of code changed ({sp_lines_corr:.2f}). This may indicate estimation inconsistency.",
|
|
541
|
+
"action": "Analyze whether story points reflect complexity vs. effort, and align team understanding"
|
|
542
|
+
})
|
|
543
|
+
|
|
544
|
+
# Analyze developer accuracy
|
|
545
|
+
developer_accuracy = self._analyze_developer_accuracy(weekly_metrics)
|
|
546
|
+
low_accuracy_devs = [
|
|
547
|
+
dev for dev, stats in developer_accuracy.items()
|
|
548
|
+
if stats["overall_accuracy"] < 0.5 and stats["weeks_active"] >= 2
|
|
549
|
+
]
|
|
550
|
+
|
|
551
|
+
if low_accuracy_devs:
|
|
552
|
+
recommendations.append({
|
|
553
|
+
"type": "individual_coaching",
|
|
554
|
+
"priority": "medium",
|
|
555
|
+
"title": "Developers Need Estimation Training",
|
|
556
|
+
"description": f"{len(low_accuracy_devs)} developers have low estimation accuracy. Consider individual coaching sessions.",
|
|
557
|
+
"action": f"Provide estimation training for: {', '.join(low_accuracy_devs[:3])}"
|
|
558
|
+
})
|
|
559
|
+
|
|
560
|
+
# Check overall correlation strength
|
|
561
|
+
if summary.get("strength") == "weak":
|
|
562
|
+
recommendations.append({
|
|
563
|
+
"type": "process_review",
|
|
564
|
+
"priority": "high",
|
|
565
|
+
"title": "Overall Weak Correlations",
|
|
566
|
+
"description": "Story points show weak correlations across all work metrics. The estimation process may need fundamental review.",
|
|
567
|
+
"action": "Conduct team retrospective on story point estimation process and consider alternative estimation methods"
|
|
568
|
+
})
|
|
569
|
+
|
|
570
|
+
return recommendations
|
|
571
|
+
|
|
572
|
+
def _extract_estimation_pairs(
|
|
573
|
+
self, commits: list[dict[str, Any]], pm_data: dict[str, Any], weeks: int
|
|
574
|
+
) -> list[tuple[int, int, str]]:
|
|
575
|
+
"""Extract (estimated, actual, developer) pairs for accuracy analysis."""
|
|
576
|
+
pairs = []
|
|
577
|
+
|
|
578
|
+
if not pm_data or "correlations" not in pm_data:
|
|
579
|
+
return pairs
|
|
580
|
+
|
|
581
|
+
# Calculate date range
|
|
582
|
+
end_date = datetime.now(timezone.utc)
|
|
583
|
+
start_date = end_date - timedelta(weeks=weeks)
|
|
584
|
+
|
|
585
|
+
for correlation in pm_data["correlations"]:
|
|
586
|
+
commit_date = correlation.get("commit_date")
|
|
587
|
+
if not commit_date:
|
|
588
|
+
continue
|
|
589
|
+
|
|
590
|
+
timestamp = self._ensure_timezone_aware(
|
|
591
|
+
datetime.fromisoformat(commit_date.replace("Z", "+00:00"))
|
|
592
|
+
if isinstance(commit_date, str) else commit_date
|
|
593
|
+
)
|
|
594
|
+
|
|
595
|
+
if timestamp < start_date or timestamp > end_date:
|
|
596
|
+
continue
|
|
597
|
+
|
|
598
|
+
estimated_sp = correlation.get("story_points", 0) or 0
|
|
599
|
+
commit_hash = correlation.get("commit_hash", "")
|
|
600
|
+
developer = correlation.get("commit_author", "unknown")
|
|
601
|
+
|
|
602
|
+
# Find matching commit for actual story points
|
|
603
|
+
matching_commit = next(
|
|
604
|
+
(c for c in commits if c.get("hash", "") == commit_hash), None
|
|
605
|
+
)
|
|
606
|
+
|
|
607
|
+
if matching_commit:
|
|
608
|
+
actual_sp = matching_commit.get("story_points", 0) or 0
|
|
609
|
+
if estimated_sp > 0 and actual_sp > 0: # Valid pair
|
|
610
|
+
pairs.append((estimated_sp, actual_sp, developer))
|
|
611
|
+
|
|
612
|
+
return pairs
|
|
613
|
+
|
|
614
|
+
def _calculate_accuracy_metrics(self, estimation_pairs: list[tuple[int, int, str]]) -> dict[str, Any]:
|
|
615
|
+
"""Calculate overall estimation accuracy metrics."""
|
|
616
|
+
if not estimation_pairs:
|
|
617
|
+
return {"mean_absolute_error": 0, "mean_relative_error": 0, "accuracy_percentage": 0}
|
|
618
|
+
|
|
619
|
+
estimated_values = [pair[0] for pair in estimation_pairs]
|
|
620
|
+
actual_values = [pair[1] for pair in estimation_pairs]
|
|
621
|
+
|
|
622
|
+
# Mean Absolute Error
|
|
623
|
+
mae = float(np.mean([abs(est - act) for est, act in zip(estimated_values, actual_values)]))
|
|
624
|
+
|
|
625
|
+
# Mean Relative Error (as percentage)
|
|
626
|
+
relative_errors = [
|
|
627
|
+
abs(est - act) / max(est, act) * 100
|
|
628
|
+
for est, act in zip(estimated_values, actual_values)
|
|
629
|
+
if max(est, act) > 0
|
|
630
|
+
]
|
|
631
|
+
mre = float(np.mean(relative_errors)) if relative_errors else 0
|
|
632
|
+
|
|
633
|
+
# Accuracy percentage (within 20% tolerance)
|
|
634
|
+
accurate_estimates = sum(
|
|
635
|
+
1 for est, act in zip(estimated_values, actual_values)
|
|
636
|
+
if abs(est - act) / max(est, act) <= 0.2
|
|
637
|
+
)
|
|
638
|
+
accuracy_percentage = (accurate_estimates / len(estimation_pairs)) * 100 if estimation_pairs else 0
|
|
639
|
+
|
|
640
|
+
return {
|
|
641
|
+
"mean_absolute_error": mae,
|
|
642
|
+
"mean_relative_error": mre,
|
|
643
|
+
"accuracy_percentage": float(accuracy_percentage),
|
|
644
|
+
"total_comparisons": len(estimation_pairs),
|
|
645
|
+
"correlation_coefficient": float(stats.pearsonr(estimated_values, actual_values)[0]) if len(estimation_pairs) > 1 else 0
|
|
646
|
+
}
|
|
647
|
+
|
|
648
|
+
def _analyze_developer_estimation_accuracy(self, estimation_pairs: list[tuple[int, int, str]]) -> dict[str, dict[str, Any]]:
|
|
649
|
+
"""Analyze estimation accuracy by individual developer."""
|
|
650
|
+
developer_pairs = defaultdict(list)
|
|
651
|
+
|
|
652
|
+
for estimated, actual, developer in estimation_pairs:
|
|
653
|
+
developer_pairs[developer].append((estimated, actual))
|
|
654
|
+
|
|
655
|
+
developer_accuracy = {}
|
|
656
|
+
|
|
657
|
+
for developer, pairs in developer_pairs.items():
|
|
658
|
+
if len(pairs) >= 2: # Need multiple estimates for meaningful analysis
|
|
659
|
+
estimated_values = [pair[0] for pair in pairs]
|
|
660
|
+
actual_values = [pair[1] for pair in pairs]
|
|
661
|
+
|
|
662
|
+
# Calculate metrics for this developer
|
|
663
|
+
mae = float(np.mean([abs(est - act) for est, act in zip(estimated_values, actual_values)]))
|
|
664
|
+
|
|
665
|
+
relative_errors = [
|
|
666
|
+
abs(est - act) / max(est, act) * 100
|
|
667
|
+
for est, act in zip(estimated_values, actual_values)
|
|
668
|
+
]
|
|
669
|
+
mre = float(np.mean(relative_errors))
|
|
670
|
+
|
|
671
|
+
accurate_count = sum(
|
|
672
|
+
1 for est, act in zip(estimated_values, actual_values)
|
|
673
|
+
if abs(est - act) / max(est, act) <= 0.2
|
|
674
|
+
)
|
|
675
|
+
accuracy_pct = (accurate_count / len(pairs)) * 100
|
|
676
|
+
|
|
677
|
+
developer_accuracy[self._anonymize_value(developer, "name")] = {
|
|
678
|
+
"mean_absolute_error": mae,
|
|
679
|
+
"mean_relative_error": mre,
|
|
680
|
+
"accuracy_percentage": float(accuracy_pct),
|
|
681
|
+
"estimates_count": len(pairs),
|
|
682
|
+
"tends_to_overestimate": sum(estimated_values) > sum(actual_values),
|
|
683
|
+
"consistency": 1.0 - float(np.std(relative_errors) / 100) if relative_errors else 0
|
|
684
|
+
}
|
|
685
|
+
|
|
686
|
+
return developer_accuracy
|
|
687
|
+
|
|
688
|
+
def _analyze_size_based_accuracy(self, estimation_pairs: list[tuple[int, int, str]]) -> dict[str, dict[str, Any]]:
|
|
689
|
+
"""Analyze estimation accuracy by story point size ranges."""
|
|
690
|
+
size_ranges = {
|
|
691
|
+
"small": (1, 3),
|
|
692
|
+
"medium": (4, 8),
|
|
693
|
+
"large": (9, 21),
|
|
694
|
+
"extra_large": (22, 100)
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
size_accuracy = {}
|
|
698
|
+
|
|
699
|
+
for size_name, (min_sp, max_sp) in size_ranges.items():
|
|
700
|
+
size_pairs = [
|
|
701
|
+
(est, act) for est, act, _ in estimation_pairs
|
|
702
|
+
if min_sp <= est <= max_sp
|
|
703
|
+
]
|
|
704
|
+
|
|
705
|
+
if size_pairs:
|
|
706
|
+
estimated_values = [pair[0] for pair in size_pairs]
|
|
707
|
+
actual_values = [pair[1] for pair in size_pairs]
|
|
708
|
+
|
|
709
|
+
mae = float(np.mean([abs(est - act) for est, act in zip(estimated_values, actual_values)]))
|
|
710
|
+
|
|
711
|
+
relative_errors = [
|
|
712
|
+
abs(est - act) / max(est, act) * 100
|
|
713
|
+
for est, act in zip(estimated_values, actual_values)
|
|
714
|
+
]
|
|
715
|
+
mre = float(np.mean(relative_errors))
|
|
716
|
+
|
|
717
|
+
size_accuracy[size_name] = {
|
|
718
|
+
"mean_absolute_error": mae,
|
|
719
|
+
"mean_relative_error": mre,
|
|
720
|
+
"sample_size": len(size_pairs),
|
|
721
|
+
"avg_estimated": float(np.mean(estimated_values)),
|
|
722
|
+
"avg_actual": float(np.mean(actual_values))
|
|
723
|
+
}
|
|
724
|
+
else:
|
|
725
|
+
size_accuracy[size_name] = {
|
|
726
|
+
"mean_absolute_error": 0,
|
|
727
|
+
"mean_relative_error": 0,
|
|
728
|
+
"sample_size": 0,
|
|
729
|
+
"avg_estimated": 0,
|
|
730
|
+
"avg_actual": 0
|
|
731
|
+
}
|
|
732
|
+
|
|
733
|
+
return size_accuracy
|
|
734
|
+
|
|
735
|
+
def _aggregate_weekly_velocity(
|
|
736
|
+
self, commits: list[dict[str, Any]], prs: list[dict[str, Any]], start_date: datetime, end_date: datetime
|
|
737
|
+
) -> dict[str, dict[str, Any]]:
|
|
738
|
+
"""Aggregate velocity metrics by week."""
|
|
739
|
+
weekly_velocity = defaultdict(lambda: {
|
|
740
|
+
"story_points_completed": 0,
|
|
741
|
+
"commits": 0,
|
|
742
|
+
"prs_merged": 0,
|
|
743
|
+
"developers_active": set()
|
|
744
|
+
})
|
|
745
|
+
|
|
746
|
+
# Process commits
|
|
747
|
+
for commit in commits:
|
|
748
|
+
timestamp = self._ensure_timezone_aware(commit.get("timestamp"))
|
|
749
|
+
if not timestamp or timestamp < start_date or timestamp > end_date:
|
|
750
|
+
continue
|
|
751
|
+
|
|
752
|
+
week_start = self._get_week_start(timestamp)
|
|
753
|
+
week_key = week_start.strftime("%Y-%m-%d")
|
|
754
|
+
|
|
755
|
+
weekly_velocity[week_key]["story_points_completed"] += commit.get("story_points", 0) or 0
|
|
756
|
+
weekly_velocity[week_key]["commits"] += 1
|
|
757
|
+
weekly_velocity[week_key]["developers_active"].add(
|
|
758
|
+
commit.get("canonical_id", commit.get("author_email", "unknown"))
|
|
759
|
+
)
|
|
760
|
+
|
|
761
|
+
# Process PRs
|
|
762
|
+
for pr in prs:
|
|
763
|
+
merged_at = self._ensure_timezone_aware(pr.get("merged_at"))
|
|
764
|
+
if not merged_at or merged_at < start_date or merged_at > end_date:
|
|
765
|
+
continue
|
|
766
|
+
|
|
767
|
+
week_start = self._get_week_start(merged_at)
|
|
768
|
+
week_key = week_start.strftime("%Y-%m-%d")
|
|
769
|
+
|
|
770
|
+
weekly_velocity[week_key]["prs_merged"] += 1
|
|
771
|
+
|
|
772
|
+
# Convert sets to counts
|
|
773
|
+
result = {}
|
|
774
|
+
for week_key, metrics in weekly_velocity.items():
|
|
775
|
+
metrics["developers_active"] = len(metrics["developers_active"])
|
|
776
|
+
result[week_key] = dict(metrics)
|
|
777
|
+
|
|
778
|
+
return result
|
|
779
|
+
|
|
780
|
+
def _calculate_velocity_trends(self, weekly_velocity: dict[str, dict[str, Any]]) -> dict[str, Any]:
|
|
781
|
+
"""Calculate velocity trend analysis."""
|
|
782
|
+
if len(weekly_velocity) < 3:
|
|
783
|
+
return {"trend": "insufficient_data", "velocity_change": 0}
|
|
784
|
+
|
|
785
|
+
weeks = sorted(weekly_velocity.keys())
|
|
786
|
+
story_points = [weekly_velocity[week]["story_points_completed"] for week in weeks]
|
|
787
|
+
|
|
788
|
+
if not any(sp > 0 for sp in story_points):
|
|
789
|
+
return {"trend": "no_story_points", "velocity_change": 0}
|
|
790
|
+
|
|
791
|
+
# Calculate trend using linear regression
|
|
792
|
+
x_values = list(range(len(weeks)))
|
|
793
|
+
slope, intercept, r_value, p_value, std_err = stats.linregress(x_values, story_points)
|
|
794
|
+
|
|
795
|
+
# Determine trend direction
|
|
796
|
+
if slope > 0.5:
|
|
797
|
+
trend = "improving"
|
|
798
|
+
elif slope < -0.5:
|
|
799
|
+
trend = "declining"
|
|
800
|
+
else:
|
|
801
|
+
trend = "stable"
|
|
802
|
+
|
|
803
|
+
# Calculate velocity change (percentage)
|
|
804
|
+
if len(story_points) >= 2:
|
|
805
|
+
recent_avg = np.mean(story_points[-3:]) if len(story_points) >= 3 else story_points[-1]
|
|
806
|
+
early_avg = np.mean(story_points[:3]) if len(story_points) >= 3 else story_points[0]
|
|
807
|
+
velocity_change = ((recent_avg - early_avg) / max(early_avg, 1)) * 100
|
|
808
|
+
else:
|
|
809
|
+
velocity_change = 0
|
|
810
|
+
|
|
811
|
+
return {
|
|
812
|
+
"trend": trend,
|
|
813
|
+
"velocity_change": float(velocity_change),
|
|
814
|
+
"trend_strength": float(abs(r_value)),
|
|
815
|
+
"slope": float(slope),
|
|
816
|
+
"weeks_analyzed": len(weeks),
|
|
817
|
+
"avg_velocity": float(np.mean(story_points)),
|
|
818
|
+
"velocity_stability": 1.0 - float(np.std(story_points) / max(np.mean(story_points), 1))
|
|
819
|
+
}
|
|
820
|
+
|
|
821
|
+
def _analyze_developer_velocity(
|
|
822
|
+
self, commits: list[dict[str, Any]], start_date: datetime, end_date: datetime
|
|
823
|
+
) -> dict[str, dict[str, Any]]:
|
|
824
|
+
"""Analyze individual developer velocity patterns."""
|
|
825
|
+
developer_metrics = defaultdict(lambda: {
|
|
826
|
+
"total_story_points": 0,
|
|
827
|
+
"total_commits": 0,
|
|
828
|
+
"weeks_active": set(),
|
|
829
|
+
"weekly_velocity": []
|
|
830
|
+
})
|
|
831
|
+
|
|
832
|
+
# Aggregate by developer and week
|
|
833
|
+
for commit in commits:
|
|
834
|
+
timestamp = self._ensure_timezone_aware(commit.get("timestamp"))
|
|
835
|
+
if not timestamp or timestamp < start_date or timestamp > end_date:
|
|
836
|
+
continue
|
|
837
|
+
|
|
838
|
+
developer_id = commit.get("canonical_id", commit.get("author_email", "unknown"))
|
|
839
|
+
week_start = self._get_week_start(timestamp)
|
|
840
|
+
|
|
841
|
+
metrics = developer_metrics[developer_id]
|
|
842
|
+
metrics["total_story_points"] += commit.get("story_points", 0) or 0
|
|
843
|
+
metrics["total_commits"] += 1
|
|
844
|
+
metrics["weeks_active"].add(week_start)
|
|
845
|
+
|
|
846
|
+
# Calculate velocity metrics for each developer
|
|
847
|
+
developer_velocity = {}
|
|
848
|
+
|
|
849
|
+
for dev_id, metrics in developer_metrics.items():
|
|
850
|
+
if metrics["total_commits"] > 0:
|
|
851
|
+
weeks_active = len(metrics["weeks_active"])
|
|
852
|
+
avg_velocity = metrics["total_story_points"] / max(weeks_active, 1)
|
|
853
|
+
|
|
854
|
+
developer_velocity[self._anonymize_value(dev_id, "name")] = {
|
|
855
|
+
"total_story_points": metrics["total_story_points"],
|
|
856
|
+
"total_commits": metrics["total_commits"],
|
|
857
|
+
"weeks_active": weeks_active,
|
|
858
|
+
"avg_weekly_velocity": float(avg_velocity),
|
|
859
|
+
"story_points_per_commit": metrics["total_story_points"] / metrics["total_commits"]
|
|
860
|
+
}
|
|
861
|
+
|
|
862
|
+
return developer_velocity
|
|
863
|
+
|
|
864
|
+
def _calculate_velocity_predictability(self, weekly_velocity: dict[str, dict[str, Any]]) -> dict[str, Any]:
|
|
865
|
+
"""Calculate how predictable the team's velocity is."""
|
|
866
|
+
if len(weekly_velocity) < 4:
|
|
867
|
+
return {"predictability": "insufficient_data", "confidence_interval": [0, 0]}
|
|
868
|
+
|
|
869
|
+
story_points = [metrics["story_points_completed"] for metrics in weekly_velocity.values()]
|
|
870
|
+
|
|
871
|
+
if not any(sp > 0 for sp in story_points):
|
|
872
|
+
return {"predictability": "no_velocity_data", "confidence_interval": [0, 0]}
|
|
873
|
+
|
|
874
|
+
mean_velocity = np.mean(story_points)
|
|
875
|
+
std_velocity = np.std(story_points)
|
|
876
|
+
coefficient_variation = std_velocity / max(mean_velocity, 1)
|
|
877
|
+
|
|
878
|
+
# Classify predictability
|
|
879
|
+
if coefficient_variation < 0.2:
|
|
880
|
+
predictability = "high"
|
|
881
|
+
elif coefficient_variation < 0.4:
|
|
882
|
+
predictability = "moderate"
|
|
883
|
+
else:
|
|
884
|
+
predictability = "low"
|
|
885
|
+
|
|
886
|
+
# Calculate 80% confidence interval
|
|
887
|
+
confidence_interval = [
|
|
888
|
+
float(max(0, mean_velocity - 1.28 * std_velocity)),
|
|
889
|
+
float(mean_velocity + 1.28 * std_velocity)
|
|
890
|
+
]
|
|
891
|
+
|
|
892
|
+
return {
|
|
893
|
+
"predictability": predictability,
|
|
894
|
+
"coefficient_of_variation": float(coefficient_variation),
|
|
895
|
+
"confidence_interval": confidence_interval,
|
|
896
|
+
"mean_velocity": float(mean_velocity),
|
|
897
|
+
"std_deviation": float(std_velocity)
|
|
898
|
+
}
|
|
899
|
+
|
|
900
|
+
def _analyze_team_capacity(
|
|
901
|
+
self, weekly_velocity: dict[str, dict[str, Any]], developer_velocity: dict[str, dict[str, Any]]
|
|
902
|
+
) -> dict[str, Any]:
|
|
903
|
+
"""Analyze team capacity and workload distribution."""
|
|
904
|
+
if not weekly_velocity or not developer_velocity:
|
|
905
|
+
return {"analysis": "insufficient_data"}
|
|
906
|
+
|
|
907
|
+
# Calculate team metrics
|
|
908
|
+
total_developers = len(developer_velocity)
|
|
909
|
+
weeks_analyzed = len(weekly_velocity)
|
|
910
|
+
|
|
911
|
+
# Calculate capacity utilization
|
|
912
|
+
developer_contributions = [dev["total_story_points"] for dev in developer_velocity.values()]
|
|
913
|
+
total_story_points = sum(developer_contributions)
|
|
914
|
+
|
|
915
|
+
if total_story_points == 0:
|
|
916
|
+
return {"analysis": "no_story_points"}
|
|
917
|
+
|
|
918
|
+
# Analyze workload distribution
|
|
919
|
+
[
|
|
920
|
+
(contrib / total_story_points) * 100 for contrib in developer_contributions
|
|
921
|
+
]
|
|
922
|
+
|
|
923
|
+
# Calculate Gini coefficient for workload inequality
|
|
924
|
+
sorted_contributions = sorted(developer_contributions)
|
|
925
|
+
n = len(sorted_contributions)
|
|
926
|
+
np.cumsum(sorted_contributions)
|
|
927
|
+
gini = (n + 1 - 2 * sum((n + 1 - i) * x for i, x in enumerate(sorted_contributions, 1))) / (n * sum(sorted_contributions))
|
|
928
|
+
|
|
929
|
+
# Capacity recommendations
|
|
930
|
+
recommendations = []
|
|
931
|
+
|
|
932
|
+
# Check for workload imbalance
|
|
933
|
+
if gini > 0.4: # High inequality
|
|
934
|
+
recommendations.append("Consider redistributing workload - significant imbalance detected")
|
|
935
|
+
|
|
936
|
+
# Check for low contributors
|
|
937
|
+
low_contributors = [
|
|
938
|
+
dev for dev, metrics in developer_velocity.items()
|
|
939
|
+
if metrics["avg_weekly_velocity"] < np.mean([m["avg_weekly_velocity"] for m in developer_velocity.values()]) * 0.5
|
|
940
|
+
]
|
|
941
|
+
|
|
942
|
+
if low_contributors:
|
|
943
|
+
recommendations.append(f"Support developers with low velocity: {', '.join(low_contributors[:3])}")
|
|
944
|
+
|
|
945
|
+
return {
|
|
946
|
+
"total_developers": total_developers,
|
|
947
|
+
"weeks_analyzed": weeks_analyzed,
|
|
948
|
+
"total_story_points": total_story_points,
|
|
949
|
+
"avg_weekly_team_velocity": float(np.mean([w["story_points_completed"] for w in weekly_velocity.values()])),
|
|
950
|
+
"workload_distribution_gini": float(gini),
|
|
951
|
+
"workload_balance": "balanced" if gini < 0.3 else "imbalanced",
|
|
952
|
+
"capacity_recommendations": recommendations,
|
|
953
|
+
"top_contributors": sorted(
|
|
954
|
+
[(dev, metrics["total_story_points"]) for dev, metrics in developer_velocity.items()],
|
|
955
|
+
key=lambda x: x[1], reverse=True
|
|
956
|
+
)[:5]
|
|
957
|
+
}
|
|
958
|
+
|
|
959
|
+
def _build_correlation_csv_rows(
|
|
960
|
+
self,
|
|
961
|
+
weekly_correlations: dict[str, Any],
|
|
962
|
+
estimation_accuracy: dict[str, Any],
|
|
963
|
+
velocity_metrics: dict[str, Any]
|
|
964
|
+
) -> list[dict[str, Any]]:
|
|
965
|
+
"""Build CSV rows from correlation analysis results."""
|
|
966
|
+
rows = []
|
|
967
|
+
|
|
968
|
+
# Add weekly correlation data
|
|
969
|
+
correlation_results = weekly_correlations.get("weekly_correlations", {})
|
|
970
|
+
|
|
971
|
+
for week_start, week_data in correlation_results.items():
|
|
972
|
+
correlations = week_data.get("correlations", {})
|
|
973
|
+
|
|
974
|
+
row = {
|
|
975
|
+
"week_start": week_start.strftime("%Y-%m-%d"),
|
|
976
|
+
"metric_type": "weekly_correlations",
|
|
977
|
+
"sp_commits_correlation": round(correlations.get("sp_commits", 0), 3),
|
|
978
|
+
"sp_lines_correlation": round(correlations.get("sp_lines", 0), 3),
|
|
979
|
+
"sp_files_correlation": round(correlations.get("sp_files", 0), 3),
|
|
980
|
+
"sp_prs_correlation": round(correlations.get("sp_prs", 0), 3),
|
|
981
|
+
"sp_complexity_correlation": round(correlations.get("sp_complexity", 0), 3),
|
|
982
|
+
"sample_size": week_data.get("sample_size", 0),
|
|
983
|
+
"total_story_points": week_data.get("total_story_points", 0),
|
|
984
|
+
"total_commits": week_data.get("total_commits", 0)
|
|
985
|
+
}
|
|
986
|
+
rows.append(row)
|
|
987
|
+
|
|
988
|
+
# Add velocity data
|
|
989
|
+
weekly_velocity = velocity_metrics.get("weekly_velocity", {})
|
|
990
|
+
for week_key, velocity_data in weekly_velocity.items():
|
|
991
|
+
row = {
|
|
992
|
+
"week_start": week_key,
|
|
993
|
+
"metric_type": "velocity",
|
|
994
|
+
"story_points_completed": velocity_data.get("story_points_completed", 0),
|
|
995
|
+
"commits_count": velocity_data.get("commits", 0),
|
|
996
|
+
"prs_merged": velocity_data.get("prs_merged", 0),
|
|
997
|
+
"developers_active": velocity_data.get("developers_active", 0),
|
|
998
|
+
"velocity_trend": velocity_metrics.get("trends", {}).get("trend", "unknown")
|
|
999
|
+
}
|
|
1000
|
+
rows.append(row)
|
|
1001
|
+
|
|
1002
|
+
# Add developer accuracy summary
|
|
1003
|
+
developer_accuracy = estimation_accuracy.get("developer_accuracy", {})
|
|
1004
|
+
for developer, accuracy_data in developer_accuracy.items():
|
|
1005
|
+
row = {
|
|
1006
|
+
"developer_name": developer,
|
|
1007
|
+
"metric_type": "developer_accuracy",
|
|
1008
|
+
"overall_accuracy": round(accuracy_data.get("overall_accuracy", 0), 3),
|
|
1009
|
+
"avg_weekly_accuracy": round(accuracy_data.get("avg_weekly_accuracy", 0), 3),
|
|
1010
|
+
"consistency": round(accuracy_data.get("consistency", 0), 3),
|
|
1011
|
+
"weeks_active": accuracy_data.get("weeks_active", 0),
|
|
1012
|
+
"total_estimated_sp": accuracy_data.get("total_estimated", 0),
|
|
1013
|
+
"total_actual_sp": accuracy_data.get("total_actual", 0),
|
|
1014
|
+
"estimation_ratio": round(accuracy_data.get("estimation_ratio", 0), 3)
|
|
1015
|
+
}
|
|
1016
|
+
rows.append(row)
|
|
1017
|
+
|
|
1018
|
+
return rows
|
|
1019
|
+
|
|
1020
|
+
def _write_empty_correlation_csv(self, output_path: Path) -> None:
|
|
1021
|
+
"""Write empty CSV file with proper headers."""
|
|
1022
|
+
headers = [
|
|
1023
|
+
"week_start", "metric_type", "developer_name",
|
|
1024
|
+
"sp_commits_correlation", "sp_lines_correlation", "sp_files_correlation",
|
|
1025
|
+
"sp_prs_correlation", "sp_complexity_correlation", "sample_size",
|
|
1026
|
+
"total_story_points", "total_commits", "story_points_completed",
|
|
1027
|
+
"commits_count", "prs_merged", "developers_active", "velocity_trend",
|
|
1028
|
+
"overall_accuracy", "avg_weekly_accuracy", "consistency",
|
|
1029
|
+
"weeks_active", "total_estimated_sp", "total_actual_sp", "estimation_ratio"
|
|
1030
|
+
]
|
|
1031
|
+
|
|
1032
|
+
df = pd.DataFrame(columns=headers)
|
|
1033
|
+
df.to_csv(output_path, index=False)
|
|
1034
|
+
|
|
1035
|
+
def _empty_accuracy_analysis(self) -> dict[str, Any]:
|
|
1036
|
+
"""Return empty accuracy analysis structure."""
|
|
1037
|
+
return {
|
|
1038
|
+
"overall_accuracy": {"mean_absolute_error": 0, "mean_relative_error": 0, "accuracy_percentage": 0},
|
|
1039
|
+
"developer_accuracy": {},
|
|
1040
|
+
"size_based_accuracy": {},
|
|
1041
|
+
"improvement_suggestions": []
|
|
1042
|
+
}
|
|
1043
|
+
|
|
1044
|
+
def _empty_week_correlations(self) -> dict[str, Any]:
|
|
1045
|
+
"""Return empty week correlations structure."""
|
|
1046
|
+
return {
|
|
1047
|
+
"correlations": {k: 0.0 for k in ["sp_commits", "sp_lines", "sp_files", "sp_prs", "sp_complexity"]},
|
|
1048
|
+
"sample_size": 0,
|
|
1049
|
+
"total_story_points": 0,
|
|
1050
|
+
"total_commits": 0,
|
|
1051
|
+
"total_lines_changed": 0
|
|
1052
|
+
}
|
|
1053
|
+
|
|
1054
|
+
def _generate_accuracy_recommendations(
|
|
1055
|
+
self, accuracy_metrics: dict[str, Any], developer_accuracy: dict[str, dict[str, Any]], size_accuracy: dict[str, dict[str, Any]]
|
|
1056
|
+
) -> list[dict[str, str]]:
|
|
1057
|
+
"""Generate recommendations for improving estimation accuracy."""
|
|
1058
|
+
recommendations = []
|
|
1059
|
+
|
|
1060
|
+
overall_accuracy = accuracy_metrics.get("accuracy_percentage", 0)
|
|
1061
|
+
|
|
1062
|
+
if overall_accuracy < 50:
|
|
1063
|
+
recommendations.append({
|
|
1064
|
+
"priority": "high",
|
|
1065
|
+
"title": "Low Overall Estimation Accuracy",
|
|
1066
|
+
"description": f"Only {overall_accuracy:.1f}% of estimates are within 20% tolerance",
|
|
1067
|
+
"action": "Conduct team workshop on story point estimation techniques"
|
|
1068
|
+
})
|
|
1069
|
+
|
|
1070
|
+
# Check for developers with low accuracy
|
|
1071
|
+
low_accuracy_devs = [
|
|
1072
|
+
dev for dev, stats in developer_accuracy.items()
|
|
1073
|
+
if stats.get("overall_accuracy", 0) < 0.4
|
|
1074
|
+
]
|
|
1075
|
+
|
|
1076
|
+
if low_accuracy_devs:
|
|
1077
|
+
recommendations.append({
|
|
1078
|
+
"priority": "medium",
|
|
1079
|
+
"title": "Individual Estimation Training Needed",
|
|
1080
|
+
"description": f"{len(low_accuracy_devs)} developers need estimation improvement",
|
|
1081
|
+
"action": f"Provide 1-on-1 training for: {', '.join(low_accuracy_devs[:3])}"
|
|
1082
|
+
})
|
|
1083
|
+
|
|
1084
|
+
# Check size-based accuracy patterns
|
|
1085
|
+
large_story_accuracy = size_accuracy.get("large", {}).get("mean_relative_error", 0)
|
|
1086
|
+
if large_story_accuracy > 40: # High error rate for large stories
|
|
1087
|
+
recommendations.append({
|
|
1088
|
+
"priority": "medium",
|
|
1089
|
+
"title": "Large Stories Are Poorly Estimated",
|
|
1090
|
+
"description": f"Large stories (9-21 pts) have {large_story_accuracy:.1f}% average error",
|
|
1091
|
+
"action": "Encourage breaking down large stories into smaller, more estimable pieces"
|
|
1092
|
+
})
|
|
1093
|
+
|
|
1094
|
+
return recommendations
|
|
1095
|
+
|
|
1096
|
+
def _ensure_timezone_aware(self, dt: Any) -> Optional[datetime]:
|
|
1097
|
+
"""Ensure datetime is timezone-aware UTC."""
|
|
1098
|
+
if not dt:
|
|
1099
|
+
return None
|
|
1100
|
+
|
|
1101
|
+
if isinstance(dt, str):
|
|
1102
|
+
try:
|
|
1103
|
+
dt = datetime.fromisoformat(dt.replace("Z", "+00:00"))
|
|
1104
|
+
except (ValueError, AttributeError):
|
|
1105
|
+
return None
|
|
1106
|
+
|
|
1107
|
+
if not isinstance(dt, datetime):
|
|
1108
|
+
return None
|
|
1109
|
+
|
|
1110
|
+
if dt.tzinfo is None:
|
|
1111
|
+
return dt.replace(tzinfo=timezone.utc)
|
|
1112
|
+
elif dt.tzinfo != timezone.utc:
|
|
1113
|
+
return dt.astimezone(timezone.utc)
|
|
1114
|
+
else:
|
|
1115
|
+
return dt
|
|
1116
|
+
|
|
1117
|
+
def _get_week_start(self, date: datetime) -> datetime:
|
|
1118
|
+
"""Get Monday of the week for consistent week boundaries."""
|
|
1119
|
+
if date.tzinfo is None:
|
|
1120
|
+
date = date.replace(tzinfo=timezone.utc)
|
|
1121
|
+
elif date.tzinfo != timezone.utc:
|
|
1122
|
+
date = date.astimezone(timezone.utc)
|
|
1123
|
+
|
|
1124
|
+
days_since_monday = date.weekday()
|
|
1125
|
+
monday = date - timedelta(days=days_since_monday)
|
|
1126
|
+
return monday.replace(hour=0, minute=0, second=0, microsecond=0)
|
|
1127
|
+
|
|
1128
|
+
def _anonymize_value(self, value: str, field_type: str) -> str:
|
|
1129
|
+
"""Anonymize values if anonymization is enabled."""
|
|
1130
|
+
if not self.anonymize or not value:
|
|
1131
|
+
return value
|
|
1132
|
+
|
|
1133
|
+
if value not in self._anonymization_map:
|
|
1134
|
+
self._anonymous_counter += 1
|
|
1135
|
+
if field_type == "name":
|
|
1136
|
+
anonymous = f"Developer{self._anonymous_counter}"
|
|
1137
|
+
elif field_type == "id":
|
|
1138
|
+
anonymous = f"ID{self._anonymous_counter:04d}"
|
|
1139
|
+
else:
|
|
1140
|
+
anonymous = f"anon{self._anonymous_counter}"
|
|
1141
|
+
|
|
1142
|
+
self._anonymization_map[value] = anonymous
|
|
1143
|
+
|
|
1144
|
+
return self._anonymization_map[value]
|