gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4108 -350
- gitflow_analytics/cli_rich.py +198 -48
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +904 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +441 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -508
- gitflow_analytics/core/analyzer.py +1209 -98
- gitflow_analytics/core/cache.py +1337 -29
- gitflow_analytics/core/data_fetcher.py +1193 -0
- gitflow_analytics/core/identity.py +363 -14
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +8 -1
- gitflow_analytics/extractors/tickets.py +749 -11
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +175 -11
- gitflow_analytics/integrations/jira_integration.py +461 -24
- gitflow_analytics/integrations/orchestrator.py +124 -1
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +379 -20
- gitflow_analytics/models/database.py +843 -53
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +9 -10
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
- gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
- gitflow_analytics/qualitative/core/__init__.py +4 -4
- gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
- gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
- gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
- gitflow_analytics/qualitative/core/processor.py +381 -248
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +7 -7
- gitflow_analytics/qualitative/models/schemas.py +155 -121
- gitflow_analytics/qualitative/utils/__init__.py +4 -4
- gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
- gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
- gitflow_analytics/qualitative/utils/metrics.py +172 -158
- gitflow_analytics/qualitative/utils/text_processing.py +146 -104
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +539 -14
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1676 -212
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2287 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +1 -1
- gitflow_analytics/tui/app.py +129 -126
- gitflow_analytics/tui/screens/__init__.py +3 -3
- gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
- gitflow_analytics/tui/screens/configuration_screen.py +154 -178
- gitflow_analytics/tui/screens/loading_screen.py +100 -110
- gitflow_analytics/tui/screens/main_screen.py +89 -72
- gitflow_analytics/tui/screens/results_screen.py +305 -281
- gitflow_analytics/tui/widgets/__init__.py +2 -2
- gitflow_analytics/tui/widgets/data_table.py +67 -69
- gitflow_analytics/tui/widgets/export_modal.py +76 -76
- gitflow_analytics/tui/widgets/progress_widget.py +41 -46
- gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
- gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
|
@@ -1,339 +1,1803 @@
|
|
|
1
1
|
"""CSV report generation for GitFlow Analytics."""
|
|
2
|
+
|
|
2
3
|
import csv
|
|
4
|
+
import logging
|
|
5
|
+
from collections import defaultdict
|
|
3
6
|
from datetime import datetime, timedelta, timezone
|
|
4
7
|
from pathlib import Path
|
|
5
|
-
from typing import
|
|
6
|
-
|
|
8
|
+
from typing import Any, Dict, List, Optional
|
|
9
|
+
|
|
7
10
|
import pandas as pd
|
|
8
11
|
|
|
12
|
+
from ..metrics.activity_scoring import ActivityScorer
|
|
13
|
+
from .base import BaseReportGenerator, ReportData, ReportOutput
|
|
14
|
+
from .interfaces import ReportFormat
|
|
9
15
|
|
|
10
|
-
|
|
16
|
+
# Get logger for this module
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class CSVReportGenerator(BaseReportGenerator):
|
|
11
21
|
"""Generate CSV reports with weekly metrics."""
|
|
12
|
-
|
|
13
|
-
def __init__(self, anonymize: bool = False):
|
|
22
|
+
|
|
23
|
+
def __init__(self, anonymize: bool = False, exclude_authors: list[str] = None, identity_resolver=None, **kwargs):
|
|
14
24
|
"""Initialize report generator."""
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
self.
|
|
25
|
+
super().__init__(anonymize=anonymize, exclude_authors=exclude_authors,
|
|
26
|
+
identity_resolver=identity_resolver, **kwargs)
|
|
27
|
+
self.activity_scorer = ActivityScorer()
|
|
28
|
+
|
|
29
|
+
# Implementation of abstract methods from BaseReportGenerator
|
|
30
|
+
|
|
31
|
+
def generate(self, data: ReportData, output_path: Optional[Path] = None) -> ReportOutput:
|
|
32
|
+
"""Generate CSV report from standardized data.
|
|
33
|
+
|
|
34
|
+
Args:
|
|
35
|
+
data: Standardized report data
|
|
36
|
+
output_path: Optional path to write the report to
|
|
37
|
+
|
|
38
|
+
Returns:
|
|
39
|
+
ReportOutput containing the results
|
|
40
|
+
"""
|
|
41
|
+
try:
|
|
42
|
+
# Validate data
|
|
43
|
+
if not self.validate_data(data):
|
|
44
|
+
return ReportOutput(
|
|
45
|
+
success=False,
|
|
46
|
+
errors=["Invalid or incomplete data provided"]
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
# Pre-process data (apply filters and anonymization)
|
|
50
|
+
data = self.pre_process(data)
|
|
51
|
+
|
|
52
|
+
# Generate appropriate CSV based on available data
|
|
53
|
+
if output_path:
|
|
54
|
+
# Determine report type based on filename or available data
|
|
55
|
+
filename = output_path.name.lower()
|
|
56
|
+
|
|
57
|
+
if "weekly" in filename and data.commits:
|
|
58
|
+
self.generate_weekly_report(data.commits, data.developer_stats, output_path)
|
|
59
|
+
elif "developer" in filename and data.developer_stats:
|
|
60
|
+
self.generate_developer_report(data.developer_stats, output_path)
|
|
61
|
+
elif "activity" in filename and data.activity_data:
|
|
62
|
+
# Write activity data directly
|
|
63
|
+
df = pd.DataFrame(data.activity_data)
|
|
64
|
+
df.to_csv(output_path, index=False)
|
|
65
|
+
elif "focus" in filename and data.focus_data:
|
|
66
|
+
# Write focus data directly
|
|
67
|
+
df = pd.DataFrame(data.focus_data)
|
|
68
|
+
df.to_csv(output_path, index=False)
|
|
69
|
+
elif data.commits:
|
|
70
|
+
# Default to weekly report
|
|
71
|
+
self.generate_weekly_report(data.commits, data.developer_stats, output_path)
|
|
72
|
+
else:
|
|
73
|
+
return ReportOutput(
|
|
74
|
+
success=False,
|
|
75
|
+
errors=["No suitable data found for CSV generation"]
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
# Calculate file size
|
|
79
|
+
file_size = output_path.stat().st_size if output_path.exists() else 0
|
|
80
|
+
|
|
81
|
+
return ReportOutput(
|
|
82
|
+
success=True,
|
|
83
|
+
file_path=output_path,
|
|
84
|
+
format="csv",
|
|
85
|
+
size_bytes=file_size
|
|
86
|
+
)
|
|
87
|
+
else:
|
|
88
|
+
# Generate in-memory CSV
|
|
89
|
+
import io
|
|
90
|
+
buffer = io.StringIO()
|
|
91
|
+
|
|
92
|
+
# Default to generating weekly report in memory
|
|
93
|
+
if data.commits:
|
|
94
|
+
# Create temporary dataframe
|
|
95
|
+
df = pd.DataFrame(self._aggregate_weekly_data(data.commits,
|
|
96
|
+
datetime.now(timezone.utc) - timedelta(weeks=52),
|
|
97
|
+
datetime.now(timezone.utc)))
|
|
98
|
+
df.to_csv(buffer, index=False)
|
|
99
|
+
content = buffer.getvalue()
|
|
100
|
+
|
|
101
|
+
return ReportOutput(
|
|
102
|
+
success=True,
|
|
103
|
+
content=content,
|
|
104
|
+
format="csv",
|
|
105
|
+
size_bytes=len(content)
|
|
106
|
+
)
|
|
107
|
+
else:
|
|
108
|
+
return ReportOutput(
|
|
109
|
+
success=False,
|
|
110
|
+
errors=["No data available for CSV generation"]
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
except Exception as e:
|
|
114
|
+
self.logger.error(f"Error generating CSV report: {e}")
|
|
115
|
+
return ReportOutput(
|
|
116
|
+
success=False,
|
|
117
|
+
errors=[str(e)]
|
|
118
|
+
)
|
|
18
119
|
|
|
19
|
-
def
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
120
|
+
def get_required_fields(self) -> List[str]:
|
|
121
|
+
"""Get the list of required data fields for CSV generation.
|
|
122
|
+
|
|
123
|
+
Returns:
|
|
124
|
+
List of required field names
|
|
125
|
+
"""
|
|
126
|
+
# CSV reports can work with various combinations of data
|
|
127
|
+
# At minimum, we need either commits or developer_stats
|
|
128
|
+
return ["commits"] # Primary requirement
|
|
129
|
+
|
|
130
|
+
def get_format_type(self) -> str:
|
|
131
|
+
"""Get the format type this generator produces.
|
|
132
|
+
|
|
133
|
+
Returns:
|
|
134
|
+
Format identifier
|
|
135
|
+
"""
|
|
136
|
+
return ReportFormat.CSV.value
|
|
137
|
+
|
|
138
|
+
def _filter_excluded_authors_list(self, data_list: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
139
|
+
"""
|
|
140
|
+
Filter out excluded authors from any data list using canonical_id and enhanced bot detection.
|
|
141
|
+
|
|
142
|
+
WHY: Bot exclusion happens in Phase 2 (reporting) instead of Phase 1 (data collection)
|
|
143
|
+
to ensure manual identity mappings work correctly. This allows the system to see
|
|
144
|
+
consolidated bot identities via canonical_id instead of just original author_email/author_name.
|
|
145
|
+
|
|
146
|
+
ENHANCEMENT: Added enhanced bot pattern matching to catch bots that weren't properly
|
|
147
|
+
consolidated via manual mappings, preventing bot leakage in reports.
|
|
148
|
+
|
|
149
|
+
Args:
|
|
150
|
+
data_list: List of data dictionaries containing canonical_id field
|
|
151
|
+
|
|
152
|
+
Returns:
|
|
153
|
+
Filtered list with excluded authors removed
|
|
154
|
+
"""
|
|
155
|
+
if not self.exclude_authors:
|
|
156
|
+
return data_list
|
|
157
|
+
|
|
158
|
+
logger.debug(f"DEBUG EXCLUSION: Starting filter with {len(self.exclude_authors)} excluded authors: {self.exclude_authors}")
|
|
159
|
+
logger.debug(f"DEBUG EXCLUSION: Filtering {len(data_list)} items from data list")
|
|
160
|
+
|
|
161
|
+
excluded_lower = [author.lower() for author in self.exclude_authors]
|
|
162
|
+
logger.debug(f"DEBUG EXCLUSION: Excluded authors (lowercase): {excluded_lower}")
|
|
163
|
+
|
|
164
|
+
# Separate explicit excludes from bot patterns
|
|
165
|
+
explicit_excludes = []
|
|
166
|
+
bot_patterns = []
|
|
167
|
+
|
|
168
|
+
for exclude in excluded_lower:
|
|
169
|
+
if '[bot]' in exclude or 'bot' in exclude.split():
|
|
170
|
+
bot_patterns.append(exclude)
|
|
171
|
+
else:
|
|
172
|
+
explicit_excludes.append(exclude)
|
|
173
|
+
|
|
174
|
+
logger.debug(f"DEBUG EXCLUSION: Explicit excludes: {explicit_excludes}")
|
|
175
|
+
logger.debug(f"DEBUG EXCLUSION: Bot patterns: {bot_patterns}")
|
|
176
|
+
|
|
177
|
+
filtered_data = []
|
|
178
|
+
excluded_count = 0
|
|
179
|
+
|
|
180
|
+
# Sample first 5 items to see data structure
|
|
181
|
+
for i, item in enumerate(data_list[:5]):
|
|
182
|
+
logger.debug(f"DEBUG EXCLUSION: Sample item {i}: canonical_id='{item.get('canonical_id', '')}', "
|
|
183
|
+
f"author_email='{item.get('author_email', '')}', author_name='{item.get('author_name', '')}', "
|
|
184
|
+
f"author='{item.get('author', '')}', primary_name='{item.get('primary_name', '')}', "
|
|
185
|
+
f"name='{item.get('name', '')}', developer='{item.get('developer', '')}', "
|
|
186
|
+
f"display_name='{item.get('display_name', '')}'")
|
|
187
|
+
|
|
188
|
+
for item in data_list:
|
|
189
|
+
canonical_id = item.get("canonical_id", "")
|
|
190
|
+
# Also check original author fields as fallback for data without canonical_id
|
|
191
|
+
author_email = item.get("author_email", "")
|
|
192
|
+
author_name = item.get("author_name", "")
|
|
193
|
+
|
|
194
|
+
# Check all possible author fields to ensure we catch every variation
|
|
195
|
+
author = item.get("author", "")
|
|
196
|
+
primary_name = item.get("primary_name", "")
|
|
197
|
+
name = item.get("name", "")
|
|
198
|
+
developer = item.get("developer", "") # Common in CSV data
|
|
199
|
+
display_name = item.get("display_name", "") # Common in some data structures
|
|
200
|
+
|
|
201
|
+
# Collect all identity fields for checking
|
|
202
|
+
identity_fields = [
|
|
203
|
+
canonical_id,
|
|
204
|
+
item.get("primary_email", ""),
|
|
205
|
+
author_email,
|
|
206
|
+
author_name,
|
|
207
|
+
author,
|
|
208
|
+
primary_name,
|
|
209
|
+
name,
|
|
210
|
+
developer,
|
|
211
|
+
display_name
|
|
212
|
+
]
|
|
213
|
+
|
|
214
|
+
should_exclude = False
|
|
215
|
+
exclusion_reason = ""
|
|
216
|
+
|
|
217
|
+
# Check for exact matches with explicit excludes first
|
|
218
|
+
for field in identity_fields:
|
|
219
|
+
if field and field.lower() in explicit_excludes:
|
|
220
|
+
should_exclude = True
|
|
221
|
+
exclusion_reason = f"exact match with '{field}' in explicit excludes"
|
|
222
|
+
break
|
|
223
|
+
|
|
224
|
+
# If not explicitly excluded, check for bot patterns
|
|
225
|
+
if not should_exclude:
|
|
226
|
+
for field in identity_fields:
|
|
227
|
+
if not field:
|
|
228
|
+
continue
|
|
229
|
+
field_lower = field.lower()
|
|
230
|
+
|
|
231
|
+
# Enhanced bot detection: check if any field contains bot-like patterns
|
|
232
|
+
for bot_pattern in bot_patterns:
|
|
233
|
+
if bot_pattern in field_lower:
|
|
234
|
+
should_exclude = True
|
|
235
|
+
exclusion_reason = f"bot pattern '{bot_pattern}' matches field '{field}'"
|
|
236
|
+
break
|
|
237
|
+
|
|
238
|
+
# Additional bot detection: check for common bot patterns not in explicit list
|
|
239
|
+
if not should_exclude:
|
|
240
|
+
bot_indicators = ['[bot]', 'bot@', '-bot', 'automated', 'github-actions', 'dependabot', 'renovate']
|
|
241
|
+
for indicator in bot_indicators:
|
|
242
|
+
if indicator in field_lower:
|
|
243
|
+
# Only exclude if this bot-like pattern matches something in our exclude list
|
|
244
|
+
for exclude in excluded_lower:
|
|
245
|
+
if indicator.replace('[', '').replace(']', '') in exclude or exclude in field_lower:
|
|
246
|
+
should_exclude = True
|
|
247
|
+
exclusion_reason = f"bot indicator '{indicator}' in field '{field}' matches exclude pattern '{exclude}'"
|
|
248
|
+
break
|
|
249
|
+
if should_exclude:
|
|
250
|
+
break
|
|
251
|
+
|
|
252
|
+
if should_exclude:
|
|
253
|
+
break
|
|
254
|
+
|
|
255
|
+
if should_exclude:
|
|
256
|
+
excluded_count += 1
|
|
257
|
+
logger.debug(f"DEBUG EXCLUSION: EXCLUDING item - {exclusion_reason}")
|
|
258
|
+
logger.debug(f" canonical_id='{canonical_id}', primary_email='{item.get('primary_email', '')}', "
|
|
259
|
+
f"author_email='{author_email}', author_name='{author_name}', author='{author}', "
|
|
260
|
+
f"primary_name='{primary_name}', name='{name}', developer='{developer}', "
|
|
261
|
+
f"display_name='{display_name}'")
|
|
262
|
+
else:
|
|
263
|
+
filtered_data.append(item)
|
|
264
|
+
|
|
265
|
+
logger.debug(f"DEBUG EXCLUSION: Excluded {excluded_count} items, kept {len(filtered_data)} items")
|
|
266
|
+
return filtered_data
|
|
267
|
+
|
|
268
|
+
def _get_canonical_display_name(self, canonical_id: str, fallback_name: str) -> str:
|
|
269
|
+
"""
|
|
270
|
+
Get the canonical display name for a developer.
|
|
271
|
+
|
|
272
|
+
WHY: Manual identity mappings may have updated display names that aren't
|
|
273
|
+
reflected in the developer_stats data passed to report generators. This
|
|
274
|
+
method ensures we get the most current display name from the identity resolver.
|
|
275
|
+
|
|
276
|
+
Args:
|
|
277
|
+
canonical_id: The canonical ID to get the display name for
|
|
278
|
+
fallback_name: The fallback name to use if identity resolver is not available
|
|
279
|
+
|
|
280
|
+
Returns:
|
|
281
|
+
The canonical display name or fallback name
|
|
282
|
+
"""
|
|
283
|
+
if self.identity_resolver and canonical_id:
|
|
284
|
+
try:
|
|
285
|
+
canonical_name = self.identity_resolver.get_canonical_name(canonical_id)
|
|
286
|
+
if canonical_name and canonical_name != "Unknown":
|
|
287
|
+
return canonical_name
|
|
288
|
+
except Exception as e:
|
|
289
|
+
logger.debug(f"Error getting canonical name for {canonical_id}: {e}")
|
|
290
|
+
|
|
291
|
+
return fallback_name
|
|
292
|
+
|
|
293
|
+
def _log_datetime_comparison(
|
|
294
|
+
self, dt1: datetime, dt2: datetime, operation: str, location: str
|
|
295
|
+
) -> None:
|
|
296
|
+
"""Log datetime comparison details for debugging timezone issues."""
|
|
297
|
+
logger.debug(f"Comparing dates in {location} ({operation}):")
|
|
298
|
+
logger.debug(f" dt1: {dt1} (tzinfo: {dt1.tzinfo}, aware: {dt1.tzinfo is not None})")
|
|
299
|
+
logger.debug(f" dt2: {dt2} (tzinfo: {dt2.tzinfo}, aware: {dt2.tzinfo is not None})")
|
|
300
|
+
|
|
301
|
+
def _safe_datetime_compare(
|
|
302
|
+
self, dt1: datetime, dt2: datetime, operation: str, location: str
|
|
303
|
+
) -> bool:
|
|
304
|
+
"""Safely compare datetimes with logging and error handling."""
|
|
305
|
+
try:
|
|
306
|
+
self._log_datetime_comparison(dt1, dt2, operation, location)
|
|
307
|
+
|
|
308
|
+
if operation == "lt":
|
|
309
|
+
result = dt1 < dt2
|
|
310
|
+
elif operation == "gt":
|
|
311
|
+
result = dt1 > dt2
|
|
312
|
+
elif operation == "le":
|
|
313
|
+
result = dt1 <= dt2
|
|
314
|
+
elif operation == "ge":
|
|
315
|
+
result = dt1 >= dt2
|
|
316
|
+
elif operation == "eq":
|
|
317
|
+
result = dt1 == dt2
|
|
318
|
+
else:
|
|
319
|
+
raise ValueError(f"Unknown operation: {operation}")
|
|
320
|
+
|
|
321
|
+
logger.debug(f" Result: {result}")
|
|
322
|
+
return result
|
|
323
|
+
|
|
324
|
+
except TypeError as e:
|
|
325
|
+
logger.error(f"Timezone comparison error in {location}:")
|
|
326
|
+
logger.error(
|
|
327
|
+
f" dt1: {dt1} (type: {type(dt1)}, tzinfo: {getattr(dt1, 'tzinfo', 'N/A')})"
|
|
328
|
+
)
|
|
329
|
+
logger.error(
|
|
330
|
+
f" dt2: {dt2} (type: {type(dt2)}, tzinfo: {getattr(dt2, 'tzinfo', 'N/A')})"
|
|
331
|
+
)
|
|
332
|
+
logger.error(f" Operation: {operation}")
|
|
333
|
+
logger.error(f" Error: {e}")
|
|
334
|
+
|
|
335
|
+
# Import traceback for detailed error info
|
|
336
|
+
import traceback
|
|
337
|
+
|
|
338
|
+
logger.error(f" Full traceback:\n{traceback.format_exc()}")
|
|
339
|
+
|
|
340
|
+
# Try to fix by making both timezone-aware in UTC
|
|
341
|
+
try:
|
|
342
|
+
if dt1.tzinfo is None:
|
|
343
|
+
dt1 = dt1.replace(tzinfo=timezone.utc)
|
|
344
|
+
logger.debug(f" Fixed dt1 to UTC: {dt1}")
|
|
345
|
+
if dt2.tzinfo is None:
|
|
346
|
+
dt2 = dt2.replace(tzinfo=timezone.utc)
|
|
347
|
+
logger.debug(f" Fixed dt2 to UTC: {dt2}")
|
|
348
|
+
|
|
349
|
+
# Retry comparison
|
|
350
|
+
if operation == "lt":
|
|
351
|
+
result = dt1 < dt2
|
|
352
|
+
elif operation == "gt":
|
|
353
|
+
result = dt1 > dt2
|
|
354
|
+
elif operation == "le":
|
|
355
|
+
result = dt1 <= dt2
|
|
356
|
+
elif operation == "ge":
|
|
357
|
+
result = dt1 >= dt2
|
|
358
|
+
elif operation == "eq":
|
|
359
|
+
result = dt1 == dt2
|
|
360
|
+
else:
|
|
361
|
+
raise ValueError(f"Unknown operation: {operation}")
|
|
362
|
+
|
|
363
|
+
logger.info(f" Fixed comparison result: {result}")
|
|
364
|
+
return result
|
|
365
|
+
|
|
366
|
+
except Exception as fix_error:
|
|
367
|
+
logger.error(f" Failed to fix timezone issue: {fix_error}")
|
|
368
|
+
raise
|
|
369
|
+
|
|
370
|
+
def _safe_datetime_format(self, dt: datetime, format_str: str) -> str:
|
|
371
|
+
"""Safely format datetime with logging."""
|
|
372
|
+
try:
|
|
373
|
+
logger.debug(
|
|
374
|
+
f"Formatting datetime: {dt} (tzinfo: {getattr(dt, 'tzinfo', 'N/A')}) with format {format_str}"
|
|
375
|
+
)
|
|
376
|
+
result = dt.strftime(format_str)
|
|
377
|
+
logger.debug(f" Format result: {result}")
|
|
378
|
+
return result
|
|
379
|
+
except Exception as e:
|
|
380
|
+
logger.error(f"Error formatting datetime {dt}: {e}")
|
|
381
|
+
return str(dt)
|
|
382
|
+
|
|
383
|
+
def generate_weekly_report(
|
|
384
|
+
self,
|
|
385
|
+
commits: list[dict[str, Any]],
|
|
386
|
+
developer_stats: list[dict[str, Any]],
|
|
387
|
+
output_path: Path,
|
|
388
|
+
weeks: int = 12,
|
|
389
|
+
) -> Path:
|
|
23
390
|
"""Generate weekly metrics CSV report."""
|
|
391
|
+
# Apply exclusion filtering in Phase 2
|
|
392
|
+
commits = self._filter_excluded_authors_list(commits)
|
|
393
|
+
developer_stats = self._filter_excluded_authors_list(developer_stats)
|
|
24
394
|
# Calculate week boundaries (timezone-aware to match commit timestamps)
|
|
25
395
|
end_date = datetime.now(timezone.utc)
|
|
26
396
|
start_date = end_date - timedelta(weeks=weeks)
|
|
27
|
-
|
|
397
|
+
|
|
398
|
+
logger.debug("Weekly report date range:")
|
|
399
|
+
logger.debug(f" start_date: {start_date} (tzinfo: {start_date.tzinfo})")
|
|
400
|
+
logger.debug(f" end_date: {end_date} (tzinfo: {end_date.tzinfo})")
|
|
401
|
+
|
|
28
402
|
# Group commits by week and developer
|
|
29
|
-
weekly_data:
|
|
30
|
-
|
|
403
|
+
weekly_data: dict[tuple[datetime, str, str], dict[str, Any]] = self._aggregate_weekly_data(
|
|
404
|
+
commits, start_date, end_date
|
|
405
|
+
)
|
|
406
|
+
|
|
31
407
|
# Create developer lookup
|
|
32
|
-
dev_lookup = {dev[
|
|
408
|
+
dev_lookup = {dev["canonical_id"]: dev for dev in developer_stats}
|
|
409
|
+
|
|
410
|
+
# First pass: collect all raw scores for curve normalization
|
|
411
|
+
developer_raw_scores = {}
|
|
412
|
+
weekly_scores = {}
|
|
33
413
|
|
|
414
|
+
for (week_start, canonical_id, project_key), metrics in weekly_data.items():
|
|
415
|
+
activity_result = self.activity_scorer.calculate_activity_score(metrics)
|
|
416
|
+
raw_score = activity_result["raw_score"]
|
|
417
|
+
|
|
418
|
+
# Store for curve normalization
|
|
419
|
+
if canonical_id not in developer_raw_scores:
|
|
420
|
+
developer_raw_scores[canonical_id] = 0
|
|
421
|
+
developer_raw_scores[canonical_id] += raw_score
|
|
422
|
+
|
|
423
|
+
# Store weekly result for later use
|
|
424
|
+
weekly_scores[(week_start, canonical_id, project_key)] = activity_result
|
|
425
|
+
|
|
426
|
+
# Apply curve normalization to developer totals
|
|
427
|
+
curve_normalized = self.activity_scorer.normalize_scores_on_curve(developer_raw_scores)
|
|
428
|
+
|
|
34
429
|
# Build CSV rows
|
|
35
430
|
rows = []
|
|
36
431
|
for (week_start, canonical_id, project_key), metrics in weekly_data.items():
|
|
37
432
|
developer = dev_lookup.get(canonical_id, {})
|
|
433
|
+
activity_result = weekly_scores[(week_start, canonical_id, project_key)]
|
|
38
434
|
|
|
435
|
+
# Get curve data for this developer
|
|
436
|
+
curve_data = curve_normalized.get(canonical_id, {})
|
|
437
|
+
|
|
39
438
|
row = {
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
439
|
+
"week_start": week_start.strftime("%Y-%m-%d"),
|
|
440
|
+
"developer_id": self._anonymize_value(canonical_id, "id"),
|
|
441
|
+
"developer_name": self._anonymize_value(
|
|
442
|
+
self._get_canonical_display_name(
|
|
443
|
+
canonical_id,
|
|
444
|
+
developer.get("primary_name", "Unknown")
|
|
445
|
+
), "name"
|
|
44
446
|
),
|
|
45
|
-
|
|
46
|
-
developer.get(
|
|
447
|
+
"developer_email": self._anonymize_value(
|
|
448
|
+
developer.get("primary_email", "unknown@example.com"), "email"
|
|
47
449
|
),
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
450
|
+
"project": project_key,
|
|
451
|
+
"commits": metrics["commits"],
|
|
452
|
+
"story_points": metrics["story_points"],
|
|
453
|
+
"lines_added": metrics["lines_added"],
|
|
454
|
+
"lines_removed": metrics["lines_removed"],
|
|
455
|
+
"files_changed": metrics["files_changed"],
|
|
456
|
+
"complexity_delta": round(metrics["complexity_delta"], 2),
|
|
457
|
+
"ticket_coverage_pct": round(metrics["ticket_coverage_pct"], 1),
|
|
458
|
+
"avg_commit_size": round(metrics["avg_commit_size"], 1),
|
|
459
|
+
"unique_tickets": metrics["unique_tickets"],
|
|
460
|
+
"prs_involved": metrics["prs_involved"],
|
|
461
|
+
# Activity score fields
|
|
462
|
+
"activity_score": round(activity_result["normalized_score"], 1),
|
|
463
|
+
"activity_level": activity_result["activity_level"],
|
|
464
|
+
"commit_score": round(activity_result["components"]["commit_score"], 1),
|
|
465
|
+
"pr_score": round(activity_result["components"]["pr_score"], 1),
|
|
466
|
+
"code_impact_score": round(activity_result["components"]["code_impact_score"], 1),
|
|
467
|
+
"complexity_score": round(activity_result["components"]["complexity_score"], 1),
|
|
468
|
+
# Curve normalization fields
|
|
469
|
+
"curved_score": curve_data.get("curved_score", 0),
|
|
470
|
+
"percentile": curve_data.get("percentile", 0),
|
|
471
|
+
"quintile": curve_data.get("quintile", 0),
|
|
472
|
+
"curved_activity_level": curve_data.get("activity_level", "unknown"),
|
|
59
473
|
}
|
|
60
474
|
rows.append(row)
|
|
61
|
-
|
|
475
|
+
|
|
62
476
|
# Sort by week and developer
|
|
63
|
-
rows.sort(key=lambda x: (x[
|
|
64
|
-
|
|
477
|
+
rows.sort(key=lambda x: (x["week_start"], x["developer_name"], x["project"]))
|
|
478
|
+
|
|
65
479
|
# Write CSV
|
|
66
480
|
if rows:
|
|
67
481
|
df = pd.DataFrame(rows)
|
|
68
482
|
df.to_csv(output_path, index=False)
|
|
69
483
|
else:
|
|
70
484
|
# Write empty CSV with headers
|
|
71
|
-
with open(output_path,
|
|
72
|
-
writer = csv.DictWriter(
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
485
|
+
with open(output_path, "w", newline="") as f:
|
|
486
|
+
writer = csv.DictWriter(
|
|
487
|
+
f,
|
|
488
|
+
fieldnames=[
|
|
489
|
+
"week_start",
|
|
490
|
+
"developer_id",
|
|
491
|
+
"developer_name",
|
|
492
|
+
"developer_email",
|
|
493
|
+
"project",
|
|
494
|
+
"commits",
|
|
495
|
+
"story_points",
|
|
496
|
+
"lines_added",
|
|
497
|
+
"lines_removed",
|
|
498
|
+
"files_changed",
|
|
499
|
+
"complexity_delta",
|
|
500
|
+
"ticket_coverage_pct",
|
|
501
|
+
"avg_commit_size",
|
|
502
|
+
"unique_tickets",
|
|
503
|
+
"prs_involved",
|
|
504
|
+
"activity_score",
|
|
505
|
+
"activity_level",
|
|
506
|
+
"commit_score",
|
|
507
|
+
"pr_score",
|
|
508
|
+
"code_impact_score",
|
|
509
|
+
"complexity_score",
|
|
510
|
+
"curved_score",
|
|
511
|
+
"percentile",
|
|
512
|
+
"quintile",
|
|
513
|
+
"curved_activity_level",
|
|
514
|
+
],
|
|
515
|
+
)
|
|
78
516
|
writer.writeheader()
|
|
79
|
-
|
|
517
|
+
|
|
80
518
|
return output_path
|
|
81
|
-
|
|
82
|
-
def generate_summary_report(
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
519
|
+
|
|
520
|
+
def generate_summary_report(
|
|
521
|
+
self,
|
|
522
|
+
commits: list[dict[str, Any]],
|
|
523
|
+
prs: list[dict[str, Any]],
|
|
524
|
+
developer_stats: list[dict[str, Any]],
|
|
525
|
+
ticket_analysis: dict[str, Any],
|
|
526
|
+
output_path: Path,
|
|
527
|
+
pm_data: Optional[dict[str, Any]] = None,
|
|
528
|
+
) -> Path:
|
|
87
529
|
"""Generate summary statistics CSV."""
|
|
88
|
-
|
|
530
|
+
# Apply exclusion filtering in Phase 2
|
|
531
|
+
commits = self._filter_excluded_authors_list(commits)
|
|
532
|
+
developer_stats = self._filter_excluded_authors_list(developer_stats)
|
|
89
533
|
|
|
534
|
+
summary_data = []
|
|
535
|
+
|
|
90
536
|
# Overall statistics
|
|
91
537
|
total_commits = len(commits)
|
|
92
|
-
total_story_points = sum(c.get(
|
|
538
|
+
total_story_points = sum(c.get("story_points", 0) or 0 for c in commits)
|
|
93
539
|
# Use filtered stats if available, otherwise fall back to raw stats
|
|
94
540
|
total_lines = sum(
|
|
95
|
-
c.get(
|
|
96
|
-
c.get(
|
|
541
|
+
c.get("filtered_insertions", c.get("insertions", 0))
|
|
542
|
+
+ c.get("filtered_deletions", c.get("deletions", 0))
|
|
97
543
|
for c in commits
|
|
98
544
|
)
|
|
99
|
-
|
|
100
|
-
summary_data.append(
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
summary_data.append(
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
})
|
|
117
|
-
|
|
118
|
-
summary_data.append({
|
|
119
|
-
'metric': 'Active Developers',
|
|
120
|
-
'value': len(developer_stats),
|
|
121
|
-
'category': 'Overall'
|
|
122
|
-
})
|
|
123
|
-
|
|
545
|
+
|
|
546
|
+
summary_data.append(
|
|
547
|
+
{"metric": "Total Commits", "value": total_commits, "category": "Overall"}
|
|
548
|
+
)
|
|
549
|
+
|
|
550
|
+
summary_data.append(
|
|
551
|
+
{"metric": "Total Story Points", "value": total_story_points, "category": "Overall"}
|
|
552
|
+
)
|
|
553
|
+
|
|
554
|
+
summary_data.append(
|
|
555
|
+
{"metric": "Total Lines Changed", "value": total_lines, "category": "Overall"}
|
|
556
|
+
)
|
|
557
|
+
|
|
558
|
+
summary_data.append(
|
|
559
|
+
{"metric": "Active Developers", "value": len(developer_stats), "category": "Overall"}
|
|
560
|
+
)
|
|
561
|
+
|
|
124
562
|
# Ticket coverage
|
|
125
|
-
summary_data.append(
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
563
|
+
summary_data.append(
|
|
564
|
+
{
|
|
565
|
+
"metric": "Commit Ticket Coverage %",
|
|
566
|
+
"value": round(ticket_analysis.get("commit_coverage_pct", 0), 1),
|
|
567
|
+
"category": "Tracking",
|
|
568
|
+
}
|
|
569
|
+
)
|
|
570
|
+
|
|
571
|
+
summary_data.append(
|
|
572
|
+
{
|
|
573
|
+
"metric": "PR Ticket Coverage %",
|
|
574
|
+
"value": round(ticket_analysis.get("pr_coverage_pct", 0), 1),
|
|
575
|
+
"category": "Tracking",
|
|
576
|
+
}
|
|
577
|
+
)
|
|
578
|
+
|
|
137
579
|
# Platform breakdown
|
|
138
|
-
for platform, count in ticket_analysis.get(
|
|
139
|
-
summary_data.append(
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
})
|
|
144
|
-
|
|
580
|
+
for platform, count in ticket_analysis.get("ticket_summary", {}).items():
|
|
581
|
+
summary_data.append(
|
|
582
|
+
{"metric": f"{platform.title()} Tickets", "value": count, "category": "Platforms"}
|
|
583
|
+
)
|
|
584
|
+
|
|
145
585
|
# Developer statistics
|
|
146
586
|
if developer_stats:
|
|
147
|
-
top_contributor = max(developer_stats, key=lambda x: x[
|
|
148
|
-
summary_data.append(
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
587
|
+
top_contributor = max(developer_stats, key=lambda x: x["total_commits"])
|
|
588
|
+
summary_data.append(
|
|
589
|
+
{
|
|
590
|
+
"metric": "Top Contributor",
|
|
591
|
+
"value": self._anonymize_value(
|
|
592
|
+
self._get_canonical_display_name(
|
|
593
|
+
top_contributor["canonical_id"],
|
|
594
|
+
top_contributor["primary_name"]
|
|
595
|
+
), "name"
|
|
596
|
+
),
|
|
597
|
+
"category": "Developers",
|
|
598
|
+
}
|
|
599
|
+
)
|
|
600
|
+
|
|
601
|
+
summary_data.append(
|
|
602
|
+
{
|
|
603
|
+
"metric": "Top Contributor Commits",
|
|
604
|
+
"value": top_contributor["total_commits"],
|
|
605
|
+
"category": "Developers",
|
|
606
|
+
}
|
|
607
|
+
)
|
|
608
|
+
|
|
609
|
+
# PM Platform statistics
|
|
610
|
+
if pm_data and "metrics" in pm_data:
|
|
611
|
+
metrics = pm_data["metrics"]
|
|
612
|
+
|
|
613
|
+
# Total PM issues
|
|
614
|
+
summary_data.append(
|
|
615
|
+
{
|
|
616
|
+
"metric": "Total PM Issues",
|
|
617
|
+
"value": metrics.get("total_pm_issues", 0),
|
|
618
|
+
"category": "PM Platforms",
|
|
619
|
+
}
|
|
620
|
+
)
|
|
621
|
+
|
|
622
|
+
# Story point analysis
|
|
623
|
+
story_analysis = metrics.get("story_point_analysis", {})
|
|
624
|
+
summary_data.append(
|
|
625
|
+
{
|
|
626
|
+
"metric": "PM Story Points",
|
|
627
|
+
"value": story_analysis.get("pm_total_story_points", 0),
|
|
628
|
+
"category": "PM Platforms",
|
|
629
|
+
}
|
|
630
|
+
)
|
|
631
|
+
|
|
632
|
+
summary_data.append(
|
|
633
|
+
{
|
|
634
|
+
"metric": "Story Point Coverage %",
|
|
635
|
+
"value": round(story_analysis.get("story_point_coverage_pct", 0), 1),
|
|
636
|
+
"category": "PM Platforms",
|
|
637
|
+
}
|
|
638
|
+
)
|
|
639
|
+
|
|
640
|
+
# Issue type distribution
|
|
641
|
+
issue_types = metrics.get("issue_type_distribution", {})
|
|
642
|
+
for issue_type, count in issue_types.items():
|
|
643
|
+
summary_data.append(
|
|
644
|
+
{
|
|
645
|
+
"metric": f"{issue_type.title()} Issues",
|
|
646
|
+
"value": count,
|
|
647
|
+
"category": "Issue Types",
|
|
648
|
+
}
|
|
649
|
+
)
|
|
650
|
+
|
|
651
|
+
# Platform coverage
|
|
652
|
+
platform_coverage = metrics.get("platform_coverage", {})
|
|
653
|
+
for platform, coverage_data in platform_coverage.items():
|
|
654
|
+
summary_data.append(
|
|
655
|
+
{
|
|
656
|
+
"metric": f"{platform.title()} Issues",
|
|
657
|
+
"value": coverage_data.get("total_issues", 0),
|
|
658
|
+
"category": "Platform Coverage",
|
|
659
|
+
}
|
|
660
|
+
)
|
|
661
|
+
|
|
662
|
+
summary_data.append(
|
|
663
|
+
{
|
|
664
|
+
"metric": f"{platform.title()} Linked %",
|
|
665
|
+
"value": round(coverage_data.get("coverage_percentage", 0), 1),
|
|
666
|
+
"category": "Platform Coverage",
|
|
667
|
+
}
|
|
668
|
+
)
|
|
669
|
+
|
|
670
|
+
# Correlation quality
|
|
671
|
+
correlation_quality = metrics.get("correlation_quality", {})
|
|
672
|
+
summary_data.append(
|
|
673
|
+
{
|
|
674
|
+
"metric": "Issue-Commit Correlations",
|
|
675
|
+
"value": correlation_quality.get("total_correlations", 0),
|
|
676
|
+
"category": "Correlation Quality",
|
|
677
|
+
}
|
|
678
|
+
)
|
|
679
|
+
|
|
680
|
+
summary_data.append(
|
|
681
|
+
{
|
|
682
|
+
"metric": "Avg Correlation Confidence",
|
|
683
|
+
"value": round(correlation_quality.get("average_confidence", 0), 2),
|
|
684
|
+
"category": "Correlation Quality",
|
|
685
|
+
}
|
|
686
|
+
)
|
|
687
|
+
|
|
160
688
|
# Write summary CSV
|
|
161
689
|
df = pd.DataFrame(summary_data)
|
|
162
690
|
df.to_csv(output_path, index=False)
|
|
163
|
-
|
|
691
|
+
|
|
164
692
|
return output_path
|
|
165
|
-
|
|
166
|
-
def generate_developer_report(
|
|
167
|
-
|
|
693
|
+
|
|
694
|
+
def generate_developer_report(
|
|
695
|
+
self, developer_stats: list[dict[str, Any]], output_path: Path
|
|
696
|
+
) -> Path:
|
|
168
697
|
"""Generate developer statistics CSV."""
|
|
169
698
|
rows = []
|
|
170
|
-
|
|
699
|
+
|
|
171
700
|
for dev in developer_stats:
|
|
172
701
|
row = {
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
702
|
+
"developer_id": self._anonymize_value(dev["canonical_id"], "id"),
|
|
703
|
+
"name": self._anonymize_value(
|
|
704
|
+
self._get_canonical_display_name(
|
|
705
|
+
dev["canonical_id"],
|
|
706
|
+
dev["primary_name"]
|
|
707
|
+
), "name"
|
|
708
|
+
),
|
|
709
|
+
"email": self._anonymize_value(dev["primary_email"], "email"),
|
|
710
|
+
"github_username": (
|
|
711
|
+
self._anonymize_value(dev.get("github_username", ""), "username")
|
|
712
|
+
if dev.get("github_username")
|
|
713
|
+
else ""
|
|
714
|
+
),
|
|
715
|
+
"total_commits": dev["total_commits"],
|
|
716
|
+
"total_story_points": dev["total_story_points"],
|
|
717
|
+
"alias_count": dev.get("alias_count", 1),
|
|
718
|
+
"first_seen": (
|
|
719
|
+
self._safe_datetime_format(dev["first_seen"], "%Y-%m-%d")
|
|
720
|
+
if dev["first_seen"]
|
|
721
|
+
else ""
|
|
722
|
+
),
|
|
723
|
+
"last_seen": (
|
|
724
|
+
self._safe_datetime_format(dev["last_seen"], "%Y-%m-%d")
|
|
725
|
+
if dev["last_seen"]
|
|
726
|
+
else ""
|
|
727
|
+
),
|
|
728
|
+
"avg_story_points_per_commit": round(
|
|
729
|
+
dev["total_story_points"] / max(dev["total_commits"], 1), 2
|
|
730
|
+
),
|
|
187
731
|
}
|
|
188
732
|
rows.append(row)
|
|
189
|
-
|
|
733
|
+
|
|
190
734
|
# Sort by total commits
|
|
191
|
-
rows.sort(key=lambda x: x[
|
|
192
|
-
|
|
735
|
+
rows.sort(key=lambda x: x["total_commits"], reverse=True)
|
|
736
|
+
|
|
193
737
|
# Write CSV
|
|
194
738
|
df = pd.DataFrame(rows)
|
|
195
739
|
df.to_csv(output_path, index=False)
|
|
196
|
-
|
|
740
|
+
|
|
197
741
|
return output_path
|
|
198
|
-
|
|
199
|
-
def
|
|
200
|
-
|
|
201
|
-
|
|
742
|
+
|
|
743
|
+
def generate_pm_correlations_report(self, pm_data: dict[str, Any], output_path: Path) -> Path:
|
|
744
|
+
"""Generate PM platform correlations CSV report.
|
|
745
|
+
|
|
746
|
+
WHY: PM platform integration provides valuable correlation data between
|
|
747
|
+
work items and code changes. This report enables analysis of story point
|
|
748
|
+
accuracy, development velocity, and work item completion patterns.
|
|
749
|
+
|
|
750
|
+
Args:
|
|
751
|
+
pm_data: PM platform data including correlations and metrics.
|
|
752
|
+
output_path: Path where the CSV report should be written.
|
|
753
|
+
|
|
754
|
+
Returns:
|
|
755
|
+
Path to the generated CSV file.
|
|
756
|
+
"""
|
|
757
|
+
if not pm_data or "correlations" not in pm_data:
|
|
758
|
+
# Generate empty report if no PM data
|
|
759
|
+
df = pd.DataFrame(
|
|
760
|
+
columns=[
|
|
761
|
+
"commit_hash",
|
|
762
|
+
"commit_message",
|
|
763
|
+
"commit_author",
|
|
764
|
+
"commit_date",
|
|
765
|
+
"issue_key",
|
|
766
|
+
"issue_title",
|
|
767
|
+
"issue_type",
|
|
768
|
+
"issue_status",
|
|
769
|
+
"issue_platform",
|
|
770
|
+
"story_points",
|
|
771
|
+
"correlation_method",
|
|
772
|
+
"confidence",
|
|
773
|
+
"matched_text",
|
|
774
|
+
]
|
|
775
|
+
)
|
|
776
|
+
df.to_csv(output_path, index=False)
|
|
777
|
+
return output_path
|
|
778
|
+
|
|
779
|
+
correlations = pm_data["correlations"]
|
|
780
|
+
rows = []
|
|
781
|
+
|
|
782
|
+
for correlation in correlations:
|
|
783
|
+
row = {
|
|
784
|
+
"commit_hash": correlation.get("commit_hash", ""),
|
|
785
|
+
"commit_message": correlation.get("commit_message", ""),
|
|
786
|
+
"commit_author": self._anonymize_value(
|
|
787
|
+
correlation.get("commit_author", ""), "name"
|
|
788
|
+
),
|
|
789
|
+
"commit_date": correlation.get("commit_date", ""),
|
|
790
|
+
"issue_key": correlation.get("issue_key", ""),
|
|
791
|
+
"issue_title": correlation.get("issue_title", ""),
|
|
792
|
+
"issue_type": correlation.get("issue_type", ""),
|
|
793
|
+
"issue_status": correlation.get("issue_status", ""),
|
|
794
|
+
"issue_platform": correlation.get("issue_platform", ""),
|
|
795
|
+
"story_points": correlation.get("story_points", 0) or 0,
|
|
796
|
+
"correlation_method": correlation.get("correlation_method", ""),
|
|
797
|
+
"confidence": round(correlation.get("confidence", 0), 3),
|
|
798
|
+
"matched_text": correlation.get("matched_text", ""),
|
|
799
|
+
}
|
|
800
|
+
rows.append(row)
|
|
801
|
+
|
|
802
|
+
df = pd.DataFrame(rows)
|
|
803
|
+
df.to_csv(output_path, index=False)
|
|
804
|
+
|
|
805
|
+
return output_path
|
|
806
|
+
|
|
807
|
+
def _aggregate_weekly_data(
|
|
808
|
+
self, commits: list[dict[str, Any]], start_date: datetime, end_date: datetime
|
|
809
|
+
) -> dict[tuple[datetime, str, str], dict[str, Any]]:
|
|
202
810
|
"""Aggregate commit data by week."""
|
|
203
|
-
weekly_data: defaultdict[
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
811
|
+
weekly_data: defaultdict[tuple[datetime, str, str], dict[str, Any]] = defaultdict(
|
|
812
|
+
lambda: {
|
|
813
|
+
"commits": 0,
|
|
814
|
+
"story_points": 0,
|
|
815
|
+
"lines_added": 0,
|
|
816
|
+
"lines_removed": 0,
|
|
817
|
+
"files_changed": 0,
|
|
818
|
+
"complexity_delta": 0.0,
|
|
819
|
+
"commits_with_tickets": 0,
|
|
820
|
+
"tickets": set(),
|
|
821
|
+
"prs": set(),
|
|
822
|
+
}
|
|
823
|
+
)
|
|
824
|
+
|
|
215
825
|
for commit in commits:
|
|
216
|
-
timestamp = commit[
|
|
826
|
+
timestamp = commit["timestamp"]
|
|
827
|
+
logger.debug(
|
|
828
|
+
f"Processing commit timestamp: {timestamp} (tzinfo: {getattr(timestamp, 'tzinfo', 'N/A')})"
|
|
829
|
+
)
|
|
830
|
+
|
|
217
831
|
# Ensure consistent timezone handling
|
|
218
|
-
if hasattr(timestamp,
|
|
832
|
+
if hasattr(timestamp, "tzinfo") and timestamp.tzinfo is not None:
|
|
219
833
|
# Keep timezone-aware but ensure it's UTC
|
|
220
834
|
if timestamp.tzinfo != timezone.utc:
|
|
221
835
|
timestamp = timestamp.astimezone(timezone.utc)
|
|
836
|
+
logger.debug(f" Converted to UTC: {timestamp}")
|
|
222
837
|
else:
|
|
223
838
|
# Convert naive datetime to UTC timezone-aware
|
|
224
839
|
timestamp = timestamp.replace(tzinfo=timezone.utc)
|
|
225
|
-
|
|
226
|
-
|
|
840
|
+
logger.debug(f" Made timezone-aware: {timestamp}")
|
|
841
|
+
|
|
842
|
+
# Use safe comparison functions with logging
|
|
843
|
+
if self._safe_datetime_compare(
|
|
844
|
+
timestamp, start_date, "lt", "_aggregate_weekly_data range check"
|
|
845
|
+
) or self._safe_datetime_compare(
|
|
846
|
+
timestamp, end_date, "gt", "_aggregate_weekly_data range check"
|
|
847
|
+
):
|
|
848
|
+
logger.debug(" Skipping commit outside date range")
|
|
227
849
|
continue
|
|
228
|
-
|
|
850
|
+
|
|
229
851
|
# Get week start (Monday)
|
|
230
852
|
week_start = self._get_week_start(timestamp)
|
|
231
|
-
|
|
853
|
+
|
|
232
854
|
# Get project key (default to 'unknown')
|
|
233
|
-
project_key = commit.get(
|
|
234
|
-
|
|
855
|
+
project_key = commit.get("project_key", "unknown")
|
|
856
|
+
|
|
235
857
|
# Get canonical developer ID
|
|
236
|
-
canonical_id = commit.get(
|
|
237
|
-
|
|
858
|
+
canonical_id = commit.get("canonical_id", commit.get("author_email", "unknown"))
|
|
859
|
+
|
|
238
860
|
key = (week_start, canonical_id, project_key)
|
|
239
|
-
|
|
861
|
+
|
|
240
862
|
# Aggregate metrics
|
|
241
863
|
data = weekly_data[key]
|
|
242
|
-
data[
|
|
243
|
-
data[
|
|
244
|
-
|
|
864
|
+
data["commits"] += 1
|
|
865
|
+
data["story_points"] += commit.get("story_points", 0) or 0
|
|
866
|
+
|
|
245
867
|
# Use filtered stats if available, otherwise fall back to raw stats
|
|
246
|
-
data[
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
868
|
+
data["lines_added"] += (
|
|
869
|
+
commit.get("filtered_insertions", commit.get("insertions", 0)) or 0
|
|
870
|
+
)
|
|
871
|
+
data["lines_removed"] += (
|
|
872
|
+
commit.get("filtered_deletions", commit.get("deletions", 0)) or 0
|
|
873
|
+
)
|
|
874
|
+
data["files_changed"] += (
|
|
875
|
+
commit.get("filtered_files_changed", commit.get("files_changed", 0)) or 0
|
|
876
|
+
)
|
|
877
|
+
|
|
878
|
+
data["complexity_delta"] += commit.get("complexity_delta", 0.0) or 0.0
|
|
879
|
+
|
|
252
880
|
# Track tickets
|
|
253
|
-
ticket_refs = commit.get(
|
|
881
|
+
ticket_refs = commit.get("ticket_references", [])
|
|
254
882
|
if ticket_refs:
|
|
255
|
-
data[
|
|
256
|
-
tickets_set = data[
|
|
883
|
+
data["commits_with_tickets"] += 1
|
|
884
|
+
tickets_set = data["tickets"]
|
|
257
885
|
for ticket in ticket_refs:
|
|
258
886
|
if isinstance(ticket, dict):
|
|
259
|
-
tickets_set.add(ticket.get(
|
|
887
|
+
tickets_set.add(ticket.get("full_id", ""))
|
|
260
888
|
else:
|
|
261
889
|
tickets_set.add(str(ticket))
|
|
262
|
-
|
|
890
|
+
|
|
263
891
|
# Track PRs (if available)
|
|
264
|
-
pr_number = commit.get(
|
|
892
|
+
pr_number = commit.get("pr_number")
|
|
265
893
|
if pr_number:
|
|
266
|
-
prs_set = data[
|
|
894
|
+
prs_set = data["prs"]
|
|
267
895
|
prs_set.add(pr_number)
|
|
268
|
-
|
|
896
|
+
|
|
269
897
|
# Calculate derived metrics
|
|
270
|
-
result:
|
|
898
|
+
result: dict[tuple[datetime, str, str], dict[str, Any]] = {}
|
|
271
899
|
for key, metrics in weekly_data.items():
|
|
272
|
-
commits_count = metrics[
|
|
900
|
+
commits_count = metrics["commits"]
|
|
273
901
|
if commits_count > 0:
|
|
274
|
-
metrics[
|
|
275
|
-
metrics[
|
|
276
|
-
)
|
|
277
|
-
metrics['avg_commit_size'] = (
|
|
278
|
-
(metrics['lines_added'] + metrics['lines_removed']) / commits_count
|
|
902
|
+
metrics["ticket_coverage_pct"] = (
|
|
903
|
+
metrics["commits_with_tickets"] / commits_count * 100
|
|
279
904
|
)
|
|
905
|
+
metrics["avg_commit_size"] = (
|
|
906
|
+
metrics["lines_added"] + metrics["lines_removed"]
|
|
907
|
+
) / commits_count
|
|
280
908
|
else:
|
|
281
|
-
metrics[
|
|
282
|
-
metrics[
|
|
283
|
-
|
|
284
|
-
tickets_set = metrics[
|
|
285
|
-
prs_set = metrics[
|
|
286
|
-
metrics[
|
|
287
|
-
metrics[
|
|
288
|
-
|
|
909
|
+
metrics["ticket_coverage_pct"] = 0
|
|
910
|
+
metrics["avg_commit_size"] = 0
|
|
911
|
+
|
|
912
|
+
tickets_set = metrics["tickets"]
|
|
913
|
+
prs_set = metrics["prs"]
|
|
914
|
+
metrics["unique_tickets"] = len(tickets_set)
|
|
915
|
+
metrics["prs_involved"] = len(prs_set)
|
|
916
|
+
|
|
289
917
|
# Remove sets before returning
|
|
290
|
-
del metrics[
|
|
291
|
-
del metrics[
|
|
292
|
-
del metrics[
|
|
293
|
-
|
|
918
|
+
del metrics["tickets"]
|
|
919
|
+
del metrics["prs"]
|
|
920
|
+
del metrics["commits_with_tickets"]
|
|
921
|
+
|
|
294
922
|
result[key] = metrics
|
|
295
|
-
|
|
923
|
+
|
|
296
924
|
return result
|
|
297
|
-
|
|
925
|
+
|
|
298
926
|
def _get_week_start(self, date: datetime) -> datetime:
|
|
299
927
|
"""Get Monday of the week for a given date."""
|
|
928
|
+
logger.debug(
|
|
929
|
+
f"Getting week start for date: {date} (tzinfo: {getattr(date, 'tzinfo', 'N/A')})"
|
|
930
|
+
)
|
|
931
|
+
|
|
300
932
|
# Ensure consistent timezone handling - keep timezone info
|
|
301
|
-
if hasattr(date,
|
|
933
|
+
if hasattr(date, "tzinfo") and date.tzinfo is not None:
|
|
302
934
|
# Keep timezone-aware but ensure it's UTC
|
|
303
935
|
if date.tzinfo != timezone.utc:
|
|
304
936
|
date = date.astimezone(timezone.utc)
|
|
937
|
+
logger.debug(f" Converted to UTC: {date}")
|
|
305
938
|
else:
|
|
306
939
|
# Convert naive datetime to UTC timezone-aware
|
|
307
940
|
date = date.replace(tzinfo=timezone.utc)
|
|
308
|
-
|
|
941
|
+
logger.debug(f" Made timezone-aware: {date}")
|
|
942
|
+
|
|
309
943
|
days_since_monday = date.weekday()
|
|
310
944
|
monday = date - timedelta(days=days_since_monday)
|
|
311
|
-
|
|
312
|
-
|
|
945
|
+
result = monday.replace(hour=0, minute=0, second=0, microsecond=0)
|
|
946
|
+
|
|
947
|
+
logger.debug(f" Week start result: {result} (tzinfo: {result.tzinfo})")
|
|
948
|
+
return result
|
|
949
|
+
|
|
950
|
+
def generate_developer_activity_summary(
|
|
951
|
+
self,
|
|
952
|
+
commits: list[dict[str, Any]],
|
|
953
|
+
developer_stats: list[dict[str, Any]],
|
|
954
|
+
prs: list[dict[str, Any]],
|
|
955
|
+
output_path: Path,
|
|
956
|
+
weeks: int = 12,
|
|
957
|
+
) -> Path:
|
|
958
|
+
"""Generate developer activity summary with curve-normalized scores.
|
|
959
|
+
|
|
960
|
+
This report provides a high-level view of developer activity with
|
|
961
|
+
curve-normalized scores that allow for fair comparison across the team.
|
|
962
|
+
"""
|
|
963
|
+
# Apply exclusion filtering in Phase 2
|
|
964
|
+
commits = self._filter_excluded_authors_list(commits)
|
|
965
|
+
developer_stats = self._filter_excluded_authors_list(developer_stats)
|
|
966
|
+
|
|
967
|
+
# Calculate date range
|
|
968
|
+
end_date = datetime.now(timezone.utc)
|
|
969
|
+
start_date = end_date - timedelta(weeks=weeks)
|
|
970
|
+
|
|
971
|
+
# Aggregate metrics by developer
|
|
972
|
+
developer_metrics = defaultdict(lambda: {
|
|
973
|
+
"commits": 0,
|
|
974
|
+
"prs_involved": 0,
|
|
975
|
+
"lines_added": 0,
|
|
976
|
+
"lines_removed": 0,
|
|
977
|
+
"files_changed": 0,
|
|
978
|
+
"complexity_delta": 0.0,
|
|
979
|
+
"story_points": 0,
|
|
980
|
+
"unique_tickets": set(),
|
|
981
|
+
})
|
|
982
|
+
|
|
983
|
+
# Process commits
|
|
984
|
+
for commit in commits:
|
|
985
|
+
timestamp = commit["timestamp"]
|
|
986
|
+
if hasattr(timestamp, "tzinfo") and timestamp.tzinfo is None:
|
|
987
|
+
timestamp = timestamp.replace(tzinfo=timezone.utc)
|
|
988
|
+
|
|
989
|
+
if timestamp < start_date or timestamp > end_date:
|
|
990
|
+
continue
|
|
991
|
+
|
|
992
|
+
dev_id = commit.get("canonical_id", commit.get("author_email", "unknown"))
|
|
993
|
+
metrics = developer_metrics[dev_id]
|
|
994
|
+
|
|
995
|
+
metrics["commits"] += 1
|
|
996
|
+
metrics["lines_added"] += commit.get("filtered_insertions", commit.get("insertions", 0)) or 0
|
|
997
|
+
metrics["lines_removed"] += commit.get("filtered_deletions", commit.get("deletions", 0)) or 0
|
|
998
|
+
metrics["files_changed"] += commit.get("filtered_files_changed", commit.get("files_changed", 0)) or 0
|
|
999
|
+
metrics["complexity_delta"] += commit.get("complexity_delta", 0.0) or 0.0
|
|
1000
|
+
metrics["story_points"] += commit.get("story_points", 0) or 0
|
|
1001
|
+
|
|
1002
|
+
ticket_refs = commit.get("ticket_references", [])
|
|
1003
|
+
for ticket in ticket_refs:
|
|
1004
|
+
if isinstance(ticket, dict):
|
|
1005
|
+
metrics["unique_tickets"].add(ticket.get("full_id", ""))
|
|
1006
|
+
else:
|
|
1007
|
+
metrics["unique_tickets"].add(str(ticket))
|
|
1008
|
+
|
|
1009
|
+
# Process PRs
|
|
1010
|
+
for pr in prs:
|
|
1011
|
+
author_id = pr.get("canonical_id", pr.get("author", "unknown"))
|
|
1012
|
+
if author_id in developer_metrics:
|
|
1013
|
+
developer_metrics[author_id]["prs_involved"] += 1
|
|
1014
|
+
|
|
1015
|
+
# Calculate activity scores
|
|
1016
|
+
developer_scores = {}
|
|
1017
|
+
developer_results = {}
|
|
1018
|
+
|
|
1019
|
+
for dev_id, metrics in developer_metrics.items():
|
|
1020
|
+
# Convert sets to counts
|
|
1021
|
+
metrics["unique_tickets"] = len(metrics["unique_tickets"])
|
|
1022
|
+
|
|
1023
|
+
# Calculate activity score
|
|
1024
|
+
activity_result = self.activity_scorer.calculate_activity_score(metrics)
|
|
1025
|
+
developer_scores[dev_id] = activity_result["raw_score"]
|
|
1026
|
+
developer_results[dev_id] = activity_result
|
|
1027
|
+
|
|
1028
|
+
# Apply curve normalization
|
|
1029
|
+
curve_normalized = self.activity_scorer.normalize_scores_on_curve(developer_scores)
|
|
1030
|
+
|
|
1031
|
+
# Create developer lookup
|
|
1032
|
+
dev_lookup = {dev["canonical_id"]: dev for dev in developer_stats}
|
|
1033
|
+
|
|
1034
|
+
# Build rows
|
|
1035
|
+
rows = []
|
|
1036
|
+
for dev_id, metrics in developer_metrics.items():
|
|
1037
|
+
developer = dev_lookup.get(dev_id, {})
|
|
1038
|
+
activity_result = developer_results[dev_id]
|
|
1039
|
+
curve_data = curve_normalized.get(dev_id, {})
|
|
1040
|
+
|
|
1041
|
+
row = {
|
|
1042
|
+
"developer_id": self._anonymize_value(dev_id, "id"),
|
|
1043
|
+
"developer_name": self._anonymize_value(
|
|
1044
|
+
self._get_canonical_display_name(
|
|
1045
|
+
dev_id,
|
|
1046
|
+
developer.get("primary_name", "Unknown")
|
|
1047
|
+
), "name"
|
|
1048
|
+
),
|
|
1049
|
+
"commits": metrics["commits"],
|
|
1050
|
+
"prs": metrics["prs_involved"],
|
|
1051
|
+
"story_points": metrics["story_points"],
|
|
1052
|
+
"lines_added": metrics["lines_added"],
|
|
1053
|
+
"lines_removed": metrics["lines_removed"],
|
|
1054
|
+
"files_changed": metrics["files_changed"],
|
|
1055
|
+
"unique_tickets": metrics["unique_tickets"],
|
|
1056
|
+
# Raw activity scores
|
|
1057
|
+
"raw_activity_score": round(activity_result["raw_score"], 1),
|
|
1058
|
+
"normalized_activity_score": round(activity_result["normalized_score"], 1),
|
|
1059
|
+
"activity_level": activity_result["activity_level"],
|
|
1060
|
+
# Curve-normalized scores
|
|
1061
|
+
"curved_score": curve_data.get("curved_score", 0),
|
|
1062
|
+
"percentile": curve_data.get("percentile", 0),
|
|
1063
|
+
"quintile": curve_data.get("quintile", 0),
|
|
1064
|
+
"curved_activity_level": curve_data.get("activity_level", "unknown"),
|
|
1065
|
+
"level_description": curve_data.get("level_description", ""),
|
|
1066
|
+
# Component breakdown
|
|
1067
|
+
"commit_score": round(activity_result["components"]["commit_score"], 1),
|
|
1068
|
+
"pr_score": round(activity_result["components"]["pr_score"], 1),
|
|
1069
|
+
"code_impact_score": round(activity_result["components"]["code_impact_score"], 1),
|
|
1070
|
+
"complexity_score": round(activity_result["components"]["complexity_score"], 1),
|
|
1071
|
+
}
|
|
1072
|
+
rows.append(row)
|
|
1073
|
+
|
|
1074
|
+
# Sort by curved score (highest first)
|
|
1075
|
+
rows.sort(key=lambda x: x["curved_score"], reverse=True)
|
|
1076
|
+
|
|
1077
|
+
# Write CSV
|
|
1078
|
+
if rows:
|
|
1079
|
+
df = pd.DataFrame(rows)
|
|
1080
|
+
df.to_csv(output_path, index=False)
|
|
1081
|
+
else:
|
|
1082
|
+
# Write empty CSV with headers
|
|
1083
|
+
with open(output_path, "w", newline="") as f:
|
|
1084
|
+
writer = csv.DictWriter(
|
|
1085
|
+
f,
|
|
1086
|
+
fieldnames=[
|
|
1087
|
+
"developer_id",
|
|
1088
|
+
"developer_name",
|
|
1089
|
+
"commits",
|
|
1090
|
+
"prs",
|
|
1091
|
+
"story_points",
|
|
1092
|
+
"lines_added",
|
|
1093
|
+
"lines_removed",
|
|
1094
|
+
"files_changed",
|
|
1095
|
+
"unique_tickets",
|
|
1096
|
+
"raw_activity_score",
|
|
1097
|
+
"normalized_activity_score",
|
|
1098
|
+
"activity_level",
|
|
1099
|
+
"curved_score",
|
|
1100
|
+
"percentile",
|
|
1101
|
+
"quintile",
|
|
1102
|
+
"curved_activity_level",
|
|
1103
|
+
"level_description",
|
|
1104
|
+
"commit_score",
|
|
1105
|
+
"pr_score",
|
|
1106
|
+
"code_impact_score",
|
|
1107
|
+
"complexity_score",
|
|
1108
|
+
],
|
|
1109
|
+
)
|
|
1110
|
+
writer.writeheader()
|
|
1111
|
+
|
|
1112
|
+
return output_path
|
|
1113
|
+
|
|
313
1114
|
def _anonymize_value(self, value: str, field_type: str) -> str:
|
|
314
1115
|
"""Anonymize a value if anonymization is enabled."""
|
|
315
1116
|
if not self.anonymize or not value:
|
|
316
1117
|
return value
|
|
317
|
-
|
|
318
|
-
if field_type ==
|
|
1118
|
+
|
|
1119
|
+
if field_type == "email" and "@" in value:
|
|
319
1120
|
# Keep domain for email
|
|
320
|
-
local, domain = value.split(
|
|
1121
|
+
local, domain = value.split("@", 1)
|
|
321
1122
|
value = local # Anonymize only local part
|
|
322
1123
|
suffix = f"@{domain}"
|
|
323
1124
|
else:
|
|
324
1125
|
suffix = ""
|
|
325
|
-
|
|
1126
|
+
|
|
326
1127
|
if value not in self._anonymization_map:
|
|
327
1128
|
self._anonymous_counter += 1
|
|
328
|
-
if field_type ==
|
|
1129
|
+
if field_type == "name":
|
|
329
1130
|
anonymous = f"Developer{self._anonymous_counter}"
|
|
330
|
-
elif field_type ==
|
|
1131
|
+
elif field_type == "email":
|
|
331
1132
|
anonymous = f"dev{self._anonymous_counter}"
|
|
332
|
-
elif field_type ==
|
|
1133
|
+
elif field_type == "id":
|
|
333
1134
|
anonymous = f"ID{self._anonymous_counter:04d}"
|
|
334
1135
|
else:
|
|
335
1136
|
anonymous = f"anon{self._anonymous_counter}"
|
|
336
|
-
|
|
1137
|
+
|
|
337
1138
|
self._anonymization_map[value] = anonymous
|
|
1139
|
+
|
|
1140
|
+
return self._anonymization_map[value] + suffix
|
|
1141
|
+
|
|
1142
|
+
def generate_untracked_commits_report(
|
|
1143
|
+
self, ticket_analysis: dict[str, Any], output_path: Path
|
|
1144
|
+
) -> Path:
|
|
1145
|
+
"""Generate detailed CSV report for commits without ticket references.
|
|
1146
|
+
|
|
1147
|
+
WHY: Untracked commits represent work that may not be visible to project
|
|
1148
|
+
management tools. This report enables analysis of what types of work are
|
|
1149
|
+
being performed outside the tracked process, helping identify process
|
|
1150
|
+
improvements and training needs.
|
|
1151
|
+
|
|
1152
|
+
Args:
|
|
1153
|
+
ticket_analysis: Ticket analysis results containing untracked commits
|
|
1154
|
+
output_path: Path where the CSV report should be written
|
|
1155
|
+
|
|
1156
|
+
Returns:
|
|
1157
|
+
Path to the generated CSV file
|
|
1158
|
+
"""
|
|
1159
|
+
untracked_commits = ticket_analysis.get("untracked_commits", [])
|
|
1160
|
+
|
|
1161
|
+
if not untracked_commits:
|
|
1162
|
+
# Generate empty report with headers
|
|
1163
|
+
headers = [
|
|
1164
|
+
"commit_hash",
|
|
1165
|
+
"short_hash",
|
|
1166
|
+
"author",
|
|
1167
|
+
"author_email",
|
|
1168
|
+
"canonical_id",
|
|
1169
|
+
"date",
|
|
1170
|
+
"project",
|
|
1171
|
+
"message",
|
|
1172
|
+
"category",
|
|
1173
|
+
"files_changed",
|
|
1174
|
+
"lines_added",
|
|
1175
|
+
"lines_removed",
|
|
1176
|
+
"lines_changed",
|
|
1177
|
+
"is_merge",
|
|
1178
|
+
]
|
|
1179
|
+
with open(output_path, "w", newline="") as f:
|
|
1180
|
+
writer = csv.DictWriter(f, fieldnames=headers)
|
|
1181
|
+
writer.writeheader()
|
|
1182
|
+
return output_path
|
|
1183
|
+
|
|
1184
|
+
# Process untracked commits into CSV rows
|
|
1185
|
+
rows = []
|
|
1186
|
+
for commit in untracked_commits:
|
|
1187
|
+
# Handle datetime formatting
|
|
1188
|
+
timestamp = commit.get("timestamp")
|
|
1189
|
+
if timestamp:
|
|
1190
|
+
if hasattr(timestamp, "strftime"):
|
|
1191
|
+
date_str = timestamp.strftime("%Y-%m-%d %H:%M:%S")
|
|
1192
|
+
else:
|
|
1193
|
+
date_str = str(timestamp)
|
|
1194
|
+
else:
|
|
1195
|
+
date_str = ""
|
|
1196
|
+
|
|
1197
|
+
row = {
|
|
1198
|
+
"commit_hash": commit.get("full_hash", commit.get("hash", "")),
|
|
1199
|
+
"short_hash": commit.get("hash", ""),
|
|
1200
|
+
"author": self._anonymize_value(commit.get("author", "Unknown"), "name"),
|
|
1201
|
+
"author_email": self._anonymize_value(commit.get("author_email", ""), "email"),
|
|
1202
|
+
"canonical_id": self._anonymize_value(commit.get("canonical_id", ""), "id"),
|
|
1203
|
+
"date": date_str,
|
|
1204
|
+
"project": commit.get("project_key", "UNKNOWN"),
|
|
1205
|
+
"message": commit.get("message", ""),
|
|
1206
|
+
"category": commit.get("category", "other"),
|
|
1207
|
+
"files_changed": commit.get("files_changed", 0),
|
|
1208
|
+
"lines_added": commit.get("lines_added", 0),
|
|
1209
|
+
"lines_removed": commit.get("lines_removed", 0),
|
|
1210
|
+
"lines_changed": commit.get("lines_changed", 0),
|
|
1211
|
+
"is_merge": commit.get("is_merge", False),
|
|
1212
|
+
}
|
|
1213
|
+
rows.append(row)
|
|
1214
|
+
|
|
1215
|
+
# Write CSV
|
|
1216
|
+
if rows:
|
|
1217
|
+
df = pd.DataFrame(rows)
|
|
1218
|
+
df.to_csv(output_path, index=False)
|
|
1219
|
+
|
|
1220
|
+
return output_path
|
|
1221
|
+
def generate_weekly_categorization_report(
    self,
    all_commits: list[dict[str, Any]],
    ticket_extractor: Any,  # TicketExtractor or MLTicketExtractor instance
    output_path: Path,
    weeks: int = 12
) -> Path:
    """Generate weekly commit categorization metrics CSV report for ALL commits.

    WHY: Categorization trends provide insights into development patterns
    over time, helping identify process improvements and training needs.
    This enhanced version processes ALL commits (tracked and untracked) to provide
    complete visibility into work patterns across the entire development flow.

    DESIGN DECISION: Processes all commits using the same ML/rule-based categorization
    system used elsewhere in the application, ensuring consistent categorization
    across all reports and analysis.

    Args:
        all_commits: Complete list of commits to categorize
        ticket_extractor: TicketExtractor instance for commit categorization
        output_path: Path where the CSV report should be written
        weeks: Number of weeks to analyze

    Returns:
        Path to the generated CSV file
    """
    # Calculate week boundaries: the analysis window ends "now" (UTC) and
    # reaches back `weeks` whole weeks.
    end_date = datetime.now(timezone.utc)
    start_date = end_date - timedelta(weeks=weeks)

    # Initialize weekly aggregation structures.
    # weekly_categories[week_start][category] -> commit count
    # weekly_metrics[week_start] -> cumulative line/file totals + unique devs
    weekly_categories = defaultdict(lambda: defaultdict(int))
    weekly_metrics = defaultdict(lambda: {
        'lines_added': 0,
        'lines_removed': 0,
        'files_changed': 0,
        'developers': set()
    })

    # Process ALL commits with classification
    processed_commits = 0
    for commit in all_commits:
        # Defensive: skip anything that is not a commit dict.
        if not isinstance(commit, dict):
            continue

        # Get timestamp and validate date range; commits without a
        # timestamp cannot be bucketed into a week and are dropped.
        timestamp = commit.get("timestamp")
        if not timestamp:
            continue

        # Ensure timezone consistency: naive timestamps are assumed UTC,
        # aware ones are converted, so comparisons below never mix
        # naive/aware datetimes (which would raise TypeError).
        if hasattr(timestamp, "tzinfo") and timestamp.tzinfo is None:
            timestamp = timestamp.replace(tzinfo=timezone.utc)
        elif hasattr(timestamp, "tzinfo") and timestamp.tzinfo != timezone.utc:
            timestamp = timestamp.astimezone(timezone.utc)

        if timestamp < start_date or timestamp > end_date:
            continue

        # Skip merge commits (consistent with untracked analysis)
        if commit.get("is_merge", False):
            continue

        # Categorize the commit using the same system as untracked analysis
        message = commit.get("message", "")
        files_changed_raw = commit.get("files_changed", [])

        # Handle both int and list types for files_changed: upstream
        # producers store either a count or a list of file paths.
        if isinstance(files_changed_raw, int):
            files_changed_count = files_changed_raw
            files_changed_list = []  # Can't provide file names, only count
        elif isinstance(files_changed_raw, list):
            files_changed_count = len(files_changed_raw)
            files_changed_list = files_changed_raw
        else:
            files_changed_count = 0
            files_changed_list = []

        # Handle both TicketExtractor and MLTicketExtractor signatures.
        # NOTE(review): this TypeError fallback also swallows TypeErrors
        # raised *inside* a two-argument categorize_commit — confirm the
        # extractor implementations cannot raise TypeError internally.
        try:
            # Try ML signature first (message, files_changed as list)
            category = ticket_extractor.categorize_commit(message, files_changed_list)
        except TypeError:
            # Fall back to base signature (message only)
            category = ticket_extractor.categorize_commit(message)

        # Get week boundary (Monday start)
        week_start = self._get_week_start(timestamp)

        # Aggregate by category
        weekly_categories[week_start][category] += 1

        # Aggregate metrics.
        # NOTE(review): reads "insertions"/"deletions" keys here, unlike the
        # untracked-commit export which uses "lines_added"/"lines_removed" —
        # assumes the enriched commit schema; verify against the producer.
        weekly_metrics[week_start]['lines_added'] += commit.get("insertions", 0)
        weekly_metrics[week_start]['lines_removed'] += commit.get("deletions", 0)
        weekly_metrics[week_start]['files_changed'] += files_changed_count

        # Track unique developers (use canonical_id or fallback to email)
        developer_id = commit.get("canonical_id") or commit.get("author_email", "Unknown")
        weekly_metrics[week_start]['developers'].add(developer_id)

        processed_commits += 1

    # Build CSV rows with comprehensive metrics
    rows = []
    all_categories = set()

    # Collect all categories across all weeks
    for week_data in weekly_categories.values():
        all_categories.update(week_data.keys())

    # Ensure standard categories are included even if not found, so the
    # CSV schema is stable across runs regardless of observed data.
    standard_categories = ["bug_fix", "feature", "refactor", "documentation",
                           "maintenance", "test", "style", "build", "integration", "other"]
    all_categories.update(standard_categories)
    sorted_categories = sorted(all_categories)

    # Generate weekly rows (chronological order via sorted week keys)
    for week_start in sorted(weekly_categories.keys()):
        week_data = weekly_categories[week_start]
        week_metrics = weekly_metrics[week_start]
        total_commits = sum(week_data.values())

        row = {
            "week_start": week_start.strftime("%Y-%m-%d"),
            "total_commits": total_commits,
            "lines_added": week_metrics['lines_added'],
            "lines_removed": week_metrics['lines_removed'],
            "files_changed": week_metrics['files_changed'],
            "developer_count": len(week_metrics['developers'])
        }

        # Add each category count and percentage (0-safe division)
        for category in sorted_categories:
            count = week_data.get(category, 0)
            pct = (count / total_commits * 100) if total_commits > 0 else 0

            row[f"{category}_count"] = count
            row[f"{category}_pct"] = round(pct, 1)

        rows.append(row)

    # Write CSV with comprehensive headers
    if rows:
        df = pd.DataFrame(rows)
        df.to_csv(output_path, index=False)
    else:
        # Write empty CSV with comprehensive headers so downstream
        # consumers always find a well-formed file.
        headers = ["week_start", "total_commits", "lines_added", "lines_removed",
                   "files_changed", "developer_count"]

        for category in sorted_categories:
            headers.extend([f"{category}_count", f"{category}_pct"])

        with open(output_path, "w", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=headers)
            writer.writeheader()

    return output_path
|
|
1381
|
+
|
|
1382
|
+
def generate_story_point_correlation_report(
    self,
    commits: list[dict[str, Any]],
    prs: list[dict[str, Any]],
    pm_data: Optional[dict[str, Any]],
    output_path: Path,
    weeks: int = 12
) -> Path:
    """Generate story point correlation analysis CSV report.

    Delegates the heavy lifting to StoryPointCorrelationAnalyzer, which
    relates estimated effort (story points) to observed work metrics
    (commits, lines of code, PRs) to support estimation calibration and
    process improvement.

    On any failure an empty CSV with the full column schema is still
    written (so downstream consumers find a well-formed file), and the
    original exception is re-raised to the caller.

    Args:
        commits: List of commit data with story points
        prs: List of pull request data
        pm_data: PM platform data with issue correlations
        output_path: Path for the output CSV file
        weeks: Number of weeks to analyze

    Returns:
        Path to the generated CSV report
    """
    try:
        # Deferred import: avoids a circular dependency at module load time.
        from .story_point_correlation import StoryPointCorrelationAnalyzer

        # Analyzer inherits this writer's anonymization/identity settings
        # so its output matches the rest of the report suite.
        correlation_analyzer = StoryPointCorrelationAnalyzer(
            anonymize=self.anonymize,
            identity_resolver=self.identity_resolver,
        )

        # Keep author exclusions consistent with the other reports.
        filtered_commits = self._filter_excluded_authors_list(commits)

        logger.debug(f"Generating story point correlation report: {output_path}")
        return correlation_analyzer.generate_correlation_report(
            filtered_commits, prs, pm_data, output_path, weeks
        )

    except Exception as e:
        logger.error(f"Error generating story point correlation report: {e}")

        # Best-effort fallback: emit a header-only CSV before re-raising.
        fallback_columns = [
            "week_start", "metric_type", "developer_name",
            "sp_commits_correlation", "sp_lines_correlation", "sp_files_correlation",
            "sp_prs_correlation", "sp_complexity_correlation", "sample_size",
            "total_story_points", "total_commits", "story_points_completed",
            "commits_count", "prs_merged", "developers_active", "velocity_trend",
            "overall_accuracy", "avg_weekly_accuracy", "consistency",
            "weeks_active", "total_estimated_sp", "total_actual_sp", "estimation_ratio"
        ]
        pd.DataFrame(columns=fallback_columns).to_csv(output_path, index=False)

        raise
|
|
1446
|
+
|
|
1447
|
+
def generate_weekly_velocity_report(
    self,
    commits: list[dict[str, Any]],
    prs: list[dict[str, Any]],
    output_path: Path,
    weeks: int = 12,
) -> Path:
    """Generate weekly lines-per-story-point velocity analysis report.

    WHY: Velocity analysis helps teams understand the relationship between
    estimated effort (story points) and actual work performed (lines of code).
    This enables process improvements, better estimation calibration, and
    identification of efficiency trends over time.

    DESIGN DECISION: Combines both PR-based and commit-based story points
    to provide comprehensive coverage, as some organizations track story
    points differently across their development workflow.

    Args:
        commits: List of commit data dictionaries with story points
        prs: List of pull request data dictionaries with story points
        output_path: Path where the CSV report should be written
        weeks: Number of weeks to analyze (default: 12)

    Returns:
        Path to the generated CSV file
    """
    # Hoisted out of the weekly loop below: the original re-imported math
    # on every iteration, which is loop-invariant work and hid the
    # dependency inside the loop body.
    import math

    # Apply exclusion filtering in Phase 2
    commits = self._filter_excluded_authors_list(commits)

    # Calculate date range (timezone-aware to match commit timestamps)
    end_date = datetime.now(timezone.utc)
    start_date = end_date - timedelta(weeks=weeks)

    logger.debug("Weekly velocity report date range:")
    logger.debug(f" start_date: {start_date} (tzinfo: {start_date.tzinfo})")
    logger.debug(f" end_date: {end_date} (tzinfo: {end_date.tzinfo})")

    # Initialize weekly aggregation structures; each week bucket tracks
    # story points (split by source), line/file totals, and unique devs.
    weekly_data: dict[datetime, dict[str, Any]] = defaultdict(lambda: {
        'total_story_points': 0,
        'pr_story_points': 0,
        'commit_story_points': 0,
        'total_lines': 0,
        'lines_added': 0,
        'lines_removed': 0,
        'files_changed': 0,
        'commits_count': 0,
        'developers': set(),
        'prs_with_sp': 0,
        'commits_with_sp': 0,
    })

    # Process commits for weekly aggregation
    for commit in commits:
        timestamp = commit["timestamp"]
        logger.debug(
            f"Processing commit timestamp: {timestamp} (tzinfo: {getattr(timestamp, 'tzinfo', 'N/A')})"
        )

        # Ensure consistent timezone handling: normalize everything to UTC
        # so week bucketing and range checks never mix naive/aware values.
        if hasattr(timestamp, "tzinfo") and timestamp.tzinfo is not None:
            if timestamp.tzinfo != timezone.utc:
                timestamp = timestamp.astimezone(timezone.utc)
        else:
            timestamp = timestamp.replace(tzinfo=timezone.utc)

        # Check date range
        if self._safe_datetime_compare(
            timestamp, start_date, "lt", "generate_weekly_velocity_report range check"
        ) or self._safe_datetime_compare(
            timestamp, end_date, "gt", "generate_weekly_velocity_report range check"
        ):
            continue

        # Get week start (Monday)
        week_start = self._get_week_start(timestamp)
        week_data = weekly_data[week_start]

        # Aggregate commit metrics; "filtered_*" keys (exclusion-aware
        # counts) take precedence over raw counts when present.
        story_points = commit.get("story_points", 0) or 0
        lines_added = commit.get("filtered_insertions", commit.get("insertions", 0)) or 0
        lines_removed = commit.get("filtered_deletions", commit.get("deletions", 0)) or 0
        files_changed = commit.get("filtered_files_changed", commit.get("files_changed", 0)) or 0

        week_data['commits_count'] += 1
        week_data['commit_story_points'] += story_points
        week_data['total_story_points'] += story_points
        week_data['lines_added'] += lines_added
        week_data['lines_removed'] += lines_removed
        week_data['total_lines'] += lines_added + lines_removed
        week_data['files_changed'] += files_changed

        # Track developers and story point coverage
        developer_id = commit.get("canonical_id", commit.get("author_email", "unknown"))
        week_data['developers'].add(developer_id)

        if story_points > 0:
            week_data['commits_with_sp'] += 1

    # Process PRs for weekly aggregation (by merge date or creation date)
    for pr in prs:
        # Use merged_at if available and valid, otherwise created_at
        pr_date = pr.get("merged_at") or pr.get("created_at")
        if not pr_date:
            continue

        # Handle string dates (convert to datetime if needed)
        if isinstance(pr_date, str):
            try:
                from dateutil.parser import parse
                pr_date = parse(pr_date)
            except Exception:
                # Unparseable date: skip the PR rather than abort the report.
                continue

        # Ensure timezone consistency
        if hasattr(pr_date, "tzinfo") and pr_date.tzinfo is not None:
            if pr_date.tzinfo != timezone.utc:
                pr_date = pr_date.astimezone(timezone.utc)
        else:
            pr_date = pr_date.replace(tzinfo=timezone.utc)

        # Check date range
        if self._safe_datetime_compare(
            pr_date, start_date, "lt", "generate_weekly_velocity_report PR range check"
        ) or self._safe_datetime_compare(
            pr_date, end_date, "gt", "generate_weekly_velocity_report PR range check"
        ):
            continue

        # Get week start
        week_start = self._get_week_start(pr_date)
        week_data = weekly_data[week_start]

        # Aggregate PR metrics
        story_points = pr.get("story_points", 0) or 0
        if story_points > 0:
            week_data['pr_story_points'] += story_points
            week_data['total_story_points'] += story_points
            week_data['prs_with_sp'] += 1

        # Track developer from PR
        developer_id = pr.get("canonical_id", pr.get("author", "unknown"))
        week_data['developers'].add(developer_id)

    # Build CSV rows with velocity metrics
    rows = []
    previous_week_lines_per_point = None

    for week_start in sorted(weekly_data.keys()):
        week_data = weekly_data[week_start]
        total_story_points = week_data['total_story_points']
        total_lines = week_data['total_lines']

        # Calculate key metrics with division by zero protection
        lines_per_point = (total_lines / total_story_points) if total_story_points > 0 else 0
        commits_per_point = (week_data['commits_count'] / total_story_points) if total_story_points > 0 else 0

        # Calculate efficiency score (inverse of lines per point, normalized to 0-100 scale)
        # Higher efficiency = fewer lines needed per story point
        if lines_per_point > 0:
            # Use a logarithmic scale to handle wide ranges
            efficiency_score = max(0, 100 - (math.log10(max(lines_per_point, 1)) * 20))
        else:
            efficiency_score = 0

        # Calculate velocity trend (week-over-week change in lines per point)
        if previous_week_lines_per_point is not None and previous_week_lines_per_point > 0:
            if lines_per_point > 0:
                velocity_trend = ((lines_per_point - previous_week_lines_per_point) / previous_week_lines_per_point) * 100
            else:
                velocity_trend = -100  # Went from some lines per point to zero
        else:
            velocity_trend = 0  # No previous data for comparison

        row = {
            "week_start": week_start.strftime("%Y-%m-%d"),
            "total_story_points": total_story_points,
            "pr_story_points": week_data['pr_story_points'],
            "commit_story_points": week_data['commit_story_points'],
            "total_lines": total_lines,
            "lines_added": week_data['lines_added'],
            "lines_removed": week_data['lines_removed'],
            "files_changed": week_data['files_changed'],
            "lines_per_point": round(lines_per_point, 2) if lines_per_point > 0 else 0,
            "commits_per_point": round(commits_per_point, 2) if commits_per_point > 0 else 0,
            "developers_involved": len(week_data['developers']),
            "efficiency_score": round(efficiency_score, 1),
            "velocity_trend": round(velocity_trend, 1),
            # Additional metrics for deeper analysis
            "commits_count": week_data['commits_count'],
            "prs_with_story_points": week_data['prs_with_sp'],
            "commits_with_story_points": week_data['commits_with_sp'],
            "story_point_coverage_pct": round(
                (week_data['commits_with_sp'] / max(week_data['commits_count'], 1)) * 100, 1
            ),
            "avg_lines_per_commit": round(
                total_lines / max(week_data['commits_count'], 1), 1
            ),
            "avg_files_per_commit": round(
                week_data['files_changed'] / max(week_data['commits_count'], 1), 1
            ),
        }
        rows.append(row)

        # Store for next iteration's trend calculation
        previous_week_lines_per_point = lines_per_point if lines_per_point > 0 else None

    # Write CSV
    if rows:
        df = pd.DataFrame(rows)
        df.to_csv(output_path, index=False)
    else:
        # Write empty CSV with headers so consumers always get a valid file
        headers = [
            "week_start",
            "total_story_points",
            "pr_story_points",
            "commit_story_points",
            "total_lines",
            "lines_added",
            "lines_removed",
            "files_changed",
            "lines_per_point",
            "commits_per_point",
            "developers_involved",
            "efficiency_score",
            "velocity_trend",
            "commits_count",
            "prs_with_story_points",
            "commits_with_story_points",
            "story_point_coverage_pct",
            "avg_lines_per_commit",
            "avg_files_per_commit",
        ]
        with open(output_path, "w", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=headers)
            writer.writeheader()

    return output_path
|
|
1688
|
+
|
|
1689
|
+
def generate_weekly_dora_report(
    self,
    commits: list[dict[str, Any]],
    prs: list[dict[str, Any]],
    output_path: Path,
    weeks: int = 12,
) -> Path:
    """Generate weekly DORA metrics CSV report.

    WHY: Weekly DORA metrics provide trend analysis for software delivery
    performance, enabling teams to track improvements and identify periods
    of degraded performance across the four key metrics.

    DESIGN DECISION: Uses the DORAMetricsCalculator with weekly breakdown
    to provide consistent methodology while adding trend analysis and
    rolling averages for smoother interpretation.

    Args:
        commits: List of commit data dictionaries
        prs: List of pull request data dictionaries
        output_path: Path where the CSV report should be written
        weeks: Number of weeks to analyze (default: 12)

    Returns:
        Path to the generated CSV file

    Raises:
        Exception: Re-raises any error from the DORA calculation after
            writing a header-only fallback CSV.
    """
    from ..metrics.dora import DORAMetricsCalculator

    # Apply exclusion filtering in Phase 2
    commits = self._filter_excluded_authors_list(commits)

    # Calculate date range
    end_date = datetime.now(timezone.utc)
    start_date = end_date - timedelta(weeks=weeks)

    # Initialize DORA calculator
    dora_calculator = DORAMetricsCalculator()

    # Single source of truth for the report schema. The original duplicated
    # this 21-column list verbatim in both the "no data" branch and the
    # exception fallback, which invites drift between the two copies.
    headers = [
        "week_start",
        "week_end",
        "deployment_frequency",
        "lead_time_hours",
        "change_failure_rate",
        "mttr_hours",
        "total_failures",
        "total_commits",
        "total_prs",
        "deployment_frequency_4w_avg",
        "lead_time_4w_avg",
        "change_failure_rate_4w_avg",
        "mttr_4w_avg",
        "deployment_frequency_change_pct",
        "lead_time_change_pct",
        "change_failure_rate_change_pct",
        "mttr_change_pct",
        "deployment_frequency_trend",
        "lead_time_trend",
        "change_failure_rate_trend",
        "mttr_trend",
    ]

    try:
        # Calculate weekly DORA metrics
        weekly_metrics = dora_calculator.calculate_weekly_dora_metrics(
            commits=commits,
            prs=prs,
            start_date=start_date,
            end_date=end_date,
        )

        if not weekly_metrics:
            # No data in range: emit a header-only CSV with the full schema.
            df = pd.DataFrame(columns=headers)
            df.to_csv(output_path, index=False)
            return output_path

        # Convert to DataFrame and write CSV
        df = pd.DataFrame(weekly_metrics)
        df.to_csv(output_path, index=False)

        return output_path

    except Exception as e:
        logger.error(f"Error generating weekly DORA report: {e}")

        # Best-effort fallback: leave a well-formed empty report behind,
        # then re-raise so callers still see the failure.
        df = pd.DataFrame(columns=headers)
        df.to_csv(output_path, index=False)

        raise
|