gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4158 -350
- gitflow_analytics/cli_rich.py +198 -48
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +905 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +444 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -508
- gitflow_analytics/core/analyzer.py +1209 -98
- gitflow_analytics/core/cache.py +1337 -29
- gitflow_analytics/core/data_fetcher.py +1285 -0
- gitflow_analytics/core/identity.py +363 -14
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +8 -1
- gitflow_analytics/extractors/tickets.py +749 -11
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +175 -11
- gitflow_analytics/integrations/jira_integration.py +461 -24
- gitflow_analytics/integrations/orchestrator.py +124 -1
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +379 -20
- gitflow_analytics/models/database.py +843 -53
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +9 -10
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
- gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
- gitflow_analytics/qualitative/core/__init__.py +4 -4
- gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
- gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
- gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
- gitflow_analytics/qualitative/core/processor.py +381 -248
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +7 -7
- gitflow_analytics/qualitative/models/schemas.py +155 -121
- gitflow_analytics/qualitative/utils/__init__.py +4 -4
- gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
- gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
- gitflow_analytics/qualitative/utils/metrics.py +172 -158
- gitflow_analytics/qualitative/utils/text_processing.py +146 -104
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +539 -14
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1676 -212
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2287 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +1 -1
- gitflow_analytics/tui/app.py +129 -126
- gitflow_analytics/tui/screens/__init__.py +3 -3
- gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
- gitflow_analytics/tui/screens/configuration_screen.py +154 -178
- gitflow_analytics/tui/screens/loading_screen.py +100 -110
- gitflow_analytics/tui/screens/main_screen.py +89 -72
- gitflow_analytics/tui/screens/results_screen.py +305 -281
- gitflow_analytics/tui/widgets/__init__.py +2 -2
- gitflow_analytics/tui/widgets/data_table.py +67 -69
- gitflow_analytics/tui/widgets/export_modal.py +76 -76
- gitflow_analytics/tui/widgets/progress_widget.py +41 -46
- gitflow_analytics-1.3.11.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.11.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
- gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/top_level.txt +0 -0
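The excerpted diff below covers gitflow_analytics/reports/analytics_writer.py (+539 −14). Its AnalyticsReportGenerator now accepts exclude_authors and identity_resolver so that bot filtering and identity consolidation happen at report time ("Phase 2") rather than during data collection. A minimal usage sketch, assuming only the constructor arguments visible in the diff; the resolver object and the commits/developer_stats inputs are placeholders here:

    from pathlib import Path
    from gitflow_analytics.reports.analytics_writer import AnalyticsReportGenerator

    commits: list = []          # normally produced by the analysis pipeline
    developer_stats: list = []  # normally produced by the identity/metrics layer

    # exclude_authors entries are matched case-insensitively against canonical_id,
    # primary_email, and the other author/name fields present in the report data
    generator = AnalyticsReportGenerator(
        anonymize=False,
        exclude_authors=["dependabot[bot]", "bot@excluded.local"],
        identity_resolver=None,  # hypothetical: any object exposing get_canonical_name(canonical_id)
    )
    generator.generate_activity_distribution_report(commits, developer_stats, Path("activity_distribution.csv"))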
@@ -1,26 +1,232 @@
 """Advanced analytics report generation with percentage and qualitative metrics."""
 import csv
+import logging
+from collections import defaultdict
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
-from typing import
-
-import pandas as pd
+from typing import Any, Dict, List, Tuple
+
 import numpy as np
+import pandas as pd
+
+# Get logger for this module
+logger = logging.getLogger(__name__)
 
 
 class AnalyticsReportGenerator:
     """Generate advanced analytics reports with percentage breakdowns and qualitative insights."""
 
-    def __init__(self, anonymize: bool = False):
+    def __init__(self, anonymize: bool = False, exclude_authors: list[str] = None, identity_resolver=None):
         """Initialize analytics report generator."""
         self.anonymize = anonymize
         self._anonymization_map = {}
         self._anonymous_counter = 0
+        self.exclude_authors = exclude_authors or []
+        self.identity_resolver = identity_resolver
+
+    def _filter_excluded_authors(self, data_list: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        """
+        Filter out excluded authors from any data list using canonical_id.
+
+        WHY: Bot exclusion happens in Phase 2 (reporting) instead of Phase 1 (data collection)
+        to ensure manual identity mappings work correctly. This allows the system to see
+        consolidated bot identities via canonical_id instead of just original author_email/author_name.
+
+        Args:
+            data_list: List of data dictionaries containing canonical_id field
+
+        Returns:
+            Filtered list with excluded authors removed
+        """
+        if not self.exclude_authors:
+            return data_list
+
+        logger.debug(f"DEBUG EXCLUSION: Starting filter with {len(self.exclude_authors)} excluded authors: {self.exclude_authors}")
+        logger.debug(f"DEBUG EXCLUSION: Filtering {len(data_list)} items from data list")
+
+        excluded_lower = [author.lower() for author in self.exclude_authors]
+        logger.debug(f"DEBUG EXCLUSION: Excluded authors (lowercase): {excluded_lower}")
+
+        filtered_data = []
+        excluded_count = 0
+
+        # Sample first 5 items to see data structure
+        for i, item in enumerate(data_list[:5]):
+            logger.debug(f"DEBUG EXCLUSION: Sample item {i}: canonical_id='{item.get('canonical_id', '')}', "
+                         f"author_email='{item.get('author_email', '')}', author_name='{item.get('author_name', '')}', "
+                         f"author='{item.get('author', '')}', primary_name='{item.get('primary_name', '')}', "
+                         f"name='{item.get('name', '')}', developer='{item.get('developer', '')}', "
+                         f"display_name='{item.get('display_name', '')}'")
+
+        for item in data_list:
+            canonical_id = item.get("canonical_id", "")
+            # Also check original author fields as fallback for data without canonical_id
+            author_email = item.get("author_email", "")
+            author_name = item.get("author_name", "")
+
+            # Check all possible author fields to ensure we catch every variation
+            author = item.get("author", "")
+            primary_name = item.get("primary_name", "")
+            name = item.get("name", "")
+            developer = item.get("developer", "")  # Common in analytics data
+            display_name = item.get("display_name", "")  # Common in some data structures
+
+            # Check canonical_id FIRST - this is the primary exclusion check
+            should_exclude = False
+            if canonical_id and canonical_id.lower() in excluded_lower:
+                should_exclude = True
+            # CRITICAL: Also check primary_email for manual mappings (e.g. bots mapped to bot@excluded.local)
+            elif item.get("primary_email", "") and item.get("primary_email", "").lower() in excluded_lower:
+                should_exclude = True
+            # Fall back to checking other fields only if canonical_id and primary_email don't match
+            elif not should_exclude:
+                should_exclude = (
+                    (author_email and author_email.lower() in excluded_lower) or
+                    (author_name and author_name.lower() in excluded_lower) or
+                    (author and author.lower() in excluded_lower) or
+                    (primary_name and primary_name.lower() in excluded_lower) or
+                    (name and name.lower() in excluded_lower) or
+                    (developer and developer.lower() in excluded_lower) or
+                    (display_name and display_name.lower() in excluded_lower)
+                )
+
+            if should_exclude:
+                excluded_count += 1
+                logger.debug(f"DEBUG EXCLUSION: EXCLUDING item - canonical_id='{canonical_id}', "
+                             f"primary_email='{item.get('primary_email', '')}', "
+                             f"author_email='{author_email}', author_name='{author_name}', author='{author}', "
+                             f"primary_name='{primary_name}', name='{name}', developer='{developer}', "
+                             f"display_name='{display_name}'")
+            else:
+                filtered_data.append(item)
+
+        logger.debug(f"DEBUG EXCLUSION: Excluded {excluded_count} items, kept {len(filtered_data)} items")
+        return filtered_data
+
+    def _get_canonical_display_name(self, canonical_id: str, fallback_name: str) -> str:
+        """
+        Get the canonical display name for a developer.
+
+        WHY: Manual identity mappings may have updated display names that aren't
+        reflected in the developer_stats data passed to report generators. This
+        method ensures we get the most current display name from the identity resolver.
+
+        Args:
+            canonical_id: The canonical ID to get the display name for
+            fallback_name: The fallback name to use if identity resolver is not available
+
+        Returns:
+            The canonical display name or fallback name
+        """
+        if self.identity_resolver and canonical_id:
+            try:
+                canonical_name = self.identity_resolver.get_canonical_name(canonical_id)
+                if canonical_name and canonical_name != "Unknown":
+                    return canonical_name
+            except Exception as e:
+                logger.debug(f"Error getting canonical name for {canonical_id}: {e}")
+
+        return fallback_name
+
+    def _get_files_changed_count(self, commit: Dict[str, Any]) -> int:
+        """Safely extract files_changed count from commit data.
+
+        WHY: The files_changed field can be either an int (count) or list (file names).
+        This helper ensures we always get an integer count for calculations.
+
+        Args:
+            commit: Commit dictionary with files_changed field
+
+        Returns:
+            Integer count of files changed
+        """
+        files_changed = commit.get('files_changed', 0)
+
+        if isinstance(files_changed, int):
+            return files_changed
+        elif isinstance(files_changed, list):
+            return len(files_changed)
+        else:
+            # Fallback for unexpected types
+            logger.warning(f"Unexpected files_changed type: {type(files_changed)}, defaulting to 0")
+            return 0
+
+    def _log_datetime_comparison(self, dt1: datetime, dt2: datetime, operation: str, location: str) -> None:
+        """Log datetime comparison details for debugging timezone issues."""
+        logger.debug(f"Comparing dates in {location} ({operation}):")
+        logger.debug(f"  dt1: {dt1} (tzinfo: {dt1.tzinfo}, aware: {dt1.tzinfo is not None})")
+        logger.debug(f"  dt2: {dt2} (tzinfo: {dt2.tzinfo}, aware: {dt2.tzinfo is not None})")
+
+    def _safe_datetime_compare(self, dt1: datetime, dt2: datetime, operation: str, location: str) -> bool:
+        """Safely compare datetimes with logging and error handling."""
+        try:
+            self._log_datetime_comparison(dt1, dt2, operation, location)
+
+            if operation == 'lt':
+                result = dt1 < dt2
+            elif operation == 'gt':
+                result = dt1 > dt2
+            elif operation == 'le':
+                result = dt1 <= dt2
+            elif operation == 'ge':
+                result = dt1 >= dt2
+            elif operation == 'eq':
+                result = dt1 == dt2
+            else:
+                raise ValueError(f"Unknown operation: {operation}")
+
+            logger.debug(f"  Result: {result}")
+            return result
+
+        except TypeError as e:
+            logger.error(f"Timezone comparison error in {location}:")
+            logger.error(f"  dt1: {dt1} (type: {type(dt1)}, tzinfo: {getattr(dt1, 'tzinfo', 'N/A')})")
+            logger.error(f"  dt2: {dt2} (type: {type(dt2)}, tzinfo: {getattr(dt2, 'tzinfo', 'N/A')})")
+            logger.error(f"  Operation: {operation}")
+            logger.error(f"  Error: {e}")
+
+            # Import traceback for detailed error info
+            import traceback
+            logger.error(f"  Full traceback:\n{traceback.format_exc()}")
+
+            # Try to fix by making both timezone-aware in UTC
+            try:
+                if dt1.tzinfo is None:
+                    dt1 = dt1.replace(tzinfo=timezone.utc)
+                    logger.debug(f"  Fixed dt1 to UTC: {dt1}")
+                if dt2.tzinfo is None:
+                    dt2 = dt2.replace(tzinfo=timezone.utc)
+                    logger.debug(f"  Fixed dt2 to UTC: {dt2}")
+
+                # Retry comparison
+                if operation == 'lt':
+                    result = dt1 < dt2
+                elif operation == 'gt':
+                    result = dt1 > dt2
+                elif operation == 'le':
+                    result = dt1 <= dt2
+                elif operation == 'ge':
+                    result = dt1 >= dt2
+                elif operation == 'eq':
+                    result = dt1 == dt2
+                else:
+                    raise ValueError(f"Unknown operation: {operation}")
+
+                logger.info(f"  Fixed comparison result: {result}")
+                return result
+
+            except Exception as fix_error:
+                logger.error(f"  Failed to fix timezone issue: {fix_error}")
+                raise
 
     def generate_activity_distribution_report(self, commits: List[Dict[str, Any]],
                                               developer_stats: List[Dict[str, Any]],
                                               output_path: Path) -> Path:
         """Generate activity distribution report with percentage breakdowns."""
+        # Apply exclusion filtering in Phase 2
+        commits = self._filter_excluded_authors(commits)
+        developer_stats = self._filter_excluded_authors(developer_stats)
+
         # Build lookup maps
         dev_lookup = {dev['canonical_id']: dev for dev in developer_stats}
 
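Taken together, the new helpers above implement the Phase 2 exclusion described in their docstrings. A small illustrative sketch of the expected behavior (the records are made up; the method is internal to the class):

    gen = AnalyticsReportGenerator(exclude_authors=["dependabot[bot]", "bot@excluded.local"])
    rows = [
        {"canonical_id": "alice@example.com", "commits": 42},
        {"canonical_id": "Dependabot[bot]", "commits": 310},            # matched via canonical_id (case-insensitive)
        {"canonical_id": "x1", "primary_email": "bot@excluded.local"},  # matched via a manual bot mapping
    ]
    kept = gen._filter_excluded_authors(rows)
    # kept retains only the alice@example.com record; exclusion checks canonical_id first,
    # then primary_email, then the remaining author/name fields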
@@ -31,7 +237,7 @@ class AnalyticsReportGenerator:
             c.get('filtered_deletions', c.get('deletions', 0))
             for c in commits
         )
-        total_files = sum(c
+        total_files = sum(self._get_files_changed_count(c) for c in commits)
 
         # Group by developer and project
         dev_project_activity = defaultdict(lambda: defaultdict(lambda: {
@@ -47,7 +253,16 @@
                 commit.get('filtered_insertions', commit.get('insertions', 0)) +
                 commit.get('filtered_deletions', commit.get('deletions', 0))
             )
-
+            # Handle files_changed safely - could be int or list
+            files_changed = commit.get('filtered_files_changed')
+            if files_changed is None:
+                files_changed = self._get_files_changed_count(commit)
+            elif isinstance(files_changed, list):
+                files_changed = len(files_changed)
+            elif not isinstance(files_changed, int):
+                files_changed = 0
+
+            dev_project_activity[dev_id][project]['files'] += files_changed
             dev_project_activity[dev_id][project]['story_points'] += commit.get('story_points', 0) or 0
 
         # Build report data
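The hunk above routes file counts through _get_files_changed_count because commits may carry either an integer count or a list of paths in files_changed. A quick sketch of the normalization, using made-up commit dictionaries:

    gen = AnalyticsReportGenerator()
    gen._get_files_changed_count({"files_changed": 7})                  # -> 7
    gen._get_files_changed_count({"files_changed": ["a.py", "b.py"]})   # -> 2
    gen._get_files_changed_count({})                                    # -> 0 (missing field defaults to 0)
    gen._get_files_changed_count({"files_changed": "oops"})             # -> 0, after logging a warning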
@@ -55,7 +270,12 @@
 
         for dev_id, projects in dev_project_activity.items():
             developer = dev_lookup.get(dev_id, {})
-            dev_name = self._anonymize_value(
+            dev_name = self._anonymize_value(
+                self._get_canonical_display_name(
+                    dev_id,
+                    developer.get('primary_name', 'Unknown')
+                ), 'name'
+            )
 
             # Calculate developer totals
             dev_total_commits = sum(p['commits'] for p in projects.values())
@@ -98,6 +318,9 @@
                                              ticket_analysis: Dict[str, Any],
                                              output_path: Path) -> Path:
         """Generate qualitative insights and patterns report."""
+        # Apply exclusion filtering in Phase 2
+        commits = self._filter_excluded_authors(commits)
+        developer_stats = self._filter_excluded_authors(developer_stats)
         insights = []
 
         # Analyze commit patterns
@@ -127,10 +350,18 @@
                                         output_path: Path,
                                         weeks: int = 12) -> Path:
         """Generate developer focus analysis showing concentration patterns and activity across all projects."""
+        # Apply exclusion filtering in Phase 2
+        commits = self._filter_excluded_authors(commits)
+        developer_stats = self._filter_excluded_authors(developer_stats)
+
         # Calculate week boundaries (timezone-aware to match commit timestamps)
         end_date = datetime.now(timezone.utc)
         start_date = end_date - timedelta(weeks=weeks)
 
+        logger.debug(f"Developer focus report date range:")
+        logger.debug(f"  start_date: {start_date} (tzinfo: {start_date.tzinfo})")
+        logger.debug(f"  end_date: {end_date} (tzinfo: {end_date.tzinfo})")
+
         # Build developer lookup
         dev_lookup = {dev['canonical_id']: dev for dev in developer_stats}
 
@@ -149,7 +380,12 @@
 
         for dev in developer_stats:
             dev_id = dev['canonical_id']
-            dev_name = self._anonymize_value(
+            dev_name = self._anonymize_value(
+                self._get_canonical_display_name(
+                    dev_id,
+                    dev['primary_name']
+                ), 'name'
+            )
 
             # Get developer's commits
             dev_commits = [c for c in commits if c.get('canonical_id') == dev_id]
@@ -164,6 +400,10 @@
             commit_hours = []
 
             for commit in dev_commits:
+                # Log commit processing
+                logger.debug(f"Processing commit for developer {dev_name}: {commit.get('hash', 'unknown')[:8]}")
+                logger.debug(f"  timestamp: {commit['timestamp']} (tzinfo: {getattr(commit['timestamp'], 'tzinfo', 'N/A')})")
+
                 # Project distribution
                 project_key = commit.get('project_key', 'UNKNOWN')
                 projects[project_key] += 1
@@ -182,8 +422,10 @@
                 # Commit size
                 commit_sizes.append(lines_changed)
 
-                # Time of day
-                if
+                # Time of day (use local hour if available, fallback to UTC)
+                if 'local_hour' in commit:
+                    commit_hours.append(commit['local_hour'])
+                elif hasattr(commit['timestamp'], 'hour'):
                     commit_hours.append(commit['timestamp'].hour)
 
             # Calculate metrics
@@ -276,12 +518,284 @@
 
         return output_path
 
+    def generate_weekly_trends_report(self, commits: List[Dict[str, Any]],
+                                      developer_stats: List[Dict[str, Any]],
+                                      output_path: Path,
+                                      weeks: int = 12) -> Path:
+        """Generate weekly trends analysis showing changes in activity patterns."""
+        # Apply exclusion filtering in Phase 2
+        commits = self._filter_excluded_authors(commits)
+        developer_stats = self._filter_excluded_authors(developer_stats)
+
+        # Calculate week boundaries
+        end_date = datetime.now(timezone.utc)
+        start_date = end_date - timedelta(weeks=weeks)
+
+        # Build developer lookup
+        dev_lookup = {dev['canonical_id']: dev for dev in developer_stats}
+
+        # Initialize data structures
+        weekly_data = defaultdict(lambda: {
+            'commits': 0,
+            'developers': set(),
+            'projects': defaultdict(int),
+            'lines_changed': 0,
+            'story_points': 0
+        })
+
+        developer_weekly = defaultdict(lambda: defaultdict(lambda: {
+            'commits': 0, 'lines': 0, 'story_points': 0
+        }))
+        project_weekly = defaultdict(lambda: defaultdict(lambda: {
+            'commits': 0, 'lines': 0, 'developers': set(), 'story_points': 0
+        }))
+
+        # Process commits
+        for commit in commits:
+            week_start = self._get_week_start(commit['timestamp'])
+            week_key = week_start.strftime('%Y-%m-%d')
+
+            # Overall weekly metrics
+            weekly_data[week_key]['commits'] += 1
+            weekly_data[week_key]['developers'].add(commit.get('canonical_id'))
+            weekly_data[week_key]['projects'][commit.get('project_key', 'UNKNOWN')] += 1
+            lines = (
+                commit.get('filtered_insertions', commit.get('insertions', 0)) +
+                commit.get('filtered_deletions', commit.get('deletions', 0))
+            )
+            weekly_data[week_key]['lines_changed'] += lines
+            weekly_data[week_key]['story_points'] += commit.get('story_points', 0) or 0
+
+            # Developer-specific weekly data
+            dev_id = commit.get('canonical_id')
+            developer_weekly[dev_id][week_key]['commits'] += 1
+            developer_weekly[dev_id][week_key]['lines'] += lines
+            developer_weekly[dev_id][week_key]['story_points'] += commit.get('story_points', 0) or 0
+
+            # Project-specific weekly data
+            project = commit.get('project_key', 'UNKNOWN')
+            project_weekly[project][week_key]['commits'] += 1
+            project_weekly[project][week_key]['lines'] += lines
+            project_weekly[project][week_key]['developers'].add(dev_id)
+            project_weekly[project][week_key]['story_points'] += commit.get('story_points', 0) or 0
+
+        # Convert to rows for CSV
+        rows = []
+        sorted_weeks = sorted(weekly_data.keys())
+
+        # Track developer and project trends
+        dev_activity_changes = defaultdict(list)  # dev_id -> list of weekly changes
+        project_activity_changes = defaultdict(list)  # project -> list of weekly changes
+
+        for i, week in enumerate(sorted_weeks):
+            data = weekly_data[week]
+
+            # Calculate week-over-week changes
+            prev_week = sorted_weeks[i-1] if i > 0 else None
+
+            commits_change = 0
+            developers_change = 0
+            if prev_week:
+                prev_data = weekly_data[prev_week]
+                commits_change = data['commits'] - prev_data['commits']
+                developers_change = len(data['developers']) - len(prev_data['developers'])
+
+            # Top project and developer this week
+            top_project = max(data['projects'].items(), key=lambda x: x[1])[0] if data['projects'] else 'NONE'
+
+            # Find top developer this week
+            top_dev_id = None
+            top_dev_commits = 0
+            for dev_id in data['developers']:
+                dev_commits = developer_weekly[dev_id][week]['commits']
+                if dev_commits > top_dev_commits:
+                    top_dev_commits = dev_commits
+                    top_dev_id = dev_id
+
+            top_dev_name = self._anonymize_value(
+                self._get_canonical_display_name(
+                    top_dev_id,
+                    dev_lookup.get(top_dev_id, {}).get('primary_name', 'Unknown')
+                ), 'name'
+            ) if top_dev_id else 'None'
+
+            # Calculate developer trends for active developers this week
+            dev_trend_summary = []
+            for dev_id in data['developers']:
+                dev_data = developer_weekly[dev_id][week]
+                prev_dev_data = developer_weekly[dev_id].get(prev_week, {'commits': 0}) if prev_week else {'commits': 0}
+                change = dev_data['commits'] - prev_dev_data['commits']
+                if change != 0:
+                    dev_name = self._anonymize_value(
+                        self._get_canonical_display_name(
+                            dev_id,
+                            dev_lookup.get(dev_id, {}).get('primary_name', 'Unknown')
+                        ), 'name'
+                    )
+                    dev_activity_changes[dev_name].append(change)
+                    if abs(change) >= 3:  # Significant changes only
+                        dev_trend_summary.append(f"{dev_name}({'+' if change > 0 else ''}{change})")
+
+            # Calculate project trends
+            project_trend_summary = []
+            for project, count in data['projects'].items():
+                prev_count = weekly_data[prev_week]['projects'].get(project, 0) if prev_week else 0
+                change = count - prev_count
+                if change != 0:
+                    project_activity_changes[project].append(change)
+                    if abs(change) >= 3:  # Significant changes only
+                        project_trend_summary.append(f"{project}({'+' if change > 0 else ''}{change})")
+
+            row = {
+                'week_start': week,
+                'commits': data['commits'],
+                'active_developers': len(data['developers']),
+                'active_projects': len(data['projects']),
+                'lines_changed': data['lines_changed'],
+                'story_points': data['story_points'],
+                'commits_change': commits_change,
+                'developers_change': developers_change,
+                'top_project': top_project,
+                'top_developer': top_dev_name,
+                'avg_commits_per_dev': round(data['commits'] / max(len(data['developers']), 1), 1),
+                'avg_lines_per_commit': round(data['lines_changed'] / max(data['commits'], 1), 1),
+                'developer_trends': '; '.join(dev_trend_summary[:5]) if dev_trend_summary else 'stable',
+                'project_trends': '; '.join(project_trend_summary[:5]) if project_trend_summary else 'stable'
+            }
+            rows.append(row)
+
+        # Write main CSV
+        df = pd.DataFrame(rows)
+        df.to_csv(output_path, index=False)
+
+        # Also generate detailed developer trends CSV with weekly columns
+        dev_trends_path = output_path.parent / f'developer_trends_{output_path.stem.split("_")[-1]}.csv'
+        dev_trend_rows = []
+
+        # Build developer activity by week
+        for dev_id, weekly_commits in developer_weekly.items():
+            dev_info = dev_lookup.get(dev_id, {})
+            dev_name = self._anonymize_value(
+                self._get_canonical_display_name(
+                    dev_id,
+                    dev_info.get('primary_name', 'Unknown')
+                ), 'name'
+            )
+
+            # Calculate summary statistics
+            weekly_values = []
+            for week in sorted_weeks:
+                commits = weekly_commits.get(week, {}).get('commits', 0)
+                weekly_values.append(commits)
+
+            # Only include developers with any activity
+            if sum(weekly_values) > 0:
+                # Calculate trend metrics
+                changes = []
+                for i in range(1, len(weekly_values)):
+                    changes.append(weekly_values[i] - weekly_values[i-1])
+
+                avg_change = sum(changes) / len(changes) if changes else 0
+                volatility = np.std(changes) if len(changes) > 1 else 0
+                trend = 'increasing' if avg_change > 1 else 'decreasing' if avg_change < -1 else 'stable'
+
+                row = {
+                    'developer': dev_name,
+                    'total_commits': sum(weekly_values),
+                    'avg_weekly_commits': round(sum(weekly_values) / len(weekly_values), 1),
+                    'avg_weekly_change': round(avg_change, 1),
+                    'volatility': round(volatility, 1),
+                    'trend': trend,
+                    'total_weeks_active': len([v for v in weekly_values if v > 0]),
+                    'max_week': max(weekly_values),
+                    'min_week': min([v for v in weekly_values if v > 0]) if any(v > 0 for v in weekly_values) else 0
+                }
+
+                # Add weekly columns
+                for i, week in enumerate(sorted_weeks):
+                    week_label = f'week_{i+1}_{week}'
+                    row[week_label] = weekly_values[i]
+
+                dev_trend_rows.append(row)
+
+        if dev_trend_rows:
+            dev_trends_df = pd.DataFrame(dev_trend_rows)
+            # Sort by total commits to show most active developers first
+            dev_trends_df.sort_values('total_commits', ascending=False, inplace=True)
+            dev_trends_df.to_csv(dev_trends_path, index=False)
+
+        # Also generate detailed project trends CSV with weekly columns
+        proj_trends_path = output_path.parent / f'project_trends_{output_path.stem.split("_")[-1]}.csv'
+        proj_trend_rows = []
+
+        # Build project activity by week
+        for project, weekly_commits in project_weekly.items():
+            # Calculate summary statistics
+            weekly_values = []
+            weekly_developers = []
+            for week in sorted_weeks:
+                commits = weekly_commits.get(week, {}).get('commits', 0)
+                weekly_values.append(commits)
+                # Count unique developers for this project this week
+                devs = weekly_commits.get(week, {}).get('developers', set())
+                weekly_developers.append(len(devs))
+
+            # Only include projects with any activity
+            if sum(weekly_values) > 0:
+                # Calculate trend metrics
+                changes = []
+                for i in range(1, len(weekly_values)):
+                    changes.append(weekly_values[i] - weekly_values[i-1])
+
+                avg_change = sum(changes) / len(changes) if changes else 0
+                volatility = np.std(changes) if len(changes) > 1 else 0
+                trend = 'growing' if avg_change > 2 else 'shrinking' if avg_change < -2 else 'stable'
+
+                row = {
+                    'project': project,
+                    'total_commits': sum(weekly_values),
+                    'avg_weekly_commits': round(sum(weekly_values) / len(weekly_values), 1),
+                    'avg_weekly_developers': round(sum(weekly_developers) / len(weekly_developers), 1),
+                    'avg_weekly_change': round(avg_change, 1),
+                    'volatility': round(volatility, 1),
+                    'trend': trend,
+                    'total_weeks_active': len([v for v in weekly_values if v > 0]),
+                    'max_week': max(weekly_values),
+                    'min_week': min([v for v in weekly_values if v > 0]) if any(v > 0 for v in weekly_values) else 0
+                }
+
+                # Add weekly columns for commits
+                for i, week in enumerate(sorted_weeks):
+                    week_label = f'week_{i+1}_{week}'
+                    row[week_label] = weekly_values[i]
+
+                # Add weekly columns for developer count
+                for i, week in enumerate(sorted_weeks):
+                    week_label = f'devs_week_{i+1}'
+                    row[week_label] = weekly_developers[i]
+
+                proj_trend_rows.append(row)
+
+        if proj_trend_rows:
+            proj_trends_df = pd.DataFrame(proj_trend_rows)
+            # Sort by total commits to show most active projects first
+            proj_trends_df.sort_values('total_commits', ascending=False, inplace=True)
+            proj_trends_df.to_csv(proj_trends_path, index=False)
+
+        return output_path
+
     def _analyze_commit_patterns(self, commits: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """Analyze patterns in commit data."""
         insights = []
 
-        # Time-based patterns
-        commit_hours = [
+        # Time-based patterns (use local hour if available)
+        commit_hours = []
+        for c in commits:
+            if 'local_hour' in c:
+                commit_hours.append(c['local_hour'])
+            elif hasattr(c['timestamp'], 'hour'):
+                commit_hours.append(c['timestamp'].hour)
+
         if commit_hours:
             peak_hour = max(set(commit_hours), key=commit_hours.count)
             insights.append({
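The trend columns written by generate_weekly_trends_report are plain week-over-week statistics: the mean of successive weekly differences drives the trend label, and their standard deviation is reported as volatility. The same arithmetic outside the class, as a sketch:

    import numpy as np

    weekly_values = [4, 6, 3, 9]                                          # commits per week, oldest first
    changes = [b - a for a, b in zip(weekly_values, weekly_values[1:])]   # [2, -3, 6]
    avg_change = sum(changes) / len(changes)                              # ~1.67
    volatility = float(np.std(changes))                                   # reported (rounded) as 'volatility'
    trend = 'increasing' if avg_change > 1 else 'decreasing' if avg_change < -1 else 'stable'
    # projects use the same computation with +/-2 thresholds and 'growing'/'shrinking' labels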
@@ -410,7 +924,11 @@
         insights = []
 
         # File change patterns
-        file_changes = [
+        file_changes = []
+        for c in commits:
+            files_count = self._get_files_changed_count(c)
+            if files_count > 0:
+                file_changes.append(files_count)
         if file_changes:
             avg_files = np.mean(file_changes)
 
@@ -442,18 +960,25 @@
 
     def _get_week_start(self, date: datetime) -> datetime:
         """Get Monday of the week for a given date."""
+        logger.debug(f"Getting week start for date: {date} (tzinfo: {getattr(date, 'tzinfo', 'N/A')})")
+
         # Ensure consistent timezone handling - keep timezone info
         if hasattr(date, 'tzinfo') and date.tzinfo is not None:
             # Keep timezone-aware but ensure it's UTC
             if date.tzinfo != timezone.utc:
                 date = date.astimezone(timezone.utc)
+                logger.debug(f"  Converted to UTC: {date}")
         else:
            # Convert naive datetime to UTC timezone-aware
             date = date.replace(tzinfo=timezone.utc)
+            logger.debug(f"  Made timezone-aware: {date}")
 
         days_since_monday = date.weekday()
         monday = date - timedelta(days=days_since_monday)
-
+        result = monday.replace(hour=0, minute=0, second=0, microsecond=0)
+
+        logger.debug(f"  Week start result: {result} (tzinfo: {result.tzinfo})")
+        return result
 
     def _anonymize_value(self, value: str, field_type: str) -> str:
         """Anonymize a value if anonymization is enabled."""
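_get_week_start (final hunk above) now normalizes both naive and aware timestamps to UTC and returns Monday at midnight. A small sketch of the expected behavior:

    from datetime import datetime, timezone

    gen = AnalyticsReportGenerator()
    naive = datetime(2024, 5, 8, 15, 30)                       # a Wednesday with no tzinfo
    aware = datetime(2024, 5, 8, 15, 30, tzinfo=timezone.utc)
    week_start = gen._get_week_start(naive)
    # -> datetime(2024, 5, 6, 0, 0, tzinfo=timezone.utc), the preceding Monday at midnight UTC
    assert gen._get_week_start(aware) == week_start            # aware input normalizes to the same value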