gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4108 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +904 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +441 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1193 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
@@ -1,26 +1,232 @@
  """Advanced analytics report generation with percentage and qualitative metrics."""
  import csv
+ import logging
+ from collections import defaultdict
  from datetime import datetime, timedelta, timezone
  from pathlib import Path
- from typing import List, Dict, Any, Tuple
- from collections import defaultdict
- import pandas as pd
+ from typing import Any, Dict, List, Tuple
+
  import numpy as np
+ import pandas as pd
+
+ # Get logger for this module
+ logger = logging.getLogger(__name__)


  class AnalyticsReportGenerator:
      """Generate advanced analytics reports with percentage breakdowns and qualitative insights."""

-     def __init__(self, anonymize: bool = False):
+     def __init__(self, anonymize: bool = False, exclude_authors: list[str] = None, identity_resolver=None):
          """Initialize analytics report generator."""
          self.anonymize = anonymize
          self._anonymization_map = {}
          self._anonymous_counter = 0
+         self.exclude_authors = exclude_authors or []
+         self.identity_resolver = identity_resolver
+
+     def _filter_excluded_authors(self, data_list: list[dict[str, Any]]) -> list[dict[str, Any]]:
+         """
+         Filter out excluded authors from any data list using canonical_id.
+
+         WHY: Bot exclusion happens in Phase 2 (reporting) instead of Phase 1 (data collection)
+         to ensure manual identity mappings work correctly. This allows the system to see
+         consolidated bot identities via canonical_id instead of just original author_email/author_name.
+
+         Args:
+             data_list: List of data dictionaries containing canonical_id field
+
+         Returns:
+             Filtered list with excluded authors removed
+         """
+         if not self.exclude_authors:
+             return data_list
+
+         logger.debug(f"DEBUG EXCLUSION: Starting filter with {len(self.exclude_authors)} excluded authors: {self.exclude_authors}")
+         logger.debug(f"DEBUG EXCLUSION: Filtering {len(data_list)} items from data list")
+
+         excluded_lower = [author.lower() for author in self.exclude_authors]
+         logger.debug(f"DEBUG EXCLUSION: Excluded authors (lowercase): {excluded_lower}")
+
+         filtered_data = []
+         excluded_count = 0
+
+         # Sample first 5 items to see data structure
+         for i, item in enumerate(data_list[:5]):
+             logger.debug(f"DEBUG EXCLUSION: Sample item {i}: canonical_id='{item.get('canonical_id', '')}', "
+                          f"author_email='{item.get('author_email', '')}', author_name='{item.get('author_name', '')}', "
+                          f"author='{item.get('author', '')}', primary_name='{item.get('primary_name', '')}', "
+                          f"name='{item.get('name', '')}', developer='{item.get('developer', '')}', "
+                          f"display_name='{item.get('display_name', '')}'")
+
+         for item in data_list:
+             canonical_id = item.get("canonical_id", "")
+             # Also check original author fields as fallback for data without canonical_id
+             author_email = item.get("author_email", "")
+             author_name = item.get("author_name", "")
+
+             # Check all possible author fields to ensure we catch every variation
+             author = item.get("author", "")
+             primary_name = item.get("primary_name", "")
+             name = item.get("name", "")
+             developer = item.get("developer", "") # Common in analytics data
+             display_name = item.get("display_name", "") # Common in some data structures
+
+             # Check canonical_id FIRST - this is the primary exclusion check
+             should_exclude = False
+             if canonical_id and canonical_id.lower() in excluded_lower:
+                 should_exclude = True
+             # CRITICAL: Also check primary_email for manual mappings (e.g. bots mapped to bot@excluded.local)
+             elif item.get("primary_email", "") and item.get("primary_email", "").lower() in excluded_lower:
+                 should_exclude = True
+             # Fall back to checking other fields only if canonical_id and primary_email don't match
+             elif not should_exclude:
+                 should_exclude = (
+                     (author_email and author_email.lower() in excluded_lower) or
+                     (author_name and author_name.lower() in excluded_lower) or
+                     (author and author.lower() in excluded_lower) or
+                     (primary_name and primary_name.lower() in excluded_lower) or
+                     (name and name.lower() in excluded_lower) or
+                     (developer and developer.lower() in excluded_lower) or
+                     (display_name and display_name.lower() in excluded_lower)
+                 )
+
+             if should_exclude:
+                 excluded_count += 1
+                 logger.debug(f"DEBUG EXCLUSION: EXCLUDING item - canonical_id='{canonical_id}', "
+                              f"primary_email='{item.get('primary_email', '')}', "
+                              f"author_email='{author_email}', author_name='{author_name}', author='{author}', "
+                              f"primary_name='{primary_name}', name='{name}', developer='{developer}', "
+                              f"display_name='{display_name}'")
+             else:
+                 filtered_data.append(item)
+
+         logger.debug(f"DEBUG EXCLUSION: Excluded {excluded_count} items, kept {len(filtered_data)} items")
+         return filtered_data
+
+     def _get_canonical_display_name(self, canonical_id: str, fallback_name: str) -> str:
+         """
+         Get the canonical display name for a developer.
+
+         WHY: Manual identity mappings may have updated display names that aren't
+         reflected in the developer_stats data passed to report generators. This
+         method ensures we get the most current display name from the identity resolver.
+
+         Args:
+             canonical_id: The canonical ID to get the display name for
+             fallback_name: The fallback name to use if identity resolver is not available
+
+         Returns:
+             The canonical display name or fallback name
+         """
+         if self.identity_resolver and canonical_id:
+             try:
+                 canonical_name = self.identity_resolver.get_canonical_name(canonical_id)
+                 if canonical_name and canonical_name != "Unknown":
+                     return canonical_name
+             except Exception as e:
+                 logger.debug(f"Error getting canonical name for {canonical_id}: {e}")
+
+         return fallback_name
+
+     def _get_files_changed_count(self, commit: Dict[str, Any]) -> int:
+         """Safely extract files_changed count from commit data.
+
+         WHY: The files_changed field can be either an int (count) or list (file names).
+         This helper ensures we always get an integer count for calculations.
+
+         Args:
+             commit: Commit dictionary with files_changed field
+
+         Returns:
+             Integer count of files changed
+         """
+         files_changed = commit.get('files_changed', 0)
+
+         if isinstance(files_changed, int):
+             return files_changed
+         elif isinstance(files_changed, list):
+             return len(files_changed)
+         else:
+             # Fallback for unexpected types
+             logger.warning(f"Unexpected files_changed type: {type(files_changed)}, defaulting to 0")
+             return 0
+
+     def _log_datetime_comparison(self, dt1: datetime, dt2: datetime, operation: str, location: str) -> None:
+         """Log datetime comparison details for debugging timezone issues."""
+         logger.debug(f"Comparing dates in {location} ({operation}):")
+         logger.debug(f" dt1: {dt1} (tzinfo: {dt1.tzinfo}, aware: {dt1.tzinfo is not None})")
+         logger.debug(f" dt2: {dt2} (tzinfo: {dt2.tzinfo}, aware: {dt2.tzinfo is not None})")
+
+     def _safe_datetime_compare(self, dt1: datetime, dt2: datetime, operation: str, location: str) -> bool:
+         """Safely compare datetimes with logging and error handling."""
+         try:
+             self._log_datetime_comparison(dt1, dt2, operation, location)
+
+             if operation == 'lt':
+                 result = dt1 < dt2
+             elif operation == 'gt':
+                 result = dt1 > dt2
+             elif operation == 'le':
+                 result = dt1 <= dt2
+             elif operation == 'ge':
+                 result = dt1 >= dt2
+             elif operation == 'eq':
+                 result = dt1 == dt2
+             else:
+                 raise ValueError(f"Unknown operation: {operation}")
+
+             logger.debug(f" Result: {result}")
+             return result
+
+         except TypeError as e:
+             logger.error(f"Timezone comparison error in {location}:")
+             logger.error(f" dt1: {dt1} (type: {type(dt1)}, tzinfo: {getattr(dt1, 'tzinfo', 'N/A')})")
+             logger.error(f" dt2: {dt2} (type: {type(dt2)}, tzinfo: {getattr(dt2, 'tzinfo', 'N/A')})")
+             logger.error(f" Operation: {operation}")
+             logger.error(f" Error: {e}")
+
+             # Import traceback for detailed error info
+             import traceback
+             logger.error(f" Full traceback:\n{traceback.format_exc()}")
+
+             # Try to fix by making both timezone-aware in UTC
+             try:
+                 if dt1.tzinfo is None:
+                     dt1 = dt1.replace(tzinfo=timezone.utc)
+                     logger.debug(f" Fixed dt1 to UTC: {dt1}")
+                 if dt2.tzinfo is None:
+                     dt2 = dt2.replace(tzinfo=timezone.utc)
+                     logger.debug(f" Fixed dt2 to UTC: {dt2}")
+
+                 # Retry comparison
+                 if operation == 'lt':
+                     result = dt1 < dt2
+                 elif operation == 'gt':
+                     result = dt1 > dt2
+                 elif operation == 'le':
+                     result = dt1 <= dt2
+                 elif operation == 'ge':
+                     result = dt1 >= dt2
+                 elif operation == 'eq':
+                     result = dt1 == dt2
+                 else:
+                     raise ValueError(f"Unknown operation: {operation}")
+
+                 logger.info(f" Fixed comparison result: {result}")
+                 return result
+
+             except Exception as fix_error:
+                 logger.error(f" Failed to fix timezone issue: {fix_error}")
+                 raise

      def generate_activity_distribution_report(self, commits: List[Dict[str, Any]],
                                                developer_stats: List[Dict[str, Any]],
                                                output_path: Path) -> Path:
          """Generate activity distribution report with percentage breakdowns."""
+         # Apply exclusion filtering in Phase 2
+         commits = self._filter_excluded_authors(commits)
+         developer_stats = self._filter_excluded_authors(developer_stats)
+
          # Build lookup maps
          dev_lookup = {dev['canonical_id']: dev for dev in developer_stats}
 
@@ -31,7 +237,7 @@ class AnalyticsReportGenerator:
              c.get('filtered_deletions', c.get('deletions', 0))
              for c in commits
          )
-         total_files = sum(c['files_changed'] for c in commits)
+         total_files = sum(self._get_files_changed_count(c) for c in commits)

          # Group by developer and project
          dev_project_activity = defaultdict(lambda: defaultdict(lambda: {
@@ -47,7 +253,16 @@ class AnalyticsReportGenerator:
                  commit.get('filtered_insertions', commit.get('insertions', 0)) +
                  commit.get('filtered_deletions', commit.get('deletions', 0))
              )
-             dev_project_activity[dev_id][project]['files'] += commit.get('filtered_files_changed', commit.get('files_changed', 0))
+             # Handle files_changed safely - could be int or list
+             files_changed = commit.get('filtered_files_changed')
+             if files_changed is None:
+                 files_changed = self._get_files_changed_count(commit)
+             elif isinstance(files_changed, list):
+                 files_changed = len(files_changed)
+             elif not isinstance(files_changed, int):
+                 files_changed = 0
+
+             dev_project_activity[dev_id][project]['files'] += files_changed
              dev_project_activity[dev_id][project]['story_points'] += commit.get('story_points', 0) or 0

          # Build report data
@@ -55,7 +270,12 @@ class AnalyticsReportGenerator:
 
          for dev_id, projects in dev_project_activity.items():
              developer = dev_lookup.get(dev_id, {})
-             dev_name = self._anonymize_value(developer.get('primary_name', 'Unknown'), 'name')
+             dev_name = self._anonymize_value(
+                 self._get_canonical_display_name(
+                     dev_id,
+                     developer.get('primary_name', 'Unknown')
+                 ), 'name'
+             )

              # Calculate developer totals
              dev_total_commits = sum(p['commits'] for p in projects.values())
@@ -98,6 +318,9 @@ class AnalyticsReportGenerator:
                                          ticket_analysis: Dict[str, Any],
                                          output_path: Path) -> Path:
          """Generate qualitative insights and patterns report."""
+         # Apply exclusion filtering in Phase 2
+         commits = self._filter_excluded_authors(commits)
+         developer_stats = self._filter_excluded_authors(developer_stats)
          insights = []

          # Analyze commit patterns
@@ -127,10 +350,18 @@ class AnalyticsReportGenerator:
                                        output_path: Path,
                                        weeks: int = 12) -> Path:
          """Generate developer focus analysis showing concentration patterns and activity across all projects."""
+         # Apply exclusion filtering in Phase 2
+         commits = self._filter_excluded_authors(commits)
+         developer_stats = self._filter_excluded_authors(developer_stats)
+
          # Calculate week boundaries (timezone-aware to match commit timestamps)
          end_date = datetime.now(timezone.utc)
          start_date = end_date - timedelta(weeks=weeks)

+         logger.debug(f"Developer focus report date range:")
+         logger.debug(f" start_date: {start_date} (tzinfo: {start_date.tzinfo})")
+         logger.debug(f" end_date: {end_date} (tzinfo: {end_date.tzinfo})")
+
          # Build developer lookup
          dev_lookup = {dev['canonical_id']: dev for dev in developer_stats}
 
@@ -149,7 +380,12 @@ class AnalyticsReportGenerator:
 
          for dev in developer_stats:
              dev_id = dev['canonical_id']
-             dev_name = self._anonymize_value(dev['primary_name'], 'name')
+             dev_name = self._anonymize_value(
+                 self._get_canonical_display_name(
+                     dev_id,
+                     dev['primary_name']
+                 ), 'name'
+             )

              # Get developer's commits
              dev_commits = [c for c in commits if c.get('canonical_id') == dev_id]
@@ -164,6 +400,10 @@ class AnalyticsReportGenerator:
              commit_hours = []

              for commit in dev_commits:
+                 # Log commit processing
+                 logger.debug(f"Processing commit for developer {dev_name}: {commit.get('hash', 'unknown')[:8]}")
+                 logger.debug(f" timestamp: {commit['timestamp']} (tzinfo: {getattr(commit['timestamp'], 'tzinfo', 'N/A')})")
+
                  # Project distribution
                  project_key = commit.get('project_key', 'UNKNOWN')
                  projects[project_key] += 1
@@ -182,8 +422,10 @@ class AnalyticsReportGenerator:
                  # Commit size
                  commit_sizes.append(lines_changed)

-                 # Time of day
-                 if hasattr(commit['timestamp'], 'hour'):
+                 # Time of day (use local hour if available, fallback to UTC)
+                 if 'local_hour' in commit:
+                     commit_hours.append(commit['local_hour'])
+                 elif hasattr(commit['timestamp'], 'hour'):
                      commit_hours.append(commit['timestamp'].hour)

              # Calculate metrics
@@ -276,12 +518,284 @@ class AnalyticsReportGenerator:
 
          return output_path

+     def generate_weekly_trends_report(self, commits: List[Dict[str, Any]],
+                                       developer_stats: List[Dict[str, Any]],
+                                       output_path: Path,
+                                       weeks: int = 12) -> Path:
+         """Generate weekly trends analysis showing changes in activity patterns."""
+         # Apply exclusion filtering in Phase 2
+         commits = self._filter_excluded_authors(commits)
+         developer_stats = self._filter_excluded_authors(developer_stats)
+
+         # Calculate week boundaries
+         end_date = datetime.now(timezone.utc)
+         start_date = end_date - timedelta(weeks=weeks)
+
+         # Build developer lookup
+         dev_lookup = {dev['canonical_id']: dev for dev in developer_stats}
+
+         # Initialize data structures
+         weekly_data = defaultdict(lambda: {
+             'commits': 0,
+             'developers': set(),
+             'projects': defaultdict(int),
+             'lines_changed': 0,
+             'story_points': 0
+         })
+
+         developer_weekly = defaultdict(lambda: defaultdict(lambda: {
+             'commits': 0, 'lines': 0, 'story_points': 0
+         }))
+         project_weekly = defaultdict(lambda: defaultdict(lambda: {
+             'commits': 0, 'lines': 0, 'developers': set(), 'story_points': 0
+         }))
+
+         # Process commits
+         for commit in commits:
+             week_start = self._get_week_start(commit['timestamp'])
+             week_key = week_start.strftime('%Y-%m-%d')
+
+             # Overall weekly metrics
+             weekly_data[week_key]['commits'] += 1
+             weekly_data[week_key]['developers'].add(commit.get('canonical_id'))
+             weekly_data[week_key]['projects'][commit.get('project_key', 'UNKNOWN')] += 1
+             lines = (
+                 commit.get('filtered_insertions', commit.get('insertions', 0)) +
+                 commit.get('filtered_deletions', commit.get('deletions', 0))
+             )
+             weekly_data[week_key]['lines_changed'] += lines
+             weekly_data[week_key]['story_points'] += commit.get('story_points', 0) or 0
+
+             # Developer-specific weekly data
+             dev_id = commit.get('canonical_id')
+             developer_weekly[dev_id][week_key]['commits'] += 1
+             developer_weekly[dev_id][week_key]['lines'] += lines
+             developer_weekly[dev_id][week_key]['story_points'] += commit.get('story_points', 0) or 0
+
+             # Project-specific weekly data
+             project = commit.get('project_key', 'UNKNOWN')
+             project_weekly[project][week_key]['commits'] += 1
+             project_weekly[project][week_key]['lines'] += lines
+             project_weekly[project][week_key]['developers'].add(dev_id)
+             project_weekly[project][week_key]['story_points'] += commit.get('story_points', 0) or 0
+
+         # Convert to rows for CSV
+         rows = []
+         sorted_weeks = sorted(weekly_data.keys())
+
+         # Track developer and project trends
+         dev_activity_changes = defaultdict(list) # dev_id -> list of weekly changes
+         project_activity_changes = defaultdict(list) # project -> list of weekly changes
+
+         for i, week in enumerate(sorted_weeks):
+             data = weekly_data[week]
+
+             # Calculate week-over-week changes
+             prev_week = sorted_weeks[i-1] if i > 0 else None
+
+             commits_change = 0
+             developers_change = 0
+             if prev_week:
+                 prev_data = weekly_data[prev_week]
+                 commits_change = data['commits'] - prev_data['commits']
+                 developers_change = len(data['developers']) - len(prev_data['developers'])
+
+             # Top project and developer this week
+             top_project = max(data['projects'].items(), key=lambda x: x[1])[0] if data['projects'] else 'NONE'
+
+             # Find top developer this week
+             top_dev_id = None
+             top_dev_commits = 0
+             for dev_id in data['developers']:
+                 dev_commits = developer_weekly[dev_id][week]['commits']
+                 if dev_commits > top_dev_commits:
+                     top_dev_commits = dev_commits
+                     top_dev_id = dev_id
+
+             top_dev_name = self._anonymize_value(
+                 self._get_canonical_display_name(
+                     top_dev_id,
+                     dev_lookup.get(top_dev_id, {}).get('primary_name', 'Unknown')
+                 ), 'name'
+             ) if top_dev_id else 'None'
+
+             # Calculate developer trends for active developers this week
+             dev_trend_summary = []
+             for dev_id in data['developers']:
+                 dev_data = developer_weekly[dev_id][week]
+                 prev_dev_data = developer_weekly[dev_id].get(prev_week, {'commits': 0}) if prev_week else {'commits': 0}
+                 change = dev_data['commits'] - prev_dev_data['commits']
+                 if change != 0:
+                     dev_name = self._anonymize_value(
+                         self._get_canonical_display_name(
+                             dev_id,
+                             dev_lookup.get(dev_id, {}).get('primary_name', 'Unknown')
+                         ), 'name'
+                     )
+                     dev_activity_changes[dev_name].append(change)
+                     if abs(change) >= 3: # Significant changes only
+                         dev_trend_summary.append(f"{dev_name}({'+' if change > 0 else ''}{change})")
+
+             # Calculate project trends
+             project_trend_summary = []
+             for project, count in data['projects'].items():
+                 prev_count = weekly_data[prev_week]['projects'].get(project, 0) if prev_week else 0
+                 change = count - prev_count
+                 if change != 0:
+                     project_activity_changes[project].append(change)
+                     if abs(change) >= 3: # Significant changes only
+                         project_trend_summary.append(f"{project}({'+' if change > 0 else ''}{change})")
+
+             row = {
+                 'week_start': week,
+                 'commits': data['commits'],
+                 'active_developers': len(data['developers']),
+                 'active_projects': len(data['projects']),
+                 'lines_changed': data['lines_changed'],
+                 'story_points': data['story_points'],
+                 'commits_change': commits_change,
+                 'developers_change': developers_change,
+                 'top_project': top_project,
+                 'top_developer': top_dev_name,
+                 'avg_commits_per_dev': round(data['commits'] / max(len(data['developers']), 1), 1),
+                 'avg_lines_per_commit': round(data['lines_changed'] / max(data['commits'], 1), 1),
+                 'developer_trends': '; '.join(dev_trend_summary[:5]) if dev_trend_summary else 'stable',
+                 'project_trends': '; '.join(project_trend_summary[:5]) if project_trend_summary else 'stable'
+             }
+             rows.append(row)
+
+         # Write main CSV
+         df = pd.DataFrame(rows)
+         df.to_csv(output_path, index=False)
+
+         # Also generate detailed developer trends CSV with weekly columns
+         dev_trends_path = output_path.parent / f'developer_trends_{output_path.stem.split("_")[-1]}.csv'
+         dev_trend_rows = []
+
+         # Build developer activity by week
+         for dev_id, weekly_commits in developer_weekly.items():
+             dev_info = dev_lookup.get(dev_id, {})
+             dev_name = self._anonymize_value(
+                 self._get_canonical_display_name(
+                     dev_id,
+                     dev_info.get('primary_name', 'Unknown')
+                 ), 'name'
+             )
+
+             # Calculate summary statistics
+             weekly_values = []
+             for week in sorted_weeks:
+                 commits = weekly_commits.get(week, {}).get('commits', 0)
+                 weekly_values.append(commits)
+
+             # Only include developers with any activity
+             if sum(weekly_values) > 0:
+                 # Calculate trend metrics
+                 changes = []
+                 for i in range(1, len(weekly_values)):
+                     changes.append(weekly_values[i] - weekly_values[i-1])
+
+                 avg_change = sum(changes) / len(changes) if changes else 0
+                 volatility = np.std(changes) if len(changes) > 1 else 0
+                 trend = 'increasing' if avg_change > 1 else 'decreasing' if avg_change < -1 else 'stable'
+
+                 row = {
+                     'developer': dev_name,
+                     'total_commits': sum(weekly_values),
+                     'avg_weekly_commits': round(sum(weekly_values) / len(weekly_values), 1),
+                     'avg_weekly_change': round(avg_change, 1),
+                     'volatility': round(volatility, 1),
+                     'trend': trend,
+                     'total_weeks_active': len([v for v in weekly_values if v > 0]),
+                     'max_week': max(weekly_values),
+                     'min_week': min([v for v in weekly_values if v > 0]) if any(v > 0 for v in weekly_values) else 0
+                 }
+
+                 # Add weekly columns
+                 for i, week in enumerate(sorted_weeks):
+                     week_label = f'week_{i+1}_{week}'
+                     row[week_label] = weekly_values[i]
+
+                 dev_trend_rows.append(row)
+
+         if dev_trend_rows:
+             dev_trends_df = pd.DataFrame(dev_trend_rows)
+             # Sort by total commits to show most active developers first
+             dev_trends_df.sort_values('total_commits', ascending=False, inplace=True)
+             dev_trends_df.to_csv(dev_trends_path, index=False)
+
+         # Also generate detailed project trends CSV with weekly columns
+         proj_trends_path = output_path.parent / f'project_trends_{output_path.stem.split("_")[-1]}.csv'
+         proj_trend_rows = []
+
+         # Build project activity by week
+         for project, weekly_commits in project_weekly.items():
+             # Calculate summary statistics
+             weekly_values = []
+             weekly_developers = []
+             for week in sorted_weeks:
+                 commits = weekly_commits.get(week, {}).get('commits', 0)
+                 weekly_values.append(commits)
+                 # Count unique developers for this project this week
+                 devs = weekly_commits.get(week, {}).get('developers', set())
+                 weekly_developers.append(len(devs))
+
+             # Only include projects with any activity
+             if sum(weekly_values) > 0:
+                 # Calculate trend metrics
+                 changes = []
+                 for i in range(1, len(weekly_values)):
+                     changes.append(weekly_values[i] - weekly_values[i-1])
+
+                 avg_change = sum(changes) / len(changes) if changes else 0
+                 volatility = np.std(changes) if len(changes) > 1 else 0
+                 trend = 'growing' if avg_change > 2 else 'shrinking' if avg_change < -2 else 'stable'
+
+                 row = {
+                     'project': project,
+                     'total_commits': sum(weekly_values),
+                     'avg_weekly_commits': round(sum(weekly_values) / len(weekly_values), 1),
+                     'avg_weekly_developers': round(sum(weekly_developers) / len(weekly_developers), 1),
+                     'avg_weekly_change': round(avg_change, 1),
+                     'volatility': round(volatility, 1),
+                     'trend': trend,
+                     'total_weeks_active': len([v for v in weekly_values if v > 0]),
+                     'max_week': max(weekly_values),
+                     'min_week': min([v for v in weekly_values if v > 0]) if any(v > 0 for v in weekly_values) else 0
+                 }
+
+                 # Add weekly columns for commits
+                 for i, week in enumerate(sorted_weeks):
+                     week_label = f'week_{i+1}_{week}'
+                     row[week_label] = weekly_values[i]
+
+                 # Add weekly columns for developer count
+                 for i, week in enumerate(sorted_weeks):
+                     week_label = f'devs_week_{i+1}'
+                     row[week_label] = weekly_developers[i]
+
+                 proj_trend_rows.append(row)
+
+         if proj_trend_rows:
+             proj_trends_df = pd.DataFrame(proj_trend_rows)
+             # Sort by total commits to show most active projects first
+             proj_trends_df.sort_values('total_commits', ascending=False, inplace=True)
+             proj_trends_df.to_csv(proj_trends_path, index=False)
+
+         return output_path
+
      def _analyze_commit_patterns(self, commits: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
          """Analyze patterns in commit data."""
          insights = []

-         # Time-based patterns
-         commit_hours = [c['timestamp'].hour for c in commits if hasattr(c['timestamp'], 'hour')]
+         # Time-based patterns (use local hour if available)
+         commit_hours = []
+         for c in commits:
+             if 'local_hour' in c:
+                 commit_hours.append(c['local_hour'])
+             elif hasattr(c['timestamp'], 'hour'):
+                 commit_hours.append(c['timestamp'].hour)
+
          if commit_hours:
              peak_hour = max(set(commit_hours), key=commit_hours.count)
              insights.append({
@@ -410,7 +924,11 @@ class AnalyticsReportGenerator:
          insights = []

          # File change patterns
-         file_changes = [c['files_changed'] for c in commits if c['files_changed'] > 0]
+         file_changes = []
+         for c in commits:
+             files_count = self._get_files_changed_count(c)
+             if files_count > 0:
+                 file_changes.append(files_count)
          if file_changes:
              avg_files = np.mean(file_changes)
 
@@ -442,18 +960,25 @@ class AnalyticsReportGenerator:
 
      def _get_week_start(self, date: datetime) -> datetime:
          """Get Monday of the week for a given date."""
+         logger.debug(f"Getting week start for date: {date} (tzinfo: {getattr(date, 'tzinfo', 'N/A')})")
+
          # Ensure consistent timezone handling - keep timezone info
          if hasattr(date, 'tzinfo') and date.tzinfo is not None:
              # Keep timezone-aware but ensure it's UTC
              if date.tzinfo != timezone.utc:
                  date = date.astimezone(timezone.utc)
+                 logger.debug(f" Converted to UTC: {date}")
          else:
              # Convert naive datetime to UTC timezone-aware
              date = date.replace(tzinfo=timezone.utc)
+             logger.debug(f" Made timezone-aware: {date}")

          days_since_monday = date.weekday()
          monday = date - timedelta(days=days_since_monday)
-         return monday.replace(hour=0, minute=0, second=0, microsecond=0)
+         result = monday.replace(hour=0, minute=0, second=0, microsecond=0)
+
+         logger.debug(f" Week start result: {result} (tzinfo: {result.tzinfo})")
+         return result

      def _anonymize_value(self, value: str, field_type: str) -> str:
          """Anonymize a value if anonymization is enabled."""