gitflow-analytics 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,454 @@
1
+ """Advanced analytics report generation with percentage and qualitative metrics."""
2
+ import csv
3
+ from datetime import datetime, timedelta
4
+ from pathlib import Path
5
+ from typing import List, Dict, Any, Tuple
6
+ from collections import defaultdict
7
+ import pandas as pd
8
+ import numpy as np
9
+
10
+
11
+ class AnalyticsReportGenerator:
12
+ """Generate advanced analytics reports with percentage breakdowns and qualitative insights."""
13
+
14
+ def __init__(self, anonymize: bool = False):
15
+ """Initialize analytics report generator."""
16
+ self.anonymize = anonymize
17
+ self._anonymization_map = {}
18
+ self._anonymous_counter = 0
19
+
20
+ def generate_activity_distribution_report(self, commits: List[Dict[str, Any]],
21
+ developer_stats: List[Dict[str, Any]],
22
+ output_path: Path) -> Path:
23
+ """Generate activity distribution report with percentage breakdowns."""
24
+ # Build lookup maps
25
+ dev_lookup = {dev['canonical_id']: dev for dev in developer_stats}
26
+
27
+ # Calculate totals
28
+ total_commits = len(commits)
29
+ total_lines = sum(c['insertions'] + c['deletions'] for c in commits)
30
+ total_files = sum(c['files_changed'] for c in commits)
31
+
32
+ # Group by developer and project
33
+ dev_project_activity = defaultdict(lambda: defaultdict(lambda: {
34
+ 'commits': 0, 'lines': 0, 'files': 0, 'story_points': 0
35
+ }))
36
+
37
+ for commit in commits:
38
+ dev_id = commit.get('canonical_id', commit.get('author_email'))
39
+ project = commit.get('project_key', 'UNKNOWN')
40
+
41
+ dev_project_activity[dev_id][project]['commits'] += 1
42
+ dev_project_activity[dev_id][project]['lines'] += commit['insertions'] + commit['deletions']
43
+ dev_project_activity[dev_id][project]['files'] += commit['files_changed']
44
+ dev_project_activity[dev_id][project]['story_points'] += commit.get('story_points', 0) or 0
45
+
46
+ # Build report data
47
+ rows = []
48
+
49
+ for dev_id, projects in dev_project_activity.items():
50
+ developer = dev_lookup.get(dev_id, {})
51
+ dev_name = self._anonymize_value(developer.get('primary_name', 'Unknown'), 'name')
52
+
53
+ # Calculate developer totals
54
+ dev_total_commits = sum(p['commits'] for p in projects.values())
55
+ dev_total_lines = sum(p['lines'] for p in projects.values())
56
+ dev_total_files = sum(p['files'] for p in projects.values())
57
+
58
+ for project, activity in projects.items():
59
+ row = {
60
+ 'developer': dev_name,
61
+ 'project': project,
62
+ # Raw numbers
63
+ 'commits': activity['commits'],
64
+ 'lines_changed': activity['lines'],
65
+ 'files_changed': activity['files'],
66
+ 'story_points': activity['story_points'],
67
+ # Developer perspective (% of developer's time on this project)
68
+ 'dev_commit_pct': round(activity['commits'] / dev_total_commits * 100, 1),
69
+ 'dev_lines_pct': round(activity['lines'] / dev_total_lines * 100, 1) if dev_total_lines > 0 else 0,
70
+ 'dev_files_pct': round(activity['files'] / dev_total_files * 100, 1) if dev_total_files > 0 else 0,
71
+ # Project perspective (% of project work by this developer)
72
+ 'proj_commit_pct': round(activity['commits'] / total_commits * 100, 1),
73
+ 'proj_lines_pct': round(activity['lines'] / total_lines * 100, 1) if total_lines > 0 else 0,
74
+ 'proj_files_pct': round(activity['files'] / total_files * 100, 1) if total_files > 0 else 0,
75
+ # Overall perspective (% of total activity)
76
+ 'total_activity_pct': round(activity['commits'] / total_commits * 100, 1)
77
+ }
78
+ rows.append(row)
79
+
80
+ # Sort by total activity
81
+ rows.sort(key=lambda x: x['total_activity_pct'], reverse=True)
82
+
83
+ # Write CSV
84
+ df = pd.DataFrame(rows)
85
+ df.to_csv(output_path, index=False)
86
+
87
+ return output_path
88
+
89
+ def generate_qualitative_insights_report(self, commits: List[Dict[str, Any]],
90
+ developer_stats: List[Dict[str, Any]],
91
+ ticket_analysis: Dict[str, Any],
92
+ output_path: Path) -> Path:
93
+ """Generate qualitative insights and patterns report."""
94
+ insights = []
95
+
96
+ # Analyze commit patterns
97
+ commit_insights = self._analyze_commit_patterns(commits)
98
+ insights.extend(commit_insights)
99
+
100
+ # Analyze developer patterns
101
+ dev_insights = self._analyze_developer_patterns(commits, developer_stats)
102
+ insights.extend(dev_insights)
103
+
104
+ # Analyze collaboration patterns
105
+ collab_insights = self._analyze_collaboration_patterns(commits)
106
+ insights.extend(collab_insights)
107
+
108
+ # Analyze work distribution
109
+ dist_insights = self._analyze_work_distribution(commits)
110
+ insights.extend(dist_insights)
111
+
112
+ # Write insights to CSV
113
+ df = pd.DataFrame(insights)
114
+ df.to_csv(output_path, index=False)
115
+
116
+ return output_path
117
+
118
    def generate_developer_focus_report(self, commits: List[Dict[str, Any]],
                                        developer_stats: List[Dict[str, Any]],
                                        output_path: Path,
                                        weeks: int = 12) -> Path:
        """Generate developer focus analysis showing concentration patterns and activity across all projects.

        Writes one CSV row per developer who has at least one commit:
        overall metrics (commit count, project count, focus and consistency
        scores, qualitative work-style and time-of-day labels) followed by
        five per-project columns (``<project>_gross_commits``,
        ``_adjusted_commits``, ``_dev_pct``, ``_proj_pct``, ``_total_pct``)
        for every project seen in ``commits``.

        Args:
            commits: Commit dicts; reads ``canonical_id``, ``project_key``,
                ``insertions``, ``deletions`` and ``timestamp``.
            developer_stats: Developer dicts; reads ``canonical_id`` and
                ``primary_name``.
            output_path: Destination CSV path.
            weeks: Window length used as the denominator of
                ``consistency_score``.

        Returns:
            ``output_path`` after the CSV has been written.
        """
        # Calculate week boundaries
        end_date = datetime.now()
        # NOTE(review): start_date is computed but never used below — commits
        # are NOT filtered to the last `weeks` weeks, so consistency_score can
        # exceed 100 when the history is longer than the window. Confirm intent.
        start_date = end_date - timedelta(weeks=weeks)

        # Build developer lookup
        # NOTE(review): dev_lookup is unused in this method (rows are built
        # directly from developer_stats entries).
        dev_lookup = {dev['canonical_id']: dev for dev in developer_stats}

        # Get all unique projects, sorted so the per-project CSV columns have
        # a stable order across runs.
        all_projects = sorted(list(set(c.get('project_key', 'UNKNOWN') for c in commits)))

        # Analyze focus patterns
        focus_data = []

        # Calculate total commits per project for percentage calculations
        project_totals = defaultdict(int)
        for commit in commits:
            project_totals[commit.get('project_key', 'UNKNOWN')] += 1

        total_commits = len(commits)

        for dev in developer_stats:
            dev_id = dev['canonical_id']
            dev_name = self._anonymize_value(dev['primary_name'], 'name')

            # Get developer's commits; developers with no activity are skipped.
            dev_commits = [c for c in commits if c.get('canonical_id') == dev_id]
            if not dev_commits:
                continue

            # Per-developer accumulators.
            projects = defaultdict(int)         # project -> commit count
            project_lines = defaultdict(int)    # project -> lines changed
            weekly_activity = defaultdict(int)  # week-start -> commit count
            commit_sizes = []                   # lines changed per commit
            commit_hours = []                   # hour-of-day per commit

            for commit in dev_commits:
                # Project distribution
                project_key = commit.get('project_key', 'UNKNOWN')
                projects[project_key] += 1

                # Lines changed per project
                lines_changed = commit['insertions'] + commit['deletions']
                project_lines[project_key] += lines_changed

                # Weekly distribution (bucketed by Monday of the week)
                week_start = self._get_week_start(commit['timestamp'])
                weekly_activity[week_start] += 1

                # Commit size
                commit_sizes.append(lines_changed)

                # Time of day (skipped when the timestamp has no .hour)
                if hasattr(commit['timestamp'], 'hour'):
                    commit_hours.append(commit['timestamp'].hour)

            # Calculate metrics
            num_projects = len(projects)
            primary_project = max(projects, key=projects.get) if projects else 'UNKNOWN'
            primary_project_pct = round(projects[primary_project] / len(dev_commits) * 100, 1)

            # Focus score (100% = single project, lower = more scattered).
            # Depends only on the project COUNT, not on the split between them.
            focus_score = round(100 / num_projects if num_projects > 0 else 0, 1)

            # Consistency score (active weeks / total weeks)
            active_weeks = len(weekly_activity)
            consistency_score = round(active_weeks / weeks * 100, 1)

            # Work pattern, labeled from the mean lines changed per commit.
            avg_commit_size = np.mean(commit_sizes) if commit_sizes else 0
            if avg_commit_size < 50:
                work_style = "Small, frequent changes"
            elif avg_commit_size < 200:
                work_style = "Moderate batch changes"
            else:
                work_style = "Large batch changes"

            # Time pattern, labeled from the mean commit hour.
            if commit_hours:
                avg_hour = np.mean(commit_hours)
                if avg_hour < 10:
                    time_pattern = "Morning developer"
                elif avg_hour < 14:
                    time_pattern = "Midday developer"
                elif avg_hour < 18:
                    time_pattern = "Afternoon developer"
                else:
                    time_pattern = "Evening developer"
            else:
                time_pattern = "Unknown"

            # Build the row with basic metrics
            row = {
                'developer': dev_name,
                'total_commits': len(dev_commits),
                'num_projects': num_projects,
                'primary_project': primary_project,
                'primary_project_pct': primary_project_pct,
                'focus_score': focus_score,
                'active_weeks': active_weeks,
                'consistency_score': consistency_score,
                'avg_commit_size': round(avg_commit_size, 1),
                'work_style': work_style,
                'time_pattern': time_pattern
            }

            # Add project-specific metrics (five columns per project)
            for project in all_projects:
                # Gross commits
                gross_commits = projects.get(project, 0)
                row[f'{project}_gross_commits'] = gross_commits

                # Adjusted commits (weighted by lines changed): scales gross
                # commits by how large this project's commits are relative to
                # the developer's overall average commit size.
                if gross_commits > 0 and project_lines[project] > 0:
                    # Adjustment factor based on average lines per commit
                    project_avg_lines = project_lines[project] / gross_commits
                    overall_avg_lines = sum(commit_sizes) / len(commit_sizes) if commit_sizes else 1
                    adjustment_factor = project_avg_lines / overall_avg_lines if overall_avg_lines > 0 else 1
                    adjusted_commits = round(gross_commits * adjustment_factor, 1)
                else:
                    adjusted_commits = 0
                row[f'{project}_adjusted_commits'] = adjusted_commits

                # Percent of developer's activity
                dev_pct = round(gross_commits / len(dev_commits) * 100, 1) if len(dev_commits) > 0 else 0
                row[f'{project}_dev_pct'] = dev_pct

                # Percent of project's total activity
                proj_pct = round(gross_commits / project_totals[project] * 100, 1) if project_totals[project] > 0 else 0
                row[f'{project}_proj_pct'] = proj_pct

                # Percent of total repository activity
                total_pct = round(gross_commits / total_commits * 100, 1) if total_commits > 0 else 0
                row[f'{project}_total_pct'] = total_pct

            focus_data.append(row)

        # Sort by focus score (most focused developers first)
        focus_data.sort(key=lambda x: x['focus_score'], reverse=True)

        # Write CSV
        df = pd.DataFrame(focus_data)
        df.to_csv(output_path, index=False)

        return output_path
268
+
269
+ def _analyze_commit_patterns(self, commits: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
270
+ """Analyze patterns in commit data."""
271
+ insights = []
272
+
273
+ # Time-based patterns
274
+ commit_hours = [c['timestamp'].hour for c in commits if hasattr(c['timestamp'], 'hour')]
275
+ if commit_hours:
276
+ peak_hour = max(set(commit_hours), key=commit_hours.count)
277
+ insights.append({
278
+ 'category': 'Timing',
279
+ 'insight': 'Peak commit hour',
280
+ 'value': f"{peak_hour}:00",
281
+ 'impact': 'Indicates team working hours'
282
+ })
283
+
284
+ # Commit message patterns
285
+ message_lengths = [len(c['message'].split()) for c in commits]
286
+ avg_message_length = np.mean(message_lengths)
287
+
288
+ if avg_message_length < 5:
289
+ quality = "Very brief"
290
+ elif avg_message_length < 10:
291
+ quality = "Concise"
292
+ elif avg_message_length < 20:
293
+ quality = "Detailed"
294
+ else:
295
+ quality = "Very detailed"
296
+
297
+ insights.append({
298
+ 'category': 'Quality',
299
+ 'insight': 'Commit message quality',
300
+ 'value': quality,
301
+ 'impact': f"Average {avg_message_length:.1f} words per message"
302
+ })
303
+
304
+ # Ticket coverage insights
305
+ commits_with_tickets = sum(1 for c in commits if c.get('ticket_references'))
306
+ coverage_pct = commits_with_tickets / len(commits) * 100 if commits else 0
307
+
308
+ if coverage_pct < 30:
309
+ tracking = "Poor tracking"
310
+ elif coverage_pct < 60:
311
+ tracking = "Moderate tracking"
312
+ elif coverage_pct < 80:
313
+ tracking = "Good tracking"
314
+ else:
315
+ tracking = "Excellent tracking"
316
+
317
+ insights.append({
318
+ 'category': 'Process',
319
+ 'insight': 'Ticket tracking adherence',
320
+ 'value': tracking,
321
+ 'impact': f"{coverage_pct:.1f}% commits have ticket references"
322
+ })
323
+
324
+ return insights
325
+
326
+ def _analyze_developer_patterns(self, commits: List[Dict[str, Any]],
327
+ developer_stats: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
328
+ """Analyze developer behavior patterns."""
329
+ insights = []
330
+
331
+ # Team size insights
332
+ team_size = len(developer_stats)
333
+ if team_size < 3:
334
+ team_type = "Very small team"
335
+ elif team_size < 6:
336
+ team_type = "Small team"
337
+ elif team_size < 12:
338
+ team_type = "Medium team"
339
+ else:
340
+ team_type = "Large team"
341
+
342
+ insights.append({
343
+ 'category': 'Team',
344
+ 'insight': 'Team size',
345
+ 'value': team_type,
346
+ 'impact': f"{team_size} active developers"
347
+ })
348
+
349
+ # Contribution distribution
350
+ commit_counts = [dev['total_commits'] for dev in developer_stats]
351
+ if commit_counts:
352
+ gini_coef = self._calculate_gini_coefficient(commit_counts)
353
+
354
+ if gini_coef < 0.3:
355
+ distribution = "Very balanced"
356
+ elif gini_coef < 0.5:
357
+ distribution = "Moderately balanced"
358
+ elif gini_coef < 0.7:
359
+ distribution = "Somewhat unbalanced"
360
+ else:
361
+ distribution = "Highly concentrated"
362
+
363
+ insights.append({
364
+ 'category': 'Team',
365
+ 'insight': 'Work distribution',
366
+ 'value': distribution,
367
+ 'impact': f"Gini coefficient: {gini_coef:.2f}"
368
+ })
369
+
370
+ return insights
371
+
372
+ def _analyze_collaboration_patterns(self, commits: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
373
+ """Analyze collaboration patterns."""
374
+ insights = []
375
+
376
+ # Merge commit analysis
377
+ merge_commits = [c for c in commits if c.get('is_merge')]
378
+ merge_pct = len(merge_commits) / len(commits) * 100 if commits else 0
379
+
380
+ if merge_pct < 5:
381
+ branching = "Minimal branching"
382
+ elif merge_pct < 15:
383
+ branching = "Moderate branching"
384
+ elif merge_pct < 25:
385
+ branching = "Active branching"
386
+ else:
387
+ branching = "Heavy branching"
388
+
389
+ insights.append({
390
+ 'category': 'Workflow',
391
+ 'insight': 'Branching strategy',
392
+ 'value': branching,
393
+ 'impact': f"{merge_pct:.1f}% merge commits"
394
+ })
395
+
396
+ return insights
397
+
398
+ def _analyze_work_distribution(self, commits: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
399
+ """Analyze work distribution patterns."""
400
+ insights = []
401
+
402
+ # File change patterns
403
+ file_changes = [c['files_changed'] for c in commits if c['files_changed'] > 0]
404
+ if file_changes:
405
+ avg_files = np.mean(file_changes)
406
+
407
+ if avg_files < 3:
408
+ pattern = "Focused changes"
409
+ elif avg_files < 8:
410
+ pattern = "Moderate scope changes"
411
+ else:
412
+ pattern = "Broad scope changes"
413
+
414
+ insights.append({
415
+ 'category': 'Workflow',
416
+ 'insight': 'Change scope pattern',
417
+ 'value': pattern,
418
+ 'impact': f"Average {avg_files:.1f} files per commit"
419
+ })
420
+
421
+ return insights
422
+
423
+ def _calculate_gini_coefficient(self, values: List[float]) -> float:
424
+ """Calculate Gini coefficient for distribution analysis."""
425
+ if not values or len(values) == 1:
426
+ return 0.0
427
+
428
+ sorted_values = sorted(values)
429
+ n = len(values)
430
+ cumsum = np.cumsum(sorted_values)
431
+ return (2 * np.sum((i + 1) * sorted_values[i] for i in range(n))) / (n * cumsum[-1]) - (n + 1) / n
432
+
433
+ def _get_week_start(self, date: datetime) -> datetime:
434
+ """Get Monday of the week for a given date."""
435
+ if hasattr(date, 'tzinfo') and date.tzinfo is not None:
436
+ date = date.replace(tzinfo=None)
437
+ days_since_monday = date.weekday()
438
+ monday = date - timedelta(days=days_since_monday)
439
+ return monday.replace(hour=0, minute=0, second=0, microsecond=0)
440
+
441
+ def _anonymize_value(self, value: str, field_type: str) -> str:
442
+ """Anonymize a value if anonymization is enabled."""
443
+ if not self.anonymize or not value:
444
+ return value
445
+
446
+ if value not in self._anonymization_map:
447
+ self._anonymous_counter += 1
448
+ if field_type == 'name':
449
+ anonymous = f"Developer{self._anonymous_counter}"
450
+ else:
451
+ anonymous = f"anon{self._anonymous_counter}"
452
+ self._anonymization_map[value] = anonymous
453
+
454
+ return self._anonymization_map[value]