gitflow-analytics 1.0.0-py3-none-any.whl → 1.0.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/__init__.py +11 -9
- gitflow_analytics/_version.py +2 -2
- gitflow_analytics/cli.py +691 -243
- gitflow_analytics/cli_rich.py +353 -0
- gitflow_analytics/config.py +389 -96
- gitflow_analytics/core/analyzer.py +175 -78
- gitflow_analytics/core/branch_mapper.py +132 -132
- gitflow_analytics/core/cache.py +242 -173
- gitflow_analytics/core/identity.py +214 -178
- gitflow_analytics/extractors/base.py +13 -11
- gitflow_analytics/extractors/story_points.py +70 -59
- gitflow_analytics/extractors/tickets.py +111 -88
- gitflow_analytics/integrations/github_integration.py +91 -77
- gitflow_analytics/integrations/jira_integration.py +284 -0
- gitflow_analytics/integrations/orchestrator.py +99 -72
- gitflow_analytics/metrics/dora.py +183 -179
- gitflow_analytics/models/database.py +191 -54
- gitflow_analytics/qualitative/__init__.py +30 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
- gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
- gitflow_analytics/qualitative/core/__init__.py +13 -0
- gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
- gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
- gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
- gitflow_analytics/qualitative/core/processor.py +540 -0
- gitflow_analytics/qualitative/models/__init__.py +25 -0
- gitflow_analytics/qualitative/models/schemas.py +272 -0
- gitflow_analytics/qualitative/utils/__init__.py +13 -0
- gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
- gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
- gitflow_analytics/qualitative/utils/metrics.py +347 -0
- gitflow_analytics/qualitative/utils/text_processing.py +243 -0
- gitflow_analytics/reports/analytics_writer.py +25 -8
- gitflow_analytics/reports/csv_writer.py +60 -32
- gitflow_analytics/reports/narrative_writer.py +21 -15
- gitflow_analytics/tui/__init__.py +5 -0
- gitflow_analytics/tui/app.py +721 -0
- gitflow_analytics/tui/screens/__init__.py +8 -0
- gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
- gitflow_analytics/tui/screens/configuration_screen.py +547 -0
- gitflow_analytics/tui/screens/loading_screen.py +358 -0
- gitflow_analytics/tui/screens/main_screen.py +304 -0
- gitflow_analytics/tui/screens/results_screen.py +698 -0
- gitflow_analytics/tui/widgets/__init__.py +7 -0
- gitflow_analytics/tui/widgets/data_table.py +257 -0
- gitflow_analytics/tui/widgets/export_modal.py +301 -0
- gitflow_analytics/tui/widgets/progress_widget.py +192 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +490 -0
- gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
- gitflow_analytics-1.0.0.dist-info/METADATA +0 -201
- gitflow_analytics-1.0.0.dist-info/RECORD +0 -30
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0
gitflow_analytics/qualitative/utils/text_processing.py

@@ -0,0 +1,243 @@
+"""Text processing utilities for qualitative analysis."""
+
+import hashlib
+import re
+from typing import Dict, List, Set
+
+
+class TextProcessor:
+    """Utility class for text preprocessing and feature extraction.
+
+    This class provides common text processing operations needed across
+    the qualitative analysis pipeline, including normalization, feature
+    extraction, and similarity calculations.
+    """
+
+    def __init__(self) -> None:
+        """Initialize text processor with common patterns."""
+        # Common patterns for normalization
+        self.url_pattern = re.compile(r'https?://[^\s]+')
+        self.email_pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b')
+        self.hash_pattern = re.compile(r'\b[a-f0-9]{7,40}\b')  # Git hashes
+        self.ticket_pattern = re.compile(r'\b(?:JIRA|TICKET|ISSUE|BUG|TASK)-?\d+\b', re.IGNORECASE)
+
+        # Stop words for feature extraction
+        self.stop_words: Set[str] = {
+            'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
+            'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
+            'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
+            'should', 'may', 'might', 'can', 'this', 'that', 'these', 'those'
+        }
+
+    def normalize_message(self, message: str) -> str:
+        """Normalize commit message for consistent processing.
+
+        This method standardizes commit messages by removing URLs, emails,
+        hashes, and other variable content that doesn't contribute to
+        semantic classification.
+
+        Args:
+            message: Raw commit message
+
+        Returns:
+            Normalized message suitable for classification
+        """
+        if not message:
+            return ""
+
+        # Convert to lowercase for consistency
+        normalized = message.lower().strip()
+
+        # Remove URLs, emails, and hashes
+        normalized = self.url_pattern.sub('[URL]', normalized)
+        normalized = self.email_pattern.sub('[EMAIL]', normalized)
+        normalized = self.hash_pattern.sub('[HASH]', normalized)
+
+        # Normalize ticket references
+        normalized = self.ticket_pattern.sub('[TICKET]', normalized)
+
+        # Remove extra whitespace
+        normalized = re.sub(r'\s+', ' ', normalized)
+
+        return normalized.strip()
+
+    def extract_keywords(self, text: str, min_length: int = 3) -> List[str]:
+        """Extract meaningful keywords from text.
+
+        Extracts keywords by removing stop words, punctuation, and short words
+        that are unlikely to be semantically meaningful.
+
+        Args:
+            text: Input text to extract keywords from
+            min_length: Minimum length for keywords
+
+        Returns:
+            List of extracted keywords
+        """
+        if not text:
+            return []
+
+        # Split into words and clean
+        words = re.findall(r'\b[a-zA-Z]+\b', text.lower())
+
+        # Filter stop words and short words
+        keywords = [
+            word for word in words
+            if word not in self.stop_words and len(word) >= min_length
+        ]
+
+        return keywords
+
+    def create_semantic_fingerprint(self, message: str, files: List[str]) -> str:
+        """Create a semantic fingerprint for similarity matching.
+
+        Creates a hash-based fingerprint that captures the semantic essence
+        of a commit for pattern matching and caching.
+
+        Args:
+            message: Commit message
+            files: List of changed files
+
+        Returns:
+            Hex-encoded fingerprint string
+        """
+        # Normalize message for consistent fingerprinting
+        normalized_msg = self.normalize_message(message)
+        keywords = self.extract_keywords(normalized_msg)
+
+        # Extract file patterns (extensions, directories)
+        file_patterns = []
+        for file_path in files[:10]:  # Limit to prevent huge fingerprints
+            # Get file extension
+            if '.' in file_path:
+                ext = file_path.split('.')[-1].lower()
+                file_patterns.append(f"ext:{ext}")
+
+            # Get directory patterns
+            parts = file_path.split('/')
+            if len(parts) > 1:
+                # First directory
+                file_patterns.append(f"dir:{parts[0]}")
+                # Last directory before file
+                if len(parts) > 2:
+                    file_patterns.append(f"dir:{parts[-2]}")
+
+        # Combine keywords and file patterns
+        semantic_elements = sorted(keywords[:10]) + sorted(set(file_patterns))
+
+        # Create fingerprint
+        fingerprint_text = '|'.join(semantic_elements)
+        return hashlib.md5(fingerprint_text.encode()).hexdigest()
+
+    def calculate_message_similarity(self, msg1: str, msg2: str) -> float:
+        """Calculate semantic similarity between two commit messages.
+
+        Uses keyword overlap to estimate semantic similarity between
+        commit messages for grouping similar commits.
+
+        Args:
+            msg1: First commit message
+            msg2: Second commit message
+
+        Returns:
+            Similarity score between 0.0 and 1.0
+        """
+        if not msg1 or not msg2:
+            return 0.0
+
+        # Extract keywords from both messages
+        keywords1 = set(self.extract_keywords(self.normalize_message(msg1)))
+        keywords2 = set(self.extract_keywords(self.normalize_message(msg2)))
+
+        if not keywords1 or not keywords2:
+            return 0.0
+
+        # Calculate Jaccard similarity
+        intersection = len(keywords1.intersection(keywords2))
+        union = len(keywords1.union(keywords2))
+
+        return intersection / union if union > 0 else 0.0
+
+    def extract_file_patterns(self, files: List[str]) -> Dict[str, int]:
+        """Extract file patterns for domain classification.
+
+        Analyzes file paths to extract patterns useful for determining
+        the technical domain of changes.
+
+        Args:
+            files: List of file paths
+
+        Returns:
+            Dictionary mapping pattern types to counts
+        """
+        patterns = {
+            'extensions': {},
+            'directories': {},
+            'special_files': {},
+        }
+
+        for file_path in files:
+            # File extensions
+            if '.' in file_path:
+                ext = file_path.split('.')[-1].lower()
+                patterns['extensions'][ext] = patterns['extensions'].get(ext, 0) + 1
+
+            # Directory patterns
+            parts = file_path.split('/')
+            for part in parts[:-1]:  # Exclude filename
+                if part:  # Skip empty parts
+                    patterns['directories'][part] = patterns['directories'].get(part, 0) + 1
+
+            # Special files
+            filename = parts[-1].lower()
+            special_files = [
+                'dockerfile', 'makefile', 'readme', 'license', 'changelog',
+                'package.json', 'requirements.txt', 'setup.py', 'pom.xml'
+            ]
+            for special in special_files:
+                if special in filename:
+                    patterns['special_files'][special] = patterns['special_files'].get(special, 0) + 1
+
+        return patterns
+
+    def calculate_commit_complexity(self, message: str, files: List[str],
+                                    insertions: int, deletions: int) -> Dict[str, float]:
+        """Calculate various complexity metrics for a commit.
+
+        Estimates the complexity of a commit based on message content,
+        file changes, and line changes to help with risk assessment.
+
+        Args:
+            message: Commit message
+            files: List of changed files
+            insertions: Number of lines inserted
+            deletions: Number of lines deleted
+
+        Returns:
+            Dictionary of complexity metrics
+        """
+        metrics = {}
+
+        # Message complexity (length, keywords)
+        metrics['message_length'] = len(message)
+        keywords = self.extract_keywords(message)
+        metrics['keyword_count'] = len(keywords)
+        metrics['message_complexity'] = min(1.0, len(keywords) / 10.0)
+
+        # File complexity
+        metrics['files_changed'] = len(files)
+        metrics['file_complexity'] = min(1.0, len(files) / 20.0)
+
+        # Line change complexity
+        total_changes = insertions + deletions
+        metrics['total_changes'] = total_changes
+        metrics['change_complexity'] = min(1.0, total_changes / 500.0)
+
+        # Overall complexity score (0.0 to 1.0)
+        metrics['overall_complexity'] = (
+            metrics['message_complexity'] * 0.2 +
+            metrics['file_complexity'] * 0.3 +
+            metrics['change_complexity'] * 0.5
+        )
+
+        return metrics
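For orientation, here is a minimal usage sketch of the new TextProcessor class. It is not part of the published diff, and the commit message and file paths are made-up examples:

from gitflow_analytics.qualitative.utils.text_processing import TextProcessor

processor = TextProcessor()

# Hypothetical commit used only for illustration.
message = "Fix JIRA-1234: handle https://example.com timeouts in api client (see a1b2c3d)"
files = ["src/api/client.py", "tests/test_client.py"]

# URLs, commit hashes, and ticket references collapse to placeholder tokens.
print(processor.normalize_message(message))
# e.g. "fix [TICKET]: handle [URL] timeouts in api client (see [HASH])"

# Stop words and very short words are dropped before keyword matching.
print(processor.extract_keywords(processor.normalize_message(message)))

# Stable MD5 digest over sorted keywords plus file extension/directory
# patterns, used for pattern caching and similarity lookups.
print(processor.create_semantic_fingerprint(message, files))

# Jaccard overlap of the two keyword sets: 3 shared of 4 total -> 0.75.
print(processor.calculate_message_similarity("fix login bug", "fix login timeout bug"))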
gitflow_analytics/reports/analytics_writer.py

@@ -1,6 +1,6 @@
 """Advanced analytics report generation with percentage and qualitative metrics."""
 import csv
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from pathlib import Path
 from typing import List, Dict, Any, Tuple
 from collections import defaultdict
@@ -26,7 +26,11 @@ class AnalyticsReportGenerator:
 
         # Calculate totals
         total_commits = len(commits)
-        total_lines = sum(
+        total_lines = sum(
+            c.get('filtered_insertions', c.get('insertions', 0)) +
+            c.get('filtered_deletions', c.get('deletions', 0))
+            for c in commits
+        )
         total_files = sum(c['files_changed'] for c in commits)
 
         # Group by developer and project
@@ -39,8 +43,11 @@ class AnalyticsReportGenerator:
             project = commit.get('project_key', 'UNKNOWN')
 
             dev_project_activity[dev_id][project]['commits'] += 1
-            dev_project_activity[dev_id][project]['lines'] +=
-
+            dev_project_activity[dev_id][project]['lines'] += (
+                commit.get('filtered_insertions', commit.get('insertions', 0)) +
+                commit.get('filtered_deletions', commit.get('deletions', 0))
+            )
+            dev_project_activity[dev_id][project]['files'] += commit.get('filtered_files_changed', commit.get('files_changed', 0))
             dev_project_activity[dev_id][project]['story_points'] += commit.get('story_points', 0) or 0
 
         # Build report data
@@ -120,8 +127,8 @@ class AnalyticsReportGenerator:
                                       output_path: Path,
                                       weeks: int = 12) -> Path:
         """Generate developer focus analysis showing concentration patterns and activity across all projects."""
-        # Calculate week boundaries
-        end_date = datetime.now()
+        # Calculate week boundaries (timezone-aware to match commit timestamps)
+        end_date = datetime.now(timezone.utc)
         start_date = end_date - timedelta(weeks=weeks)
 
         # Build developer lookup
@@ -162,7 +169,10 @@ class AnalyticsReportGenerator:
                projects[project_key] += 1
 
                # Lines changed per project
-               lines_changed =
+               lines_changed = (
+                   commit.get('filtered_insertions', commit.get('insertions', 0)) +
+                   commit.get('filtered_deletions', commit.get('deletions', 0))
+               )
                project_lines[project_key] += lines_changed
 
                # Weekly distribution
@@ -432,8 +442,15 @@ class AnalyticsReportGenerator:
 
     def _get_week_start(self, date: datetime) -> datetime:
         """Get Monday of the week for a given date."""
+        # Ensure consistent timezone handling - keep timezone info
         if hasattr(date, 'tzinfo') and date.tzinfo is not None:
-
+            # Keep timezone-aware but ensure it's UTC
+            if date.tzinfo != timezone.utc:
+                date = date.astimezone(timezone.utc)
+        else:
+            # Convert naive datetime to UTC timezone-aware
+            date = date.replace(tzinfo=timezone.utc)
+
         days_since_monday = date.weekday()
         monday = date - timedelta(days=days_since_monday)
         return monday.replace(hour=0, minute=0, second=0, microsecond=0)
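All of the line-count changes above use the same fallback pattern: prefer the new filtered_* commit fields when present and fall back to the raw counters otherwise. A small illustration with hypothetical commit dicts (the numbers are made up):

# Hypothetical commit records: the first carries filtered stats, the second does not.
commits = [
    {'insertions': 500, 'deletions': 120, 'filtered_insertions': 80, 'filtered_deletions': 20},
    {'insertions': 30, 'deletions': 5},
]

total_lines = sum(
    c.get('filtered_insertions', c.get('insertions', 0)) +
    c.get('filtered_deletions', c.get('deletions', 0))
    for c in commits
)

print(total_lines)  # 135: the filtered counts (80 + 20) win over the raw 500 + 120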
gitflow_analytics/reports/csv_writer.py

@@ -1,8 +1,8 @@
 """CSV report generation for GitFlow Analytics."""
 import csv
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from pathlib import Path
-from typing import List, Dict, Any, Optional
+from typing import List, Dict, Any, Optional, Tuple
 from collections import defaultdict
 import pandas as pd
 
@@ -13,7 +13,7 @@ class CSVReportGenerator:
     def __init__(self, anonymize: bool = False):
         """Initialize report generator."""
         self.anonymize = anonymize
-        self._anonymization_map = {}
+        self._anonymization_map: Dict[str, str] = {}
         self._anonymous_counter = 0
 
     def generate_weekly_report(self, commits: List[Dict[str, Any]],
@@ -21,12 +21,12 @@ class CSVReportGenerator:
                                output_path: Path,
                                weeks: int = 12) -> Path:
         """Generate weekly metrics CSV report."""
-        # Calculate week boundaries
-        end_date = datetime.now()
+        # Calculate week boundaries (timezone-aware to match commit timestamps)
+        end_date = datetime.now(timezone.utc)
         start_date = end_date - timedelta(weeks=weeks)
 
         # Group commits by week and developer
-        weekly_data = self._aggregate_weekly_data(commits, start_date, end_date)
+        weekly_data: Dict[Tuple[datetime, str, str], Dict[str, Any]] = self._aggregate_weekly_data(commits, start_date, end_date)
 
         # Create developer lookup
         dev_lookup = {dev['canonical_id']: dev for dev in developer_stats}
@@ -90,7 +90,12 @@ class CSVReportGenerator:
         # Overall statistics
         total_commits = len(commits)
         total_story_points = sum(c.get('story_points', 0) or 0 for c in commits)
-
+        # Use filtered stats if available, otherwise fall back to raw stats
+        total_lines = sum(
+            c.get('filtered_insertions', c.get('insertions', 0)) +
+            c.get('filtered_deletions', c.get('deletions', 0))
+            for c in commits
+        )
 
         summary_data.append({
             'metric': 'Total Commits',
@@ -193,9 +198,9 @@ class CSVReportGenerator:
 
     def _aggregate_weekly_data(self, commits: List[Dict[str, Any]],
                                start_date: datetime,
-                               end_date: datetime) -> Dict[
+                               end_date: datetime) -> Dict[Tuple[datetime, str, str], Dict[str, Any]]:
         """Aggregate commit data by week."""
-        weekly_data = defaultdict(lambda: {
+        weekly_data: defaultdict[Tuple[datetime, str, str], Dict[str, Any]] = defaultdict(lambda: {
             'commits': 0,
             'story_points': 0,
             'lines_added': 0,
@@ -209,10 +214,14 @@ class CSVReportGenerator:
 
         for commit in commits:
             timestamp = commit['timestamp']
-            #
+            # Ensure consistent timezone handling
             if hasattr(timestamp, 'tzinfo') and timestamp.tzinfo is not None:
-                #
-                timestamp
+                # Keep timezone-aware but ensure it's UTC
+                if timestamp.tzinfo != timezone.utc:
+                    timestamp = timestamp.astimezone(timezone.utc)
+            else:
+                # Convert naive datetime to UTC timezone-aware
+                timestamp = timestamp.replace(tzinfo=timezone.utc)
 
             if timestamp < start_date or timestamp > end_date:
                 continue
@@ -229,43 +238,53 @@ class CSVReportGenerator:
             key = (week_start, canonical_id, project_key)
 
             # Aggregate metrics
-            weekly_data[key]
-
-
-
-
-
+            data = weekly_data[key]
+            data['commits'] += 1
+            data['story_points'] += commit.get('story_points', 0) or 0
+
+            # Use filtered stats if available, otherwise fall back to raw stats
+            data['lines_added'] += commit.get('filtered_insertions', commit.get('insertions', 0)) or 0
+            data['lines_removed'] += commit.get('filtered_deletions', commit.get('deletions', 0)) or 0
+            data['files_changed'] += commit.get('filtered_files_changed', commit.get('files_changed', 0)) or 0
+
+            data['complexity_delta'] += commit.get('complexity_delta', 0.0) or 0.0
 
             # Track tickets
-
-
-
+            ticket_refs = commit.get('ticket_references', [])
+            if ticket_refs:
+                data['commits_with_tickets'] += 1
+                tickets_set = data['tickets']
+                for ticket in ticket_refs:
                    if isinstance(ticket, dict):
-
+                        tickets_set.add(ticket.get('full_id', ''))
                    else:
-
+                        tickets_set.add(str(ticket))
 
             # Track PRs (if available)
-
-
+            pr_number = commit.get('pr_number')
+            if pr_number:
+                prs_set = data['prs']
+                prs_set.add(pr_number)
 
         # Calculate derived metrics
-        result = {}
+        result: Dict[Tuple[datetime, str, str], Dict[str, Any]] = {}
         for key, metrics in weekly_data.items():
-
-            if
+            commits_count = metrics['commits']
+            if commits_count > 0:
                metrics['ticket_coverage_pct'] = (
-                    metrics['commits_with_tickets'] /
+                    metrics['commits_with_tickets'] / commits_count * 100
                )
                metrics['avg_commit_size'] = (
-                    (metrics['lines_added'] + metrics['lines_removed']) /
+                    (metrics['lines_added'] + metrics['lines_removed']) / commits_count
                )
            else:
                metrics['ticket_coverage_pct'] = 0
                metrics['avg_commit_size'] = 0
 
-
-
+            tickets_set = metrics['tickets']
+            prs_set = metrics['prs']
+            metrics['unique_tickets'] = len(tickets_set)
+            metrics['prs_involved'] = len(prs_set)
 
            # Remove sets before returning
            del metrics['tickets']
@@ -278,6 +297,15 @@ class CSVReportGenerator:
 
     def _get_week_start(self, date: datetime) -> datetime:
         """Get Monday of the week for a given date."""
+        # Ensure consistent timezone handling - keep timezone info
+        if hasattr(date, 'tzinfo') and date.tzinfo is not None:
+            # Keep timezone-aware but ensure it's UTC
+            if date.tzinfo != timezone.utc:
+                date = date.astimezone(timezone.utc)
+        else:
+            # Convert naive datetime to UTC timezone-aware
+            date = date.replace(tzinfo=timezone.utc)
+
         days_since_monday = date.weekday()
         monday = date - timedelta(days=days_since_monday)
         return monday.replace(hour=0, minute=0, second=0, microsecond=0)
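The repeated timezone block exists because commit timestamps are typically timezone-aware (GitPython returns offset-aware datetimes) while a bare datetime.now() is naive, and Python refuses to compare the two. A standalone sketch of the normalization these report writers now apply, using hypothetical timestamps:

from datetime import datetime, timedelta, timezone

def to_utc(ts: datetime) -> datetime:
    """Normalize a timestamp to timezone-aware UTC, mirroring the report writers."""
    if ts.tzinfo is not None:
        # Already aware: convert to UTC.
        return ts.astimezone(timezone.utc)
    # Naive: assume UTC, as the writers do.
    return ts.replace(tzinfo=timezone.utc)

commit_ts = datetime(2024, 5, 3, 15, 30, tzinfo=timezone(timedelta(hours=-5)))  # aware
week_boundary = datetime(2024, 5, 1)  # naive

# Comparing these two directly (commit_ts >= week_boundary) raises TypeError;
# normalizing both to UTC first makes the range check safe.
print(to_utc(commit_ts) >= to_utc(week_boundary))  # True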
@@ -1,14 +1,14 @@
|
|
|
1
1
|
"""Narrative report generation in Markdown format."""
|
|
2
2
|
from datetime import datetime
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import List, Dict, Any
|
|
4
|
+
from typing import List, Dict, Any, Set
|
|
5
5
|
from io import StringIO
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class NarrativeReportGenerator:
|
|
9
9
|
"""Generate human-readable narrative reports in Markdown."""
|
|
10
10
|
|
|
11
|
-
def __init__(self):
|
|
11
|
+
def __init__(self) -> None:
|
|
12
12
|
"""Initialize narrative report generator."""
|
|
13
13
|
self.templates = {
|
|
14
14
|
'high_performer': "{name} led development with {commits} commits ({pct}% of total activity)",
|
|
@@ -74,11 +74,15 @@ class NarrativeReportGenerator:
|
|
|
74
74
|
|
|
75
75
|
def _write_executive_summary(self, report: StringIO, commits: List[Dict[str, Any]],
|
|
76
76
|
developer_stats: List[Dict[str, Any]],
|
|
77
|
-
ticket_analysis: Dict[str, Any]):
|
|
77
|
+
ticket_analysis: Dict[str, Any]) -> None:
|
|
78
78
|
"""Write executive summary section."""
|
|
79
79
|
total_commits = len(commits)
|
|
80
80
|
total_developers = len(developer_stats)
|
|
81
|
-
total_lines = sum(
|
|
81
|
+
total_lines = sum(
|
|
82
|
+
c.get('filtered_insertions', c.get('insertions', 0)) +
|
|
83
|
+
c.get('filtered_deletions', c.get('deletions', 0))
|
|
84
|
+
for c in commits
|
|
85
|
+
)
|
|
82
86
|
|
|
83
87
|
report.write(f"- **Total Commits**: {total_commits:,}\n")
|
|
84
88
|
report.write(f"- **Active Developers**: {total_developers}\n")
|
|
@@ -96,7 +100,7 @@ class NarrativeReportGenerator:
|
|
|
96
100
|
report.write(f"({top_dev['total_commits']} commits)\n")
|
|
97
101
|
|
|
98
102
|
def _write_team_composition(self, report: StringIO, developer_stats: List[Dict[str, Any]],
|
|
99
|
-
focus_data: List[Dict[str, Any]]):
|
|
103
|
+
focus_data: List[Dict[str, Any]]) -> None:
|
|
100
104
|
"""Write team composition analysis."""
|
|
101
105
|
report.write("### Developer Profiles\n\n")
|
|
102
106
|
|
|
@@ -121,10 +125,10 @@ class NarrativeReportGenerator:
|
|
|
121
125
|
report.write("\n")
|
|
122
126
|
|
|
123
127
|
def _write_project_activity(self, report: StringIO, activity_dist: List[Dict[str, Any]],
|
|
124
|
-
commits: List[Dict[str, Any]]):
|
|
128
|
+
commits: List[Dict[str, Any]]) -> None:
|
|
125
129
|
"""Write project activity breakdown."""
|
|
126
130
|
# Aggregate by project
|
|
127
|
-
project_totals = {}
|
|
131
|
+
project_totals: Dict[str, Dict[str, Any]] = {}
|
|
128
132
|
for row in activity_dist:
|
|
129
133
|
project = row['project']
|
|
130
134
|
if project not in project_totals:
|
|
@@ -133,9 +137,11 @@ class NarrativeReportGenerator:
|
|
|
133
137
|
'lines': 0,
|
|
134
138
|
'developers': set()
|
|
135
139
|
}
|
|
136
|
-
project_totals[project]
|
|
137
|
-
|
|
138
|
-
|
|
140
|
+
data = project_totals[project]
|
|
141
|
+
data['commits'] += row['commits']
|
|
142
|
+
data['lines'] += row['lines_changed']
|
|
143
|
+
developers_set: Set[str] = data['developers']
|
|
144
|
+
developers_set.add(row['developer'])
|
|
139
145
|
|
|
140
146
|
# Sort by commits
|
|
141
147
|
sorted_projects = sorted(project_totals.items(),
|
|
@@ -150,12 +156,12 @@ class NarrativeReportGenerator:
|
|
|
150
156
|
report.write(f"- Active Developers: {len(data['developers'])}\n\n")
|
|
151
157
|
|
|
152
158
|
def _write_development_patterns(self, report: StringIO, insights: List[Dict[str, Any]],
|
|
153
|
-
focus_data: List[Dict[str, Any]]):
|
|
159
|
+
focus_data: List[Dict[str, Any]]) -> None:
|
|
154
160
|
"""Write development patterns analysis."""
|
|
155
161
|
report.write("### Key Patterns Identified\n\n")
|
|
156
162
|
|
|
157
163
|
# Group insights by category
|
|
158
|
-
by_category = {}
|
|
164
|
+
by_category: Dict[str, List[Dict[str, Any]]] = {}
|
|
159
165
|
for insight in insights:
|
|
160
166
|
category = insight['category']
|
|
161
167
|
if category not in by_category:
|
|
@@ -182,7 +188,7 @@ class NarrativeReportGenerator:
|
|
|
182
188
|
report.write("suggests high context switching\n")
|
|
183
189
|
|
|
184
190
|
def _write_pr_analysis(self, report: StringIO, pr_metrics: Dict[str, Any],
|
|
185
|
-
prs: List[Dict[str, Any]]):
|
|
191
|
+
prs: List[Dict[str, Any]]) -> None:
|
|
186
192
|
"""Write pull request analysis."""
|
|
187
193
|
report.write(f"- **Total PRs Merged**: {pr_metrics['total_prs']}\n")
|
|
188
194
|
report.write(f"- **Average PR Size**: {pr_metrics['avg_pr_size']:.0f} lines\n")
|
|
@@ -194,7 +200,7 @@ class NarrativeReportGenerator:
|
|
|
194
200
|
avg_comments = pr_metrics['total_review_comments'] / pr_metrics['total_prs']
|
|
195
201
|
report.write(f"- **Average Comments per PR**: {avg_comments:.1f}\n")
|
|
196
202
|
|
|
197
|
-
def _write_ticket_tracking(self, report: StringIO, ticket_analysis: Dict[str, Any]):
|
|
203
|
+
def _write_ticket_tracking(self, report: StringIO, ticket_analysis: Dict[str, Any]) -> None:
|
|
198
204
|
"""Write ticket tracking analysis."""
|
|
199
205
|
report.write("### Platform Usage\n\n")
|
|
200
206
|
|
|
@@ -217,7 +223,7 @@ class NarrativeReportGenerator:
|
|
|
217
223
|
report.write(f"({commit['files_changed']} files)\n")
|
|
218
224
|
|
|
219
225
|
def _write_recommendations(self, report: StringIO, insights: List[Dict[str, Any]],
|
|
220
|
-
ticket_analysis: Dict[str, Any], focus_data: List[Dict[str, Any]]):
|
|
226
|
+
ticket_analysis: Dict[str, Any], focus_data: List[Dict[str, Any]]) -> None:
|
|
221
227
|
"""Write recommendations based on analysis."""
|
|
222
228
|
recommendations = []
|
|
223
229
|
|