gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff shows the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4108 -350
- gitflow_analytics/cli_rich.py +198 -48
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +904 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +441 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -508
- gitflow_analytics/core/analyzer.py +1209 -98
- gitflow_analytics/core/cache.py +1337 -29
- gitflow_analytics/core/data_fetcher.py +1193 -0
- gitflow_analytics/core/identity.py +363 -14
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +8 -1
- gitflow_analytics/extractors/tickets.py +749 -11
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +175 -11
- gitflow_analytics/integrations/jira_integration.py +461 -24
- gitflow_analytics/integrations/orchestrator.py +124 -1
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +379 -20
- gitflow_analytics/models/database.py +843 -53
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +9 -10
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
- gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
- gitflow_analytics/qualitative/core/__init__.py +4 -4
- gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
- gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
- gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
- gitflow_analytics/qualitative/core/processor.py +381 -248
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +7 -7
- gitflow_analytics/qualitative/models/schemas.py +155 -121
- gitflow_analytics/qualitative/utils/__init__.py +4 -4
- gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
- gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
- gitflow_analytics/qualitative/utils/metrics.py +172 -158
- gitflow_analytics/qualitative/utils/text_processing.py +146 -104
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +539 -14
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1676 -212
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2287 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +1 -1
- gitflow_analytics/tui/app.py +129 -126
- gitflow_analytics/tui/screens/__init__.py +3 -3
- gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
- gitflow_analytics/tui/screens/configuration_screen.py +154 -178
- gitflow_analytics/tui/screens/loading_screen.py +100 -110
- gitflow_analytics/tui/screens/main_screen.py +89 -72
- gitflow_analytics/tui/screens/results_screen.py +305 -281
- gitflow_analytics/tui/widgets/__init__.py +2 -2
- gitflow_analytics/tui/widgets/data_table.py +67 -69
- gitflow_analytics/tui/widgets/export_modal.py +76 -76
- gitflow_analytics/tui/widgets/progress_widget.py +41 -46
- gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
- gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
gitflow_analytics/reports/narrative_writer.py
@@ -1,269 +1,2398 @@
 """Narrative report generation in Markdown format."""
-
-
-from
+
+import logging
+from datetime import datetime, timedelta, timezone
 from io import StringIO
+from pathlib import Path
+from typing import Any
+
+from ..metrics.activity_scoring import ActivityScorer
+
+# Get logger for this module
+logger = logging.getLogger(__name__)


 class NarrativeReportGenerator:
     """Generate human-readable narrative reports in Markdown."""
-
+
     def __init__(self) -> None:
         """Initialize narrative report generator."""
+        self.activity_scorer = ActivityScorer()
         self.templates = {
-
-
-
-
-
+            "high_performer": "{name} led development with {commits} commits ({pct}% of total activity)",
+            "multi_project": "{name} worked across {count} projects, primarily on {primary} ({primary_pct}%)",
+            "focused_developer": "{name} showed strong focus on {project} with {pct}% of their time",
+            "ticket_coverage": "The team maintained {coverage}% ticket coverage, indicating {quality} process adherence",
+            "work_distribution": "Work distribution shows a {distribution} pattern with a Gini coefficient of {gini}",
         }
-
-    def
-
-
-
-
-
-
-
-
-
-
+
+    def _filter_excluded_authors(self, data_list: list[dict[str, Any]], exclude_authors: list[str]) -> list[dict[str, Any]]:
+        """
+        Filter out excluded authors from any data list using canonical_id and enhanced bot detection.
+
+        WHY: Bot exclusion happens in Phase 2 (reporting) instead of Phase 1 (data collection)
+        to ensure manual identity mappings work correctly. This allows the system to see
+        consolidated bot identities via canonical_id instead of just original author_email/author_name.
+
+        ENHANCEMENT: Added enhanced bot pattern matching to catch bots that weren't properly
+        consolidated via manual mappings, preventing bot leakage in reports.
+
+        Args:
+            data_list: List of data dictionaries containing canonical_id field
+            exclude_authors: List of author identifiers to exclude (checked against canonical_id)
+
+        Returns:
+            Filtered list with excluded authors removed
+        """
+        if not exclude_authors:
+            return data_list
+
+        logger.debug(f"DEBUG EXCLUSION: Starting filter with {len(exclude_authors)} excluded authors: {exclude_authors}")
+        logger.debug(f"DEBUG EXCLUSION: Filtering {len(data_list)} items from data list")
+
+        excluded_lower = [author.lower() for author in exclude_authors]
+        logger.debug(f"DEBUG EXCLUSION: Excluded authors (lowercase): {excluded_lower}")
+
+        # Separate explicit excludes from bot patterns
+        explicit_excludes = []
+        bot_patterns = []
+
+        for exclude in excluded_lower:
+            if '[bot]' in exclude or 'bot' in exclude.split():
+                bot_patterns.append(exclude)
+            else:
+                explicit_excludes.append(exclude)
+
+        logger.debug(f"DEBUG EXCLUSION: Explicit excludes: {explicit_excludes}")
+        logger.debug(f"DEBUG EXCLUSION: Bot patterns: {bot_patterns}")
+
+        filtered_data = []
+        excluded_count = 0
+
+        # Sample first 5 items to see data structure
+        for i, item in enumerate(data_list[:5]):
+            logger.debug(f"DEBUG EXCLUSION: Sample item {i}: canonical_id='{item.get('canonical_id', '')}', "
+                         f"author_email='{item.get('author_email', '')}', author_name='{item.get('author_name', '')}', "
+                         f"author='{item.get('author', '')}', primary_name='{item.get('primary_name', '')}', "
+                         f"name='{item.get('name', '')}'")
+
+        for item in data_list:
+            canonical_id = item.get("canonical_id", "")
+            # Also check original author fields as fallback for data without canonical_id
+            author_email = item.get("author_email", "")
+            author_name = item.get("author_name", "")
+
+            # Check all possible author fields
+            author = item.get("author", "")
+            primary_name = item.get("primary_name", "")
+            name = item.get("name", "")
+
+            # Collect all identity fields for checking
+            identity_fields = [
+                canonical_id,
+                item.get("primary_email", ""),
+                author_email,
+                author_name,
+                author,
+                primary_name,
+                name
+            ]
+
+            should_exclude = False
+            exclusion_reason = ""
+
+            # Check for exact matches with explicit excludes first
+            for field in identity_fields:
+                if field and field.lower() in explicit_excludes:
+                    should_exclude = True
+                    exclusion_reason = f"exact match with '{field}' in explicit excludes"
+                    break
+
+            # If not explicitly excluded, check for bot patterns
+            if not should_exclude:
+                for field in identity_fields:
+                    if not field:
+                        continue
+                    field_lower = field.lower()
+
+                    # Enhanced bot detection: check if any field contains bot-like patterns
+                    for bot_pattern in bot_patterns:
+                        if bot_pattern in field_lower:
+                            should_exclude = True
+                            exclusion_reason = f"bot pattern '{bot_pattern}' matches field '{field}'"
+                            break
+
+                    # Additional bot detection: check for common bot patterns not in explicit list
+                    if not should_exclude:
+                        bot_indicators = ['[bot]', 'bot@', '-bot', 'automated', 'github-actions', 'dependabot', 'renovate']
+                        for indicator in bot_indicators:
+                            if indicator in field_lower:
+                                # Only exclude if this bot-like pattern matches something in our exclude list
+                                for exclude in excluded_lower:
+                                    if indicator.replace('[', '').replace(']', '') in exclude or exclude in field_lower:
+                                        should_exclude = True
+                                        exclusion_reason = f"bot indicator '{indicator}' in field '{field}' matches exclude pattern '{exclude}'"
+                                        break
+                            if should_exclude:
+                                break
+
+                    if should_exclude:
+                        break
+
+            if should_exclude:
+                excluded_count += 1
+                logger.debug(f"DEBUG EXCLUSION: EXCLUDING item - {exclusion_reason}")
+                logger.debug(f"  canonical_id='{canonical_id}', primary_email='{item.get('primary_email', '')}', "
+                             f"author_email='{author_email}', author_name='{author_name}', author='{author}', "
+                             f"primary_name='{primary_name}', name='{name}'")
+            else:
+                filtered_data.append(item)
+
+        logger.debug(f"DEBUG EXCLUSION: Excluded {excluded_count} items, kept {len(filtered_data)} items")
+        return filtered_data
+
+    def generate_narrative_report(
+        self,
+        commits: list[dict[str, Any]],
+        prs: list[dict[str, Any]],
+        developer_stats: list[dict[str, Any]],
+        activity_dist: list[dict[str, Any]],
+        focus_data: list[dict[str, Any]],
+        insights: list[dict[str, Any]],
+        ticket_analysis: dict[str, Any],
+        pr_metrics: dict[str, Any],
+        output_path: Path,
+        weeks: int,
+        pm_data: dict[str, Any] = None,
+        chatgpt_summary: str = None,
+        branch_health_metrics: dict[str, dict[str, Any]] = None,
+        exclude_authors: list[str] = None,
+        analysis_start_date: datetime = None,
+        analysis_end_date: datetime = None,
+    ) -> Path:
         """Generate comprehensive narrative report."""
-
+        # Store analysis period for use in weekly trends calculation
+        self._analysis_start_date = analysis_start_date
+        self._analysis_end_date = analysis_end_date
+
+        logger.debug(f"DEBUG NARRATIVE: Starting report generation with exclude_authors: {exclude_authors}")
+        logger.debug(f"DEBUG NARRATIVE: Analysis period: {analysis_start_date} to {analysis_end_date}")
+        logger.debug(f"DEBUG NARRATIVE: Input data sizes - commits: {len(commits)}, developer_stats: {len(developer_stats)}, "
+                     f"activity_dist: {len(activity_dist)}, focus_data: {len(focus_data)}")
+
+        # Sample some developer_stats to see their structure
+        if developer_stats:
+            for i, dev in enumerate(developer_stats[:3]):
+                logger.debug(f"DEBUG NARRATIVE: Sample developer_stats[{i}]: canonical_id='{dev.get('canonical_id', '')}', "
+                             f"primary_name='{dev.get('primary_name', '')}', name='{dev.get('name', '')}', "
+                             f"primary_email='{dev.get('primary_email', '')}'")
+
+        # Filter out excluded authors in Phase 2 using canonical_id
+        if exclude_authors:
+            logger.debug(f"DEBUG NARRATIVE: Applying exclusion filter with {len(exclude_authors)} excluded authors")
+
+            original_commits = len(commits)
+            commits = self._filter_excluded_authors(commits, exclude_authors)
+            filtered_commits = original_commits - len(commits)
+
+            # Filter other data structures too
+            logger.debug(f"DEBUG NARRATIVE: Filtering developer_stats (original: {len(developer_stats)})")
+            developer_stats = self._filter_excluded_authors(developer_stats, exclude_authors)
+            logger.debug(f"DEBUG NARRATIVE: After filtering developer_stats: {len(developer_stats)}")
+
+            activity_dist = self._filter_excluded_authors(activity_dist, exclude_authors)
+            focus_data = self._filter_excluded_authors(focus_data, exclude_authors)
+
+            if filtered_commits > 0:
+                logger.info(f"Filtered out {filtered_commits} commits from {len(exclude_authors)} excluded authors in narrative report")
+
+            # Log remaining developers after filtering
+            if developer_stats:
+                remaining_devs = [dev.get('primary_name', dev.get('name', 'Unknown')) for dev in developer_stats]
+                logger.debug(f"DEBUG NARRATIVE: Remaining developers after filtering: {remaining_devs}")
+        else:
+            logger.debug("DEBUG NARRATIVE: No exclusion filter applied")

+        report = StringIO()
+
         # Header
         report.write("# GitFlow Analytics Report\n\n")
-
+
+        # Log datetime formatting
+        now = datetime.now()
+        logger.debug(
+            f"Formatting current datetime for report header: {now} (tzinfo: {getattr(now, 'tzinfo', 'N/A')})"
+        )
+        formatted_time = now.strftime("%Y-%m-%d %H:%M:%S")
+        logger.debug(f"  Formatted time: {formatted_time}")
+
+        report.write(f"**Generated**: {formatted_time}\n")
         report.write(f"**Analysis Period**: Last {weeks} weeks\n\n")
-
+
         # Executive Summary
         report.write("## Executive Summary\n\n")
-        self._write_executive_summary(report, commits, developer_stats, ticket_analysis)
-
+        self._write_executive_summary(report, commits, developer_stats, ticket_analysis, prs, branch_health_metrics, pm_data)
+
+        # Add ChatGPT qualitative insights if available
+        if chatgpt_summary:
+            report.write("\n## Qualitative Analysis\n\n")
+            report.write(chatgpt_summary)
+            report.write("\n")
+
         # Team Composition
         report.write("\n## Team Composition\n\n")
-        self._write_team_composition(report, developer_stats, focus_data)
-
+        self._write_team_composition(report, developer_stats, focus_data, commits, prs, ticket_analysis, weeks)
+
         # Project Activity
         report.write("\n## Project Activity\n\n")
-        self._write_project_activity(report, activity_dist, commits)
-
+        self._write_project_activity(report, activity_dist, commits, branch_health_metrics, ticket_analysis, weeks)
+
+
         # Development Patterns
         report.write("\n## Development Patterns\n\n")
         self._write_development_patterns(report, insights, focus_data)
-
+
+        # Commit Classification Analysis (if ML analysis is available)
+        if ticket_analysis.get("ml_analysis", {}).get("enabled", False):
+            report.write("\n## Commit Classification Analysis\n\n")
+            self._write_commit_classification_analysis(report, ticket_analysis)
+
         # Pull Request Analysis (if available)
-        if pr_metrics and pr_metrics.get(
+        if pr_metrics and pr_metrics.get("total_prs", 0) > 0:
             report.write("\n## Pull Request Analysis\n\n")
             self._write_pr_analysis(report, pr_metrics, prs)
-
+
         # Ticket Tracking
         report.write("\n## Issue Tracking\n\n")
-        self._write_ticket_tracking(report, ticket_analysis)
-
+        self._write_ticket_tracking(report, ticket_analysis, developer_stats)
+
+        # PM Platform Insights
+        if pm_data and "metrics" in pm_data:
+            report.write("\n## PM Platform Integration\n\n")
+            self._write_pm_insights(report, pm_data)
+
         # Recommendations
         report.write("\n## Recommendations\n\n")
         self._write_recommendations(report, insights, ticket_analysis, focus_data)
-
+
         # Write to file
-        with open(output_path,
+        with open(output_path, "w") as f:
             f.write(report.getvalue())
-
+
         return output_path
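For orientation, here is a minimal calling sketch of the expanded `generate_narrative_report` signature above. The dict shapes and sample values are assumptions inferred from the keys this hunk reads (`canonical_id`, `insertions`, `commit_coverage_pct`, and so on); whether inputs this sparse satisfy every downstream `_write_*` helper cannot be confirmed from this hunk alone.

```python
# Hypothetical usage sketch: data shapes inferred from the diff, not from package docs.
from datetime import datetime, timezone
from pathlib import Path

from gitflow_analytics.reports.narrative_writer import NarrativeReportGenerator

generator = NarrativeReportGenerator()
report_path = generator.generate_narrative_report(
    commits=[{
        "canonical_id": "dev-1",                      # consolidated identity key
        "author_name": "Jane Doe",
        "project_key": "PROJ",
        "message": "fix: PROJ-42 handle empty branch list",
        "insertions": 12,
        "deletions": 3,
        "ticket_references": ["PROJ-42"],
        "timestamp": datetime(2024, 7, 8, tzinfo=timezone.utc),
    }],
    prs=[],
    developer_stats=[{"canonical_id": "dev-1", "primary_name": "Jane Doe"}],
    activity_dist=[],
    focus_data=[],
    insights=[],
    ticket_analysis={"commit_coverage_pct": 100.0},
    pr_metrics={},
    output_path=Path("narrative_report.md"),
    weeks=4,
    exclude_authors=["dependabot[bot]"],  # applied here via _filter_excluded_authors
    analysis_start_date=datetime(2024, 6, 10, tzinfo=timezone.utc),
    analysis_end_date=datetime(2024, 7, 8, tzinfo=timezone.utc),
)
```

Note that `exclude_authors` is now applied at report time (Phase 2), so bot identities consolidated through manual mappings are caught via `canonical_id` rather than raw author fields.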
-
-    def _write_executive_summary(
-
-
+
+    def _write_executive_summary(
+        self,
+        report: StringIO,
+        commits: list[dict[str, Any]],
+        developer_stats: list[dict[str, Any]],
+        ticket_analysis: dict[str, Any],
+        prs: list[dict[str, Any]],
+        branch_health_metrics: dict[str, dict[str, Any]] = None,
+        pm_data: dict[str, Any] = None,
+    ) -> None:
         """Write executive summary section."""
         total_commits = len(commits)
         total_developers = len(developer_stats)
         total_lines = sum(
-            c.get(
-            c.get(
+            c.get("filtered_insertions", c.get("insertions", 0))
+            + c.get("filtered_deletions", c.get("deletions", 0))
             for c in commits
         )
-
+
         report.write(f"- **Total Commits**: {total_commits:,}\n")
         report.write(f"- **Active Developers**: {total_developers}\n")
         report.write(f"- **Lines Changed**: {total_lines:,}\n")
         report.write(f"- **Ticket Coverage**: {ticket_analysis['commit_coverage_pct']:.1f}%\n")

-        #
-
-
+        # PM Platform Story Points (if available)
+        if pm_data and "metrics" in pm_data:
+            metrics = pm_data.get("metrics", {})
+            story_analysis = metrics.get("story_point_analysis", {})
+            pm_story_points = story_analysis.get("pm_total_story_points", 0)
+            git_story_points = story_analysis.get("git_total_story_points", 0)
+
+            if pm_story_points > 0 or git_story_points > 0:
+                report.write(f"- **PM Story Points**: {pm_story_points:,} (platform) / {git_story_points:,} (commit-linked)\n")

-        #
-        if
-
-
-
+        # Add repository branch health summary
+        if branch_health_metrics:
+            # Aggregate branch health across all repositories
+            total_branches = 0
+            total_stale = 0
+            overall_health_scores = []
+
+            for _repo_name, metrics in branch_health_metrics.items():
+                summary = metrics.get("summary", {})
+                health_indicators = metrics.get("health_indicators", {})
+
+                total_branches += summary.get("total_branches", 0)
+                total_stale += summary.get("stale_branches", 0)
+
+                if health_indicators.get("overall_health_score") is not None:
+                    overall_health_scores.append(health_indicators["overall_health_score"])
+
+            # Calculate average health score
+            avg_health_score = sum(overall_health_scores) / len(overall_health_scores) if overall_health_scores else 0
+
+            # Determine health status
+            if avg_health_score >= 80:
+                health_status = "Excellent"
+            elif avg_health_score >= 60:
+                health_status = "Good"
+            elif avg_health_score >= 40:
+                health_status = "Fair"
+            else:
+                health_status = "Needs Attention"
+
+            report.write(f"- **Branch Health**: {health_status} ({avg_health_score:.0f}/100) - "
+                         f"{total_branches} branches, {total_stale} stale\n")
+
+        # Projects worked on - show full list instead of just count
+        projects = set(c.get("project_key", "UNKNOWN") for c in commits)
+        projects_list = sorted(projects)
+        report.write(f"- **Active Projects**: {', '.join(projects_list)}\n")
+
+        # Top contributor with proper format matching old report
+        if developer_stats and commits:
+            # BUGFIX: Calculate period-specific commit counts instead of using all-time totals
+            period_commit_counts = {}
+            for commit in commits:
+                canonical_id = commit.get("canonical_id", "")
+                period_commit_counts[canonical_id] = period_commit_counts.get(canonical_id, 0) + 1
+
+            # Find the developer with most commits in this period
+            if period_commit_counts:
+                top_canonical_id = max(period_commit_counts, key=period_commit_counts.get)
+                top_period_commits = period_commit_counts[top_canonical_id]
+
+                # Find the developer stats entry for this canonical_id
+                top_dev = None
+                for dev in developer_stats:
+                    if dev.get("canonical_id") == top_canonical_id:
+                        top_dev = dev
+                        break
+
+                if top_dev:
+                    # Handle both 'primary_name' (production) and 'name' (tests) for backward compatibility
+                    dev_name = top_dev.get("primary_name", top_dev.get("name", "Unknown Developer"))
+                    report.write(
+                        f"- **Top Contributor**: {dev_name} with {top_period_commits} commits\n"
+                    )
+            elif developer_stats:
+                # Fallback: use first developer but with 0 commits (shouldn't happen with proper filtering)
+                top_dev = developer_stats[0]
+                dev_name = top_dev.get("primary_name", top_dev.get("name", "Unknown Developer"))
+                report.write(
+                    f"- **Top Contributor**: {dev_name} with 0 commits\n"
+                )
+
+        # Calculate team average activity
+        if commits:
+            # Quick activity score calculation for executive summary
+            # total_prs = len(prs) if prs else 0  # Not used yet
+            total_lines = sum(
+                c.get("filtered_insertions", c.get("insertions", 0))
+                + c.get("filtered_deletions", c.get("deletions", 0))
+                for c in commits
+            )
+
+            # BUGFIX: Basic team activity assessment using only active developers in period
+            active_devs_in_period = len(period_commit_counts) if period_commit_counts else 0
+            avg_commits_per_dev = len(commits) / active_devs_in_period if active_devs_in_period > 0 else 0
+            if avg_commits_per_dev >= 10:
+                activity_assessment = "high activity"
+            elif avg_commits_per_dev >= 5:
+                activity_assessment = "moderate activity"
+            else:
+                activity_assessment = "low activity"
+
+            report.write(
+                f"- **Team Activity**: {activity_assessment} (avg {avg_commits_per_dev:.1f} commits/developer)\n"
+            )
+
+    def _aggregate_commit_classifications(
+        self,
+        ticket_analysis: dict[str, Any],
+        commits: list[dict[str, Any]] = None,
+        developer_stats: list[dict[str, Any]] = None
+    ) -> dict[str, dict[str, int]]:
+        """Aggregate commit classifications per developer.
+
+        WHY: This method provides detailed breakdown of commit types per developer,
+        replacing simple commit counts with actionable insights into what types of
+        work each developer is doing. This helps identify patterns and training needs.
+
+        DESIGN DECISION: Classify ALL commits (tracked and untracked) into proper
+        categories (feature, bug_fix, refactor, etc.) rather than using 'tracked_work'
+        as a category. For tracked commits, use ticket information to enhance accuracy.
+
+        Args:
+            ticket_analysis: Ticket analysis data containing classification info
+            commits: Optional list of all commits for complete categorization
+            developer_stats: Developer statistics for mapping canonical IDs
+
+        Returns:
+            Dictionary mapping developer canonical_id to category counts:
+            {
+                'dev_canonical_id': {
+                    'feature': 15,
+                    'bug_fix': 8,
+                    'maintenance': 5,
+                    ...
+                }
+            }
+        """
+        # Defensive type checking
+        if not isinstance(ticket_analysis, dict):
+            return {}
+
+        if commits is not None and not isinstance(commits, list):
+            # Log the error and continue without commits data
+            import logging
+            logger = logging.getLogger(__name__)
+            logger.warning(f"Expected commits to be list or None, got {type(commits)}: {commits}")
+            commits = None
+
+        if developer_stats is not None and not isinstance(developer_stats, list):
+            developer_stats = None
+
+        classifications = {}
+
+        # If we have full commits data, classify ALL commits properly
+        if commits and isinstance(commits, list):
+            # Import the ticket extractor for classification
+            try:
+                from ..extractors.ml_tickets import MLTicketExtractor
+                extractor = MLTicketExtractor(enable_ml=True)
+            except Exception:
+                # Fallback to basic ticket extractor
+                from ..extractors.tickets import TicketExtractor
+                extractor = TicketExtractor()
+
+            # Classify all commits
+            for commit in commits:
+                canonical_id = commit.get("canonical_id", "Unknown")
+                message = commit.get("message", "")
+
+                # Get files_changed in proper format for classification
+                files_changed = commit.get("files_changed", [])
+                if isinstance(files_changed, int):
+                    # If files_changed is just a count, we can't provide file names
+                    files_changed = []
+                elif not isinstance(files_changed, list):
+                    files_changed = []
+
+                # Use ticket information to enhance classification for tracked commits
+                ticket_refs = commit.get("ticket_references", [])
+
+                if ticket_refs and hasattr(extractor, 'categorize_commit_with_confidence'):
+                    # Use ML categorization with confidence for tracked commits
+                    try:
+                        result = extractor.categorize_commit_with_confidence(message, files_changed)
+                        category = result['category']
+                        # For tracked commits with ticket info, try to infer better category from ticket type
+                        category = self._enhance_category_with_ticket_info(category, ticket_refs, message)
+                    except Exception:
+                        # Fallback to basic categorization
+                        category = extractor.categorize_commit(message)
+                else:
+                    # Use basic categorization for untracked commits
+                    category = extractor.categorize_commit(message)
+
+                # Initialize developer classification if not exists
+                if canonical_id not in classifications:
+                    classifications[canonical_id] = {}
+
+                # Initialize category count if not exists
+                if category not in classifications[canonical_id]:
+                    classifications[canonical_id][category] = 0
+
+                # Increment category count
+                classifications[canonical_id][category] += 1
+
+        else:
+            # Fallback: Only process untracked commits (legacy behavior)
+            untracked_commits = ticket_analysis.get("untracked_commits", [])
+
+            # Process untracked commits (these have category information)
+            for commit in untracked_commits:
+                author = commit.get("author", "Unknown")
+                category = commit.get("category", "other")
+
+                # Map author to canonical_id if developer_stats is available
+                canonical_id = author  # fallback
+                if developer_stats:
+                    for dev in developer_stats:
+                        # Check multiple possible name mappings
+                        if (dev.get("primary_name") == author or
+                                dev.get("primary_email") == author or
+                                dev.get("canonical_id") == author):
+                            canonical_id = dev.get("canonical_id", author)
+                            break
+
+                if canonical_id not in classifications:
+                    classifications[canonical_id] = {}
+
+                if category not in classifications[canonical_id]:
+                    classifications[canonical_id][category] = 0
+
+                classifications[canonical_id][category] += 1
+
+        return classifications

-    def
-
-
-
+    def _enhance_category_with_ticket_info(self, category: str, ticket_refs: list, message: str) -> str:
+        """Enhance commit categorization using ticket reference information.
+
+        WHY: For tracked commits, we can often infer better categories by examining
+        the ticket references and message content. This improves classification accuracy
+        for tracked work versus relying purely on message patterns.
+
+        Args:
+            category: Base category from ML/rule-based classification
+            ticket_refs: List of ticket references for this commit
+            message: Commit message
+
+        Returns:
+            Enhanced category, potentially refined based on ticket information
+        """
+        if not ticket_refs:
+            return category
+
+        # Try to extract insights from ticket references and message
+        message_lower = message.lower()
+
+        # Look for ticket type patterns in the message or ticket IDs
+        # These patterns suggest specific categories regardless of base classification
+        if any(pattern in message_lower for pattern in ['hotfix', 'critical', 'urgent', 'prod', 'production']):
+            return 'bug_fix'  # Production/critical issues are typically bug fixes
+
+        if any(pattern in message_lower for pattern in ['feature', 'epic', 'story', 'user story']):
+            return 'feature'  # Explicitly mentioned features
+
+        # Look for JIRA/GitHub issue patterns that might indicate bug fixes
+        for ticket_ref in ticket_refs:
+            if isinstance(ticket_ref, dict):
+                ticket_id = ticket_ref.get('id', '').lower()
+            else:
+                ticket_id = str(ticket_ref).lower()
+
+            # Common bug fix patterns in ticket IDs
+            if any(pattern in ticket_id for pattern in ['bug', 'fix', 'issue', 'defect']):
+                return 'bug_fix'
+
+            # Feature patterns in ticket IDs
+            if any(pattern in ticket_id for pattern in ['feat', 'feature', 'epic', 'story']):
+                return 'feature'
+
+        # If no specific enhancement found, return original category
+        return category
+
+    def _get_project_classifications(
+        self, project: str, commits: list[dict[str, Any]], ticket_analysis: dict[str, Any]
+    ) -> dict[str, int]:
+        """Get commit classification breakdown for a specific project.
+
+        WHY: This method filters classification data to show only commits belonging
+        to a specific project, enabling project-specific classification insights
+        in the project activity section.
+
+        DESIGN DECISION: Classify ALL commits (tracked and untracked) for this project
+        into proper categories rather than lumping tracked commits as 'tracked_work'.
+
+        Args:
+            project: Project key to filter by
+            commits: List of all commits for mapping
+            ticket_analysis: Ticket analysis data containing classifications
+
+        Returns:
+            Dictionary mapping category names to commit counts for this project:
+            {'feature': 15, 'bug_fix': 8, 'refactor': 5, ...}
+        """
+        if not isinstance(ticket_analysis, dict):
+            return {}
+
+        project_classifications = {}
+
+        # First, try to use already classified untracked commits
+        untracked_commits = ticket_analysis.get("untracked_commits", [])
+        for commit in untracked_commits:
+            commit_project = commit.get("project_key", "UNKNOWN")
+            if commit_project == project:
+                category = commit.get("category", "other")
+                if category not in project_classifications:
+                    project_classifications[category] = 0
+                project_classifications[category] += 1
+
+        # If we have classifications from untracked commits, use those
+        if project_classifications:
+            return project_classifications
+
+        # Fallback: If no untracked commits data, classify all commits for this project
+        if isinstance(commits, list):
+            # Import the ticket extractor for classification
+            try:
+                from ..extractors.ml_tickets import MLTicketExtractor
+                extractor = MLTicketExtractor(enable_ml=True)
+            except Exception:
+                # Fallback to basic ticket extractor
+                from ..extractors.tickets import TicketExtractor
+                extractor = TicketExtractor()
+
+            # Classify all commits for this project
+            for commit in commits:
+                commit_project = commit.get("project_key", "UNKNOWN")
+                if commit_project == project:
+                    message = commit.get("message", "")
+
+                    # Get files_changed in proper format for classification
+                    files_changed = commit.get("files_changed", [])
+                    if isinstance(files_changed, int):
+                        # If files_changed is just a count, we can't provide file names
+                        files_changed = []
+                    elif not isinstance(files_changed, list):
+                        files_changed = []
+
+                    # Use ticket information to enhance classification for tracked commits
+                    ticket_refs = commit.get("ticket_references", [])
+
+                    if ticket_refs and hasattr(extractor, 'categorize_commit_with_confidence'):
+                        # Use ML categorization with confidence for tracked commits
+                        try:
+                            result = extractor.categorize_commit_with_confidence(message, files_changed)
+                            category = result['category']
+                            # For tracked commits with ticket info, try to infer better category from ticket type
+                            category = self._enhance_category_with_ticket_info(category, ticket_refs, message)
+                        except Exception:
+                            # Fallback to basic categorization
+                            category = extractor.categorize_commit(message)
+                    else:
+                        # Use basic categorization for untracked commits
+                        category = extractor.categorize_commit(message)
+
+                    # Initialize category count if not exists
+                    if category not in project_classifications:
+                        project_classifications[category] = 0
+
+                    # Increment category count
+                    project_classifications[category] += 1
+
+        return project_classifications
+
+    def _format_category_name(self, category: str) -> str:
+        """Convert internal category names to user-friendly display names.
+
+        Args:
+            category: Internal category name (e.g., 'bug_fix', 'feature', 'refactor')
+
+        Returns:
+            User-friendly display name (e.g., 'Bug Fixes', 'Features', 'Refactoring')
+        """
+        category_mapping = {
+            'bug_fix': 'Bug Fixes',
+            'feature': 'Features',
+            'refactor': 'Refactoring',
+            'documentation': 'Documentation',
+            'maintenance': 'Maintenance',
+            'test': 'Testing',
+            'style': 'Code Style',
+            'build': 'Build/CI',
+            'other': 'Other'
+        }
+        return category_mapping.get(category, category.replace('_', ' ').title())
+
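A quick sanity sketch exercising the two pure helpers above; the instantiation via `__new__` just skips `__init__` so no `ActivityScorer` is needed, and the sample inputs are invented for illustration:

```python
# Illustrative check of the category helpers defined in this hunk (sample data invented).
from gitflow_analytics.reports.narrative_writer import NarrativeReportGenerator

gen = NarrativeReportGenerator.__new__(NarrativeReportGenerator)  # bypass __init__ for a stateless check

# 'hotfix' in the message overrides the base category for a tracked commit.
assert gen._enhance_category_with_ticket_info(
    "maintenance", ["BUG-101"], "hotfix: rollback bad deploy"
) == "bug_fix"

# Known internal names map to display names; unknown ones fall back to title case.
assert gen._format_category_name("bug_fix") == "Bug Fixes"
assert gen._format_category_name("infra_work") == "Infra Work"
```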
704
|
+
def _calculate_weekly_classification_percentages(
|
|
705
|
+
self,
|
|
706
|
+
commits: list[dict[str, Any]],
|
|
707
|
+
developer_id: str = None,
|
|
708
|
+
project_key: str = None,
|
|
709
|
+
weeks: int = 4,
|
|
710
|
+
analysis_start_date: datetime = None,
|
|
711
|
+
analysis_end_date: datetime = None
|
|
712
|
+
) -> list[dict[str, Any]]:
|
|
713
|
+
"""Calculate weekly classification percentages for trend lines.
|
|
714
|
+
|
|
715
|
+
WHY: This method creates detailed week-by-week breakdown of commit classifications
|
|
716
|
+
showing how work type distribution changes over time, providing granular insights
|
|
717
|
+
into development patterns and workload shifts.
|
|
718
|
+
|
|
719
|
+
DESIGN DECISION: Only show weeks that contain actual commit activity within the
|
|
720
|
+
analysis period. This prevents phantom "No activity" weeks for periods outside
|
|
721
|
+
the actual data collection range, providing more accurate and meaningful reports.
|
|
722
|
+
|
|
723
|
+
Args:
|
|
724
|
+
commits: List of all commits with timestamps and classifications
|
|
725
|
+
developer_id: Optional canonical developer ID to filter by
|
|
726
|
+
project_key: Optional project key to filter by
|
|
727
|
+
weeks: Total analysis period in weeks
|
|
728
|
+
analysis_start_date: Analysis period start (from CLI)
|
|
729
|
+
analysis_end_date: Analysis period end (from CLI)
|
|
730
|
+
|
|
731
|
+
Returns:
|
|
732
|
+
List of weekly data dictionaries:
|
|
733
|
+
[
|
|
734
|
+
{
|
|
735
|
+
'week_start': datetime,
|
|
736
|
+
'week_display': 'Jul 7-13',
|
|
737
|
+
'classifications': {'Features': 45.0, 'Bug Fixes': 30.0, 'Maintenance': 25.0},
|
|
738
|
+
'changes': {'Features': 5.0, 'Bug Fixes': -5.0, 'Maintenance': 0.0},
|
|
739
|
+
'has_activity': True
|
|
740
|
+
},
|
|
741
|
+
...
|
|
742
|
+
]
|
|
743
|
+
"""
|
|
744
|
+
if not commits or weeks < 1:
|
|
745
|
+
return []
|
|
746
|
+
|
|
747
|
+
# Filter commits by developer or project if specified
|
|
748
|
+
filtered_commits = []
|
|
749
|
+
for commit in commits:
|
|
750
|
+
if developer_id and commit.get('canonical_id') != developer_id:
|
|
751
|
+
continue
|
|
752
|
+
if project_key and commit.get('project_key') != project_key:
|
|
753
|
+
continue
|
|
754
|
+
filtered_commits.append(commit)
|
|
755
|
+
|
|
756
|
+
# If no commits match the filter, return empty
|
|
757
|
+
if not filtered_commits:
|
|
758
|
+
return []
|
|
759
|
+
|
|
760
|
+
# Determine the analysis period bounds
|
|
761
|
+
if analysis_start_date and analysis_end_date:
|
|
762
|
+
# Use the exact analysis period from the CLI
|
|
763
|
+
analysis_start = analysis_start_date
|
|
764
|
+
analysis_end = analysis_end_date
|
|
765
|
+
else:
|
|
766
|
+
# Fallback: Use the actual date range of the filtered commits
|
|
767
|
+
# This ensures we only show weeks that have potential for activity
|
|
768
|
+
filtered_timestamps = []
|
|
769
|
+
for commit in filtered_commits:
|
|
770
|
+
timestamp = commit.get('timestamp')
|
|
771
|
+
if timestamp:
|
|
772
|
+
# Ensure timezone consistency
|
|
773
|
+
if hasattr(timestamp, 'tzinfo'):
|
|
774
|
+
if timestamp.tzinfo is None:
|
|
775
|
+
timestamp = timestamp.replace(tzinfo=timezone.utc)
|
|
776
|
+
elif timestamp.tzinfo != timezone.utc:
|
|
777
|
+
timestamp = timestamp.astimezone(timezone.utc)
|
|
778
|
+
filtered_timestamps.append(timestamp)
|
|
779
|
+
|
|
780
|
+
if not filtered_timestamps:
|
|
781
|
+
return []
|
|
782
|
+
|
|
783
|
+
# Use the actual range of commits for this developer/project
|
|
784
|
+
analysis_start = min(filtered_timestamps)
|
|
785
|
+
analysis_end = max(filtered_timestamps)
|
|
786
|
+
|
|
787
|
+
# Generate ALL weeks in the analysis period (not just weeks with commits)
|
|
788
|
+
# This ensures complete week coverage from start to end
|
|
789
|
+
# FIX: Only include complete weeks (Monday-Sunday) within the analysis period
|
|
790
|
+
analysis_weeks = []
|
|
791
|
+
current_week_start = self._get_week_start(analysis_start)
|
|
792
|
+
|
|
793
|
+
# Only include weeks where the entire week (including Sunday) is within the analysis period
|
|
794
|
+
while current_week_start <= analysis_end:
|
|
795
|
+
week_end = current_week_start + timedelta(days=6, hours=23, minutes=59, seconds=59)
|
|
796
|
+
# Only include this week if it ends before or on the analysis end date
|
|
797
|
+
if week_end <= analysis_end:
|
|
798
|
+
analysis_weeks.append(current_week_start)
|
|
799
|
+
current_week_start += timedelta(weeks=1)
|
|
800
|
+
|
|
801
|
+
# Group commits by week
|
|
802
|
+
weekly_commits = {}
|
|
803
|
+
for week_start in analysis_weeks:
|
|
804
|
+
weekly_commits[week_start] = []
|
|
805
|
+
|
|
806
|
+
for commit in filtered_commits:
|
|
807
|
+
timestamp = commit.get('timestamp')
|
|
808
|
+
if not timestamp:
|
|
809
|
+
continue
|
|
810
|
+
|
|
811
|
+
# Ensure timezone consistency
|
|
812
|
+
if hasattr(timestamp, 'tzinfo'):
|
|
813
|
+
if timestamp.tzinfo is None:
|
|
814
|
+
timestamp = timestamp.replace(tzinfo=timezone.utc)
|
|
815
|
+
elif timestamp.tzinfo != timezone.utc:
|
|
816
|
+
timestamp = timestamp.astimezone(timezone.utc)
|
|
817
|
+
|
|
818
|
+
# Only include commits within the analysis period bounds
|
|
819
|
+
if analysis_start_date and analysis_end_date and not (analysis_start <= timestamp <= analysis_end):
|
|
820
|
+
continue
|
|
821
|
+
|
|
822
|
+
# Get week start (Monday) for this commit
|
|
823
|
+
commit_week_start = self._get_week_start(timestamp)
|
|
824
|
+
|
|
825
|
+
# Only include commits in weeks we're tracking
|
|
826
|
+
if commit_week_start in weekly_commits:
|
|
827
|
+
weekly_commits[commit_week_start].append(commit)
|
|
828
|
+
|
|
829
|
+
# Import classifiers
|
|
830
|
+
try:
|
|
831
|
+
from ..extractors.ml_tickets import MLTicketExtractor
|
|
832
|
+
extractor = MLTicketExtractor(enable_ml=True)
|
|
833
|
+
except Exception:
|
|
834
|
+
from ..extractors.tickets import TicketExtractor
|
|
835
|
+
extractor = TicketExtractor()
|
|
836
|
+
|
|
837
|
+
# Calculate classifications for each week in the analysis period
|
|
838
|
+
# This includes both weeks with activity and weeks with no commits
|
|
839
|
+
weekly_data = []
|
|
840
|
+
previous_percentages = {}
|
|
841
|
+
|
|
842
|
+
for week_start in analysis_weeks:
|
|
843
|
+
week_commits = weekly_commits[week_start]
|
|
844
|
+
has_activity = len(week_commits) > 0
|
|
845
|
+
|
|
846
|
+
# Classify commits for this week
|
|
847
|
+
week_classifications = {}
|
|
848
|
+
week_percentages = {}
|
|
849
|
+
|
|
850
|
+
if has_activity:
|
|
851
|
+
for commit in week_commits:
|
|
852
|
+
message = commit.get('message', '')
|
|
853
|
+
files_changed = commit.get('files_changed', [])
|
|
854
|
+
if isinstance(files_changed, int) or not isinstance(files_changed, list):
|
|
855
|
+
files_changed = []
|
|
856
|
+
|
|
857
|
+
ticket_refs = commit.get('ticket_references', [])
|
|
858
|
+
|
|
859
|
+
if ticket_refs and hasattr(extractor, 'categorize_commit_with_confidence'):
|
|
860
|
+
try:
|
|
861
|
+
result = extractor.categorize_commit_with_confidence(message, files_changed)
|
|
862
|
+
category = result['category']
|
|
863
|
+
category = self._enhance_category_with_ticket_info(category, ticket_refs, message)
|
|
864
|
+
except Exception:
|
|
865
|
+
category = extractor.categorize_commit(message)
|
|
866
|
+
else:
|
|
867
|
+
category = extractor.categorize_commit(message)
|
|
868
|
+
|
|
869
|
+
if category not in week_classifications:
|
|
870
|
+
week_classifications[category] = 0
|
|
871
|
+
week_classifications[category] += 1
|
|
872
|
+
|
|
873
|
+
# Calculate percentages for weeks with activity
|
|
874
|
+
total_commits = sum(week_classifications.values())
|
|
875
|
+
if total_commits > 0:
|
|
876
|
+
for category, count in week_classifications.items():
|
|
877
|
+
percentage = (count / total_commits) * 100
|
|
878
|
+
if percentage >= 5.0: # Only include significant categories
|
|
879
|
+
display_name = self._format_category_name(category)
|
|
880
|
+
week_percentages[display_name] = percentage
|
|
881
|
+
|
|
882
|
+
# Calculate changes from previous week
|
|
883
|
+
changes = {}
|
|
884
|
+
if previous_percentages and week_percentages:
|
|
885
|
+
for category in set(week_percentages.keys()) | set(previous_percentages.keys()):
|
|
886
|
+
current_pct = week_percentages.get(category, 0.0)
|
|
887
|
+
prev_pct = previous_percentages.get(category, 0.0)
|
|
888
|
+
change = current_pct - prev_pct
|
|
889
|
+
if abs(change) >= 1.0: # Only show changes >= 1%
|
|
890
|
+
changes[category] = change
|
|
891
|
+
|
|
892
|
+
# Format week display
|
|
893
|
+
week_end = week_start + timedelta(days=6)
|
|
894
|
+
week_display = f"{week_start.strftime('%b %d')}-{week_end.strftime('%d')}"
|
|
895
|
+
|
|
896
|
+
# Calculate ticket coverage stats for this week
|
|
897
|
+
total_commits_week = len(week_commits)
|
|
898
|
+
commits_with_tickets = sum(1 for commit in week_commits if commit.get('ticket_references'))
|
|
899
|
+
ticket_coverage_pct = (commits_with_tickets / total_commits_week * 100) if total_commits_week > 0 else 0
|
|
900
|
+
|
|
901
|
+
# Calculate activity score for this week
|
|
902
|
+
week_activity_score = 0.0
|
|
903
|
+
if total_commits_week > 0:
|
|
904
|
+
# Aggregate weekly metrics for activity score
|
|
905
|
+
total_lines_added = sum(commit.get('lines_added', 0) for commit in week_commits)
|
|
906
|
+
total_lines_deleted = sum(commit.get('lines_deleted', 0) for commit in week_commits)
|
|
907
|
+
total_files_changed = sum(commit.get('files_changed_count', 0) for commit in week_commits)
|
|
908
|
+
|
|
909
|
+
week_metrics = {
|
|
910
|
+
'commits': total_commits_week,
|
|
911
|
+
'prs_involved': 0, # PR data not available in commit data
|
|
912
|
+
'lines_added': total_lines_added,
|
|
913
|
+
'lines_removed': total_lines_deleted,
|
|
914
|
+
'files_changed_count': total_files_changed,
|
|
915
|
+
'complexity_delta': 0 # Complexity data not available
|
|
916
|
+
}
|
|
917
|
+
|
|
918
|
+
activity_result = self.activity_scorer.calculate_activity_score(week_metrics)
|
|
919
|
+
week_activity_score = activity_result.get('normalized_score', 0.0)
|
|
920
|
+
|
|
921
|
+
weekly_data.append({
|
|
922
|
+
'week_start': week_start,
|
|
923
|
+
'week_display': week_display,
|
|
924
|
+
'classifications': week_percentages,
|
|
925
|
+
'classification_counts': week_classifications, # Absolute counts
|
|
926
|
+
'changes': changes,
|
|
927
|
+
'has_activity': has_activity,
|
|
928
|
+
'total_commits': total_commits_week,
|
|
929
|
+
'commits_with_tickets': commits_with_tickets,
|
|
930
|
+
'ticket_coverage': ticket_coverage_pct,
|
|
931
|
+
'activity_score': week_activity_score
|
|
932
|
+
})
|
|
933
|
+
|
|
934
|
+
# Update previous percentages only if there was activity
|
|
935
|
+
if has_activity and week_percentages:
|
|
936
|
+
previous_percentages = week_percentages.copy()
|
|
937
|
+
|
|
938
|
+
return weekly_data
|
|
939
|
+
|
|
940
|
+
def _calculate_classification_trends(
|
|
941
|
+
self,
|
|
942
|
+
commits: list[dict[str, Any]],
|
|
943
|
+
developer_id: str = None,
|
|
944
|
+
project_key: str = None,
|
|
945
|
+
weeks: int = 4
|
|
946
|
+
) -> dict[str, float]:
|
|
947
|
+
"""Calculate week-over-week changes in classification percentages.
|
|
948
|
+
|
|
949
|
+
WHY: This method provides trend analysis showing how development patterns
|
|
950
|
+
change over time, helping identify shifts in work type distribution.
|
|
951
|
+
|
|
952
|
+
DESIGN DECISION: Compare the most recent half of the analysis period
|
|
953
|
+
with the earlier half to show meaningful trends. For shorter periods,
|
|
954
|
+
compare week-to-week. Use percentage point changes for clarity.
|
|
955
|
+
|
|
956
|
+
Args:
|
|
957
|
+
commits: List of all commits with timestamps and classifications
|
|
958
|
+
developer_id: Optional canonical developer ID to filter by
|
|
959
|
+
+            project_key: Optional project key to filter by
+            weeks: Total analysis period in weeks
+
+        Returns:
+            Dictionary mapping category names to percentage point changes:
+            {'Features': 15.2, 'Bug Fixes': -8.1, 'Refactoring': 3.4}
+            Positive values indicate increases, negative indicate decreases.
+        """
+        if not commits or len(commits) < 2:
+            return {}
+
+        # Filter commits by developer or project if specified
+        filtered_commits = []
+        for commit in commits:
+            if developer_id and commit.get('canonical_id') != developer_id:
+                continue
+            if project_key and commit.get('project_key') != project_key:
+                continue
+            filtered_commits.append(commit)
+
+        if len(filtered_commits) < 2:
+            return {}
+
+        # Sort commits by timestamp
+        def safe_timestamp_key(commit):
+            ts = commit.get('timestamp')
+            if ts is None:
+                return datetime.min.replace(tzinfo=timezone.utc)
+            if hasattr(ts, 'tzinfo'):
+                if ts.tzinfo is None:
+                    ts = ts.replace(tzinfo=timezone.utc)
+                return ts
+            return ts
+
+        sorted_commits = sorted(filtered_commits, key=safe_timestamp_key)
+
+        if len(sorted_commits) < 4:  # Need at least 4 commits for meaningful trend
+            return {}
+
+        # Determine time split strategy based on analysis period
+        if weeks <= 2:
+            # For short periods (1-2 weeks), compare last 3 days vs previous 3+ days
+            cutoff_days = 3
+        elif weeks <= 4:
+            # For 3-4 week periods, compare last week vs previous weeks
+            cutoff_days = 7
+        else:
+            # For longer periods, compare recent half vs older half
+            cutoff_days = (weeks * 7) // 2
+
+        # Calculate cutoff timestamp
+        latest_timestamp = safe_timestamp_key(sorted_commits[-1])
+        cutoff_timestamp = latest_timestamp - timedelta(days=cutoff_days)
+
+        # Split commits into recent and previous periods
+        recent_commits = [c for c in sorted_commits if safe_timestamp_key(c) >= cutoff_timestamp]
+        previous_commits = [c for c in sorted_commits if safe_timestamp_key(c) < cutoff_timestamp]
+
+        if not recent_commits or not previous_commits:
+            return {}
+
+        # Classify commits for both periods
+        def get_period_classifications(period_commits):
+            period_classifications = {}
+
+            # Import classifiers
+            try:
+                from ..extractors.ml_tickets import MLTicketExtractor
+                extractor = MLTicketExtractor(enable_ml=True)
+            except Exception:
+                from ..extractors.tickets import TicketExtractor
+                extractor = TicketExtractor()
+
+            for commit in period_commits:
+                message = commit.get('message', '')
+                files_changed = commit.get('files_changed', [])
+                if isinstance(files_changed, int) or not isinstance(files_changed, list):
+                    files_changed = []
+
+                # Get ticket info for enhancement
+                ticket_refs = commit.get('ticket_references', [])
+
+                if ticket_refs and hasattr(extractor, 'categorize_commit_with_confidence'):
+                    try:
+                        result = extractor.categorize_commit_with_confidence(message, files_changed)
+                        category = result['category']
+                        category = self._enhance_category_with_ticket_info(category, ticket_refs, message)
+                    except Exception:
+                        category = extractor.categorize_commit(message)
+                else:
+                    category = extractor.categorize_commit(message)
+
+                if category not in period_classifications:
+                    period_classifications[category] = 0
+                period_classifications[category] += 1
+
+            return period_classifications
+
+        recent_classifications = get_period_classifications(recent_commits)
+        previous_classifications = get_period_classifications(previous_commits)
+
+        # Calculate percentage changes
+        trends = {}
+        all_categories = set(recent_classifications.keys()) | set(previous_classifications.keys())
+
+        total_recent = sum(recent_classifications.values())
+        total_previous = sum(previous_classifications.values())
+
+        if total_recent == 0 or total_previous == 0:
+            return {}
+
+        for category in all_categories:
+            recent_count = recent_classifications.get(category, 0)
+            previous_count = previous_classifications.get(category, 0)
+
+            recent_pct = (recent_count / total_recent) * 100
+            previous_pct = (previous_count / total_previous) * 100
+
+            change = recent_pct - previous_pct
+
+            # Only include significant changes (>= 5% absolute change)
+            if abs(change) >= 5.0:
+                display_name = self._format_category_name(category)
+                trends[display_name] = change
+
+        return trends
+
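To see the split-and-compare arithmetic above in isolation, here is a minimal standalone sketch of the same percentage-point calculation; the category counts are hypothetical illustration data, not output from the package:

```python
# Minimal sketch of the percentage-point trend math above (hypothetical counts).
recent = {"feature": 6, "bug_fix": 2}      # classifications in the recent window
previous = {"feature": 4, "bug_fix": 4}    # classifications in the earlier window

total_recent = sum(recent.values())
total_previous = sum(previous.values())

trends = {}
for category in set(recent) | set(previous):
    recent_pct = recent.get(category, 0) / total_recent * 100
    previous_pct = previous.get(category, 0) / total_previous * 100
    change = recent_pct - previous_pct
    if abs(change) >= 5.0:                 # same significance threshold as the method
        trends[category] = change

print(trends)  # e.g. {'feature': 25.0, 'bug_fix': -25.0}
```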
+    def _format_trend_line(self, trends: dict[str, float], prefix: str = "📈 Trends") -> str:
+        """Format trend data into a readable line with appropriate icons.
+
+        WHY: This method provides consistent formatting for trend display across
+        different sections of the report, using visual indicators to highlight
+        increases, decreases, and overall patterns.
+
+        Args:
+            trends: Dictionary of category name to percentage change
+            prefix: Text prefix for the trend line
+
+        Returns:
+            Formatted trend line string, or empty string if no significant trends
+        """
+        if not trends:
+            return ""
+
+        # Sort by absolute change magnitude (largest first)
+        sorted_trends = sorted(trends.items(), key=lambda x: abs(x[1]), reverse=True)
+
+        trend_parts = []
+        for category, change in sorted_trends[:4]:  # Show top 4 trends
+            if change > 0:
+                icon = "⬆️"
+                sign = "+"
+            else:
+                icon = "⬇️"
+                sign = ""
+
+            trend_parts.append(f"{category} {icon}{sign}{change:.0f}%")
+
+        if trend_parts:
+            return f"{prefix}: {', '.join(trend_parts)}"
+
+        return ""
+
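As a quick illustration of the formatting rules above (top four trends by magnitude, arrow and sign chosen per direction), a standalone re-creation with hypothetical trend values:

```python
# Standalone illustration of the trend-line formatting above (hypothetical values).
trends = {"Features": 15.2, "Bug Fixes": -8.1, "Refactoring": 3.4}

parts = []
for category, change in sorted(trends.items(), key=lambda x: abs(x[1]), reverse=True)[:4]:
    icon, sign = ("⬆️", "+") if change > 0 else ("⬇️", "")
    parts.append(f"{category} {icon}{sign}{change:.0f}%")

print(f"📈 Trends: {', '.join(parts)}")
# 📈 Trends: Features ⬆️+15%, Bug Fixes ⬇️-8%, Refactoring ⬆️+3%
```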
+    def _write_weekly_trend_lines(
+        self,
+        report: StringIO,
+        weekly_trends: list[dict[str, Any]],
+        prefix: str = ""
+    ) -> None:
         """Write weekly trend lines showing week-by-week classification changes.
+
+        WHY: This method provides detailed weekly breakdown of work patterns,
+        showing how development focus shifts over time with specific percentages
+        and change indicators from previous weeks. Shows ALL weeks in the analysis
+        period, including weeks with no activity for complete timeline coverage.
+
+        Args:
+            report: StringIO buffer to write to
+            weekly_trends: List of weekly classification data (all weeks in period)
+            prefix: Optional prefix for the trend section (e.g., "Project ")
+        """
+        if not weekly_trends:
+            return
+
+        report.write(f"- {prefix}Weekly Trends:\n")
+
+        for i, week_data in enumerate(weekly_trends):
+            week_display = week_data['week_display']
+            classifications = week_data['classifications']
+            changes = week_data['changes']
+            has_activity = week_data.get('has_activity', True)
+
+            # Get additional data from week_data
+            classification_counts = week_data.get('classification_counts', {})
+            total_commits = week_data.get('total_commits', 0)
+            commits_with_tickets = week_data.get('commits_with_tickets', 0)
+            ticket_coverage = week_data.get('ticket_coverage', 0)
+            activity_score = week_data.get('activity_score', 0.0)
+
+            # Handle weeks with no activity
+            if not classifications and not has_activity:
+                report.write(f" - Week {i+1} ({week_display}): No activity\n")
+                continue
+            elif not classifications:
+                # Should not happen, but handle gracefully
+                continue
+
+            # Format classifications with absolute numbers and percentages
+            classification_parts = []
+            for category in sorted(classifications.keys()):
+                percentage = classifications[category]
+
+                # Find the count for this formatted category name by reverse mapping
+                count = 0
+                for raw_category, raw_count in classification_counts.items():
+                    if self._format_category_name(raw_category) == category:
+                        count = raw_count
+                        break
+
+                change = changes.get(category, 0.0)
+
+                if i == 0 or abs(change) < 1.0:
+                    # First week or no significant change - show count and percentage
+                    classification_parts.append(f"{category} {count} ({percentage:.0f}%)")
+                else:
+                    # Show change from previous week
+                    change_indicator = f"(+{change:.0f}%)" if change > 0 else f"({change:.0f}%)"
+                    classification_parts.append(f"{category} {count} ({percentage:.0f}% {change_indicator})")
+
+            if classification_parts:
+                classifications_text = ", ".join(classification_parts)
+                # Add total commits, ticket coverage, and activity score to the week summary
+                if total_commits > 0:
+                    ticket_info = f" | {commits_with_tickets}/{total_commits} tickets ({ticket_coverage:.0f}%)" if commits_with_tickets > 0 else f" | 0/{total_commits} tickets (0%)"
+                    activity_info = f" | Activity: {activity_score:.1f}/100"
+                    report.write(f" - Week {i+1} ({week_display}): {classifications_text}{ticket_info}{activity_info}\n")
+                else:
+                    report.write(f" - Week {i+1} ({week_display}): {classifications_text}\n")
+            else:
+                # Fallback in case classifications exist but are empty
+                report.write(f" - Week {i+1} ({week_display}): No significant activity\n")
+
+        # Add a blank line after trend lines for spacing
+        # (Note: Don't add extra newline here as the caller will handle spacing)
+
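The method above only reads a handful of keys from each `week_data` entry. A minimal entry that exercises every branch looks roughly like this; the keys are taken from the accesses in the method, while all values are made-up illustration data:

```python
# Hypothetical week_data entry in the shape _write_weekly_trend_lines consumes.
week_data = {
    "week_display": "Jan 06 - Jan 12",                         # label for the "Week N (...)" line
    "classifications": {"Features": 60.0, "Bug Fixes": 40.0},  # display name -> percentage
    "changes": {"Features": 10.0, "Bug Fixes": -10.0},         # pct-point change vs prior week
    "has_activity": True,
    "classification_counts": {"feature": 3, "bug_fix": 2},     # raw category -> commit count
    "total_commits": 5,
    "commits_with_tickets": 4,
    "ticket_coverage": 80.0,
    "activity_score": 62.5,
}
```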
+    def _write_team_composition(
+        self,
+        report: StringIO,
+        developer_stats: list[dict[str, Any]],
+        focus_data: list[dict[str, Any]],
+        commits: list[dict[str, Any]] = None,
+        prs: list[dict[str, Any]] = None,
+        ticket_analysis: dict[str, Any] = None,
+        weeks: int = 4,
+    ) -> None:
+        """Write team composition analysis with activity scores and commit classifications.
+
+        WHY: Enhanced team composition shows not just how much each developer commits,
+        but what types of work they're doing. This provides actionable insights into
+        developer specializations, training needs, and work distribution patterns.
+        """
+        report.write("### Developer Profiles\n\n")
+
         # Create developer lookup for focus data
-        focus_lookup = {d[
+        focus_lookup = {d["developer"]: d for d in focus_data}
+
+        # Calculate activity scores for all developers
+        activity_scores = {}
+        dev_metrics = {}  # Initialize outside if block to ensure it's always defined
+
+        if commits:
+            # Aggregate metrics by developer
+            for commit in commits:
+                canonical_id = commit.get("canonical_id", "")
+                if canonical_id not in dev_metrics:
+                    dev_metrics[canonical_id] = {
+                        "commits": 0,
+                        "lines_added": 0,
+                        "lines_removed": 0,
+                        "files_changed": set(),
+                        "complexity_delta": 0,
+                        "prs_involved": 0,
+                    }
+
+                metrics = dev_metrics[canonical_id]
+                metrics["commits"] += 1
+                metrics["lines_added"] += commit.get(
+                    "filtered_insertions", commit.get("insertions", 0)
+                ) or 0
+                metrics["lines_removed"] += commit.get(
+                    "filtered_deletions", commit.get("deletions", 0)
+                ) or 0
+                metrics["complexity_delta"] += commit.get("complexity_delta", 0) or 0
+
+                # Track unique files
+                files = commit.get("files_changed", [])
+                if isinstance(files, list):
+                    # Only update if metrics["files_changed"] is still a set
+                    if isinstance(metrics["files_changed"], set):
+                        metrics["files_changed"].update(files)
+                    else:
+                        # If it's already an int, convert back to set and update
+                        metrics["files_changed"] = set()
+                        metrics["files_changed"].update(files)
+                elif isinstance(files, int):
+                    # If it's already aggregated, just add the count
+                    if isinstance(metrics["files_changed"], set):
+                        metrics["files_changed"] = len(metrics["files_changed"]) + files
+                    else:
+                        metrics["files_changed"] += files
+
+            # Count PRs per developer
+            if prs:
+                for pr in prs:
+                    author = pr.get("author", "")
+                    # Map PR author to canonical ID - need to look up in developer_stats
+                    for dev in developer_stats:
+                        if (
+                            dev.get("github_username") == author
+                            or dev.get("primary_name") == author
+                        ):
+                            canonical_id = dev.get("canonical_id")
+                            if canonical_id in dev_metrics:
+                                dev_metrics[canonical_id]["prs_involved"] += 1
+                            break
+
+            # Calculate scores
+            raw_scores_for_curve = {}
+            for canonical_id, metrics in dev_metrics.items():
+                # Convert set to count
+                if isinstance(metrics["files_changed"], set):
+                    metrics["files_changed"] = len(metrics["files_changed"])
+
+                score_result = self.activity_scorer.calculate_activity_score(metrics)
+                activity_scores[canonical_id] = score_result
+                raw_scores_for_curve[canonical_id] = score_result["raw_score"]
+
+            # Apply curve normalization
+            curve_normalized = self.activity_scorer.normalize_scores_on_curve(raw_scores_for_curve)
+
+            # Update activity scores with curve data
+            for canonical_id, curve_data in curve_normalized.items():
+                if canonical_id in activity_scores:
+                    activity_scores[canonical_id]["curve_data"] = curve_data
+
+        # Calculate team scores for relative ranking
+        all_scores = [score["raw_score"] for score in activity_scores.values()]
+
+        # Consolidate developer_stats by canonical_id to avoid duplicates from identity aliasing
+        consolidated_devs = {}
+        for dev in developer_stats:
+            canonical_id = dev.get("canonical_id")
+            if canonical_id and canonical_id not in consolidated_devs:
+                consolidated_devs[canonical_id] = dev
+
+        # BUGFIX: Only include developers who have commits in the analysis period
+        # Filter using dev_metrics (period-specific) instead of developer_stats (all-time)
+        active_devs = {}

-
-
-        commits
+        # Only process developers if we have commit data for the period
+        for canonical_id, dev in consolidated_devs.items():
+            # Only include developers who have commits in the current analysis period
+            if canonical_id in dev_metrics:
+                active_devs[canonical_id] = dev
+        # If no commits in period, no developers will be shown
+        # (This handles the case where all commits are outside the analysis period)
+
+        for canonical_id, dev in active_devs.items():  # Only developers with commits in period
+            # Handle both 'primary_name' (production) and 'name' (tests) for backward compatibility
+            name = dev.get("primary_name", dev.get("name", "Unknown Developer"))

+            # BUGFIX: Use period-specific commit count instead of all-time total
+            # Safety check: dev_metrics should exist if we got here, but be defensive
+            if canonical_id in dev_metrics:
+                period_commits = dev_metrics[canonical_id]["commits"]
+                total_commits = period_commits  # For backward compatibility with existing logic
+            else:
+                # Fallback (shouldn't happen with the filtering above)
+                total_commits = 0
+
             report.write(f"**{name}**\n")
-            report.write(f"- Commits: {commits}\n")

+            # Try to get commit classification breakdown if available
+            if ticket_analysis:
+                classifications = self._aggregate_commit_classifications(
+                    ticket_analysis, commits, developer_stats
+                )
+                dev_classifications = classifications.get(canonical_id, {})
+
+                if dev_classifications:
+                    # Sort categories by count (descending)
+                    sorted_categories = sorted(
+                        dev_classifications.items(),
+                        key=lambda x: x[1],
+                        reverse=True
+                    )
+
+                    # Format as "Features: 15 (45%), Bug Fixes: 8 (24%), etc."
+                    total_classified = sum(dev_classifications.values())
+                    if total_classified > 0:
+                        category_parts = []
+                        for category, count in sorted_categories:
+                            pct = (count / total_classified) * 100
+                            display_name = self._format_category_name(category)
+                            category_parts.append(f"{display_name}: {count} ({pct:.0f}%)")
+
+                        # Show top categories (limit to avoid excessive length)
+                        max_categories = 5
+                        if len(category_parts) > max_categories:
+                            shown_parts = category_parts[:max_categories]
+                            remaining = len(category_parts) - max_categories
+                            shown_parts.append(f"({remaining} more)")
+                            category_display = ", ".join(shown_parts)
+                        else:
+                            category_display = ", ".join(category_parts)
+
+                        # Calculate ticket coverage for this developer
+                        ticket_coverage_pct = dev.get("ticket_coverage_pct", 0)
+                        report.write(f"- Commits: {category_display}\n")
+                        report.write(f"- Ticket Coverage: {ticket_coverage_pct:.1f}%\n")
+
+                        # Add weekly trend lines if available
+                        if commits:
+                            weekly_trends = self._calculate_weekly_classification_percentages(
+                                commits, developer_id=canonical_id, weeks=weeks,
+                                analysis_start_date=self._analysis_start_date,
+                                analysis_end_date=self._analysis_end_date
+                            )
+                            if weekly_trends:
+                                self._write_weekly_trend_lines(report, weekly_trends)
+                            else:
+                                # Fallback to simple trend analysis
+                                trends = self._calculate_classification_trends(
+                                    commits, developer_id=canonical_id, weeks=weeks
+                                )
+                                trend_line = self._format_trend_line(trends)
+                                if trend_line:
+                                    report.write(f"- {trend_line}\n")
+                    else:
+                        # Fallback to simple count if no classifications
+                        ticket_coverage_pct = dev.get("ticket_coverage_pct", 0)
+                        report.write(f"- Commits: {total_commits}\n")
+                        report.write(f"- Ticket Coverage: {ticket_coverage_pct:.1f}%\n")
+
+                        # Still try to add weekly trend lines for simple commits
+                        if commits:
+                            weekly_trends = self._calculate_weekly_classification_percentages(
+                                commits, developer_id=canonical_id, weeks=weeks,
+                                analysis_start_date=self._analysis_start_date,
+                                analysis_end_date=self._analysis_end_date
+                            )
+                            if weekly_trends:
+                                self._write_weekly_trend_lines(report, weekly_trends)
+                            else:
+                                # Fallback to simple trend analysis
+                                trends = self._calculate_classification_trends(
+                                    commits, developer_id=canonical_id, weeks=weeks
+                                )
+                                trend_line = self._format_trend_line(trends)
+                                if trend_line:
+                                    report.write(f"- {trend_line}\n")
+                else:
+                    # Fallback to simple count if no classification data for this developer
+                    ticket_coverage_pct = dev.get("ticket_coverage_pct", 0)
+                    report.write(f"- Commits: {total_commits}\n")
+                    report.write(f"- Ticket Coverage: {ticket_coverage_pct:.1f}%\n")
+
+                    # Still try to add weekly trend lines
+                    if commits:
+                        weekly_trends = self._calculate_weekly_classification_percentages(
+                            commits, developer_id=canonical_id, weeks=weeks,
+                            analysis_start_date=self._analysis_start_date,
+                            analysis_end_date=self._analysis_end_date
+                        )
+                        if weekly_trends:
+                            self._write_weekly_trend_lines(report, weekly_trends)
+                        else:
+                            # Fallback to simple trend analysis
+                            trends = self._calculate_classification_trends(
+                                commits, developer_id=canonical_id, weeks=weeks
+                            )
+                            trend_line = self._format_trend_line(trends)
+                            if trend_line:
+                                report.write(f"- {trend_line}\n")
+            else:
+                # Fallback to simple count if no ticket analysis available
+                report.write(f"- Commits: {total_commits}\n")
+                # No ticket coverage info available in this case
+
+                # Still try to add weekly trend lines if commits available
+                if commits:
+                    weekly_trends = self._calculate_weekly_classification_percentages(
+                        commits, developer_id=canonical_id, weeks=weeks,
+                        analysis_start_date=self._analysis_start_date,
+                        analysis_end_date=self._analysis_end_date
+                    )
+                    if weekly_trends:
+                        self._write_weekly_trend_lines(report, weekly_trends)
+                    else:
+                        # Fallback to simple trend analysis
+                        trends = self._calculate_classification_trends(
+                            commits, developer_id=canonical_id, weeks=weeks
+                        )
+                        trend_line = self._format_trend_line(trends)
+                        if trend_line:
+                            report.write(f"- {trend_line}\n")
+
+            # Add activity score if available
+            if canonical_id and canonical_id in activity_scores:
+                score_data = activity_scores[canonical_id]
+
+                # Use curve data if available, otherwise fall back to relative scoring
+                if "curve_data" in score_data:
+                    curve_data = score_data["curve_data"]
+                    report.write(
+                        f"- Activity Score: {curve_data['curved_score']:.1f}/100 "
+                        f"({curve_data['activity_level']}, {curve_data['level_description']})\n"
+                    )
+                else:
+                    relative_data = self.activity_scorer.calculate_team_relative_score(
+                        score_data["raw_score"], all_scores
+                    )
+                    report.write(
+                        f"- Activity Score: {score_data['normalized_score']:.1f}/100 "
+                        f"({score_data['activity_level']}, {relative_data['percentile']:.0f}th percentile)\n"
+                    )
+
             # Add focus data if available
             if name in focus_lookup:
                 focus = focus_lookup[name]
-
-
+
+                # Get all projects for this developer - check for both naming patterns
+                project_percentages = []
+
+                # First try the _dev_pct pattern - use 0.05 threshold to include small percentages but filter out noise
+                for key in focus:
+                    if key.endswith("_dev_pct") and focus[key] > 0.05:
+                        project_name = key.replace("_dev_pct", "")
+                        project_percentages.append((project_name, focus[key]))
+
+                # If no _dev_pct found, try _pct pattern
+                if not project_percentages:
+                    for key in focus:
+                        if (
+                            key.endswith("_pct")
+                            and not key.startswith("primary_")
+                            and focus[key] > 0.05
+                        ):
+                            project_name = key.replace("_pct", "")
+                            project_percentages.append((project_name, focus[key]))
+
+                # Sort by percentage descending
+                project_percentages.sort(key=lambda x: x[1], reverse=True)
+
+                # Build projects string - show all projects above threshold with percentages
+                if project_percentages:
+                    projects_str = ", ".join(
+                        f"{proj} ({pct:.1f}%)" for proj, pct in project_percentages
+                    )
+                    report.write(f"- Projects: {projects_str}\n")
+                else:
+                    # Fallback to primary project if no percentage fields found above threshold
+                    primary_project = focus.get("primary_project", "UNKNOWN")
+                    primary_pct = focus.get("primary_project_pct", 0)
+                    if primary_pct > 0.05:  # Apply same threshold to fallback
+                        report.write(f"- Projects: {primary_project} ({primary_pct:.1f}%)\n")
+                    else:
+                        # If even primary project is below threshold, show it anyway to avoid empty projects
+                        report.write(f"- Projects: {primary_project} ({primary_pct:.1f}%)\n")
+
                 report.write(f"- Work Style: {focus['work_style']}\n")
                 report.write(f"- Active Pattern: {focus['time_pattern']}\n")
-
+
             report.write("\n")
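A simplified standalone sketch of the per-developer aggregation above, showing how `files_changed` can arrive as either a file list or a pre-aggregated count; the commit dicts are hypothetical and real commits carry more fields:

```python
# Simplified sketch of the dev_metrics aggregation (hypothetical commit dicts).
commits = [
    {"canonical_id": "dev-1", "insertions": 10, "deletions": 2, "files_changed": ["a.py", "b.py"]},
    {"canonical_id": "dev-1", "insertions": 5, "deletions": 1, "files_changed": 3},  # pre-aggregated
]

dev_metrics = {}
for commit in commits:
    m = dev_metrics.setdefault(
        commit["canonical_id"],
        {"commits": 0, "lines_added": 0, "lines_removed": 0, "files_changed": set()},
    )
    m["commits"] += 1
    m["lines_added"] += commit.get("filtered_insertions", commit.get("insertions", 0)) or 0
    m["lines_removed"] += commit.get("filtered_deletions", commit.get("deletions", 0)) or 0
    files = commit.get("files_changed", [])
    if isinstance(files, list):
        if isinstance(m["files_changed"], set):
            m["files_changed"].update(files)          # track unique paths while still a set
    elif isinstance(files, int):
        if isinstance(m["files_changed"], set):
            m["files_changed"] = len(m["files_changed"]) + files  # collapse set into a count
        else:
            m["files_changed"] += files

print(dev_metrics["dev-1"])
# {'commits': 2, 'lines_added': 15, 'lines_removed': 3, 'files_changed': 5}
```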
-
-    def _write_project_activity(
-
-
-
-
+
+    def _write_project_activity(
+        self, report: StringIO, activity_dist: list[dict[str, Any]], commits: list[dict[str, Any]],
+        branch_health_metrics: dict[str, dict[str, Any]] = None,
+        ticket_analysis: dict[str, Any] = None,
+        weeks: int = 4
+    ) -> None:
+        """Write project activity breakdown with commit classifications.
+
+        WHY: Enhanced project activity section now includes commit classification
+        breakdown per project, providing insights into what types of work are
+        happening in each project (features, bug fixes, refactoring, etc.).
+        This helps identify project-specific development patterns.
+        """
+        # Aggregate by project with developer details
+        project_totals: dict[str, dict[str, Any]] = {}
+        project_developers: dict[str, dict[str, int]] = {}
+
         for row in activity_dist:
-
+            # Handle missing fields gracefully for test compatibility
+            project = row.get("project", "UNKNOWN")
+            developer = row.get("developer", "Unknown Developer")
+
             if project not in project_totals:
-                project_totals[project] = {
-
-
-                    'developers': set()
-                }
+                project_totals[project] = {"commits": 0, "lines": 0, "developers": set()}
+                project_developers[project] = {}
+
             data = project_totals[project]
-
-            data[
-
-            developers_set
-
+            # Handle missing fields gracefully for test compatibility
+            data["commits"] += row.get("commits", 1)  # Default to 1 if missing
+            data["lines"] += row.get("lines_changed", 0)
+            developers_set: set[str] = data["developers"]
+            developers_set.add(developer)
+
+            # Track commits per developer per project
+            if developer not in project_developers[project]:
+                project_developers[project][developer] = 0
+            project_developers[project][developer] += row.get(
+                "commits", 1
+            )  # Default to 1 if missing
+
         # Sort by commits
-        sorted_projects = sorted(
-
-
+        sorted_projects = sorted(
+            project_totals.items(), key=lambda x: x[1]["commits"], reverse=True
+        )
+
+        # Calculate total commits across all projects in activity distribution
+        total_activity_commits = sum(data["commits"] for data in project_totals.values())
+
         report.write("### Activity by Project\n\n")
         for project, data in sorted_projects:
             report.write(f"**{project}**\n")
             report.write(f"- Commits: {data['commits']} ")
-            report.write(f"({data['commits'] /
+            report.write(f"({data['commits'] / total_activity_commits * 100:.1f}% of total)\n")
             report.write(f"- Lines Changed: {data['lines']:,}\n")
-
-
-
-
+
+            # Get developer contributions for this project
+            dev_contributions = project_developers[project]
+            # Sort by commits descending
+            sorted_devs = sorted(dev_contributions.items(), key=lambda x: x[1], reverse=True)
+
+            # Build contributors string
+            contributors = []
+            for dev_name, dev_commits in sorted_devs:
+                dev_pct = dev_commits / data["commits"] * 100
+                contributors.append(f"{dev_name} ({dev_pct:.1f}%)")
+
+            contributors_str = ", ".join(contributors)
+            report.write(f"- Contributors: {contributors_str}\n")
+
+            # Add commit classification breakdown for this project
+            if ticket_analysis:
+                project_classifications = self._get_project_classifications(project, commits, ticket_analysis)
+                if project_classifications:
+                    # Sort categories by count (descending)
+                    sorted_categories = sorted(
+                        project_classifications.items(),
+                        key=lambda x: x[1],
+                        reverse=True
+                    )
+
+                    # Calculate total for percentages
+                    total_classified = sum(project_classifications.values())
+                    if total_classified > 0:
+                        category_parts = []
+                        for category, count in sorted_categories:
+                            pct = (count / total_classified) * 100
+                            display_name = self._format_category_name(category)
+                            category_parts.append(f"{display_name}: {count} ({pct:.0f}%)")
+
+                        # Show top categories to avoid excessive length
+                        max_categories = 4
+                        if len(category_parts) > max_categories:
+                            shown_parts = category_parts[:max_categories]
+                            remaining = len(category_parts) - max_categories
+                            shown_parts.append(f"({remaining} more)")
+                            category_display = ", ".join(shown_parts)
+                        else:
+                            category_display = ", ".join(category_parts)
+
+                        report.write(f"- Classifications: {category_display}\n")
+
+            # Add project-level weekly trend lines
+            if commits:
+                project_weekly_trends = self._calculate_weekly_classification_percentages(
+                    commits, project_key=project, weeks=weeks,
+                    analysis_start_date=self._analysis_start_date,
+                    analysis_end_date=self._analysis_end_date
+                )
+                if project_weekly_trends:
+                    self._write_weekly_trend_lines(report, project_weekly_trends, "Project ")
+                else:
+                    # Fallback to simple project trend analysis
+                    project_trends = self._calculate_classification_trends(
+                        commits, project_key=project, weeks=weeks
+                    )
+                    project_trend_line = self._format_trend_line(
+                        project_trends, prefix="📊 Weekly Trend"
+                    )
+                    if project_trend_line:
+                        report.write(f"- {project_trend_line}\n")
+
+            # Add branch health for this project/repository if available
+            if branch_health_metrics and project in branch_health_metrics:
+                repo_health = branch_health_metrics[project]
+                summary = repo_health.get("summary", {})
+                health_indicators = repo_health.get("health_indicators", {})
+                branches = repo_health.get("branches", [])
+
+                health_score = health_indicators.get("overall_health_score", 0)
+                total_branches = summary.get("total_branches", 0)
+                stale_branches = summary.get("stale_branches", 0)
+                active_branches = summary.get("active_branches", 0)
+                long_lived_branches = summary.get("long_lived_branches", 0)
+
+                # Determine health status
+                if health_score >= 80:
+                    status_emoji = "🟢"
+                    status_text = "Excellent"
+                elif health_score >= 60:
+                    status_emoji = "🟡"
+                    status_text = "Good"
+                elif health_score >= 40:
+                    status_emoji = "🟠"
+                    status_text = "Fair"
+                else:
+                    status_emoji = "🔴"
+                    status_text = "Needs Attention"
+
+                report.write("\n**Branch Management**\n")
+                report.write(f"- Overall Health: {status_emoji} {status_text} ({health_score:.0f}/100)\n")
+                report.write(f"- Total Branches: {total_branches}\n")
+                report.write(f" - Active: {active_branches} branches\n")
+                report.write(f" - Long-lived: {long_lived_branches} branches (>30 days)\n")
+                report.write(f" - Stale: {stale_branches} branches (>90 days)\n")
+
+                # Show top problematic branches if any
+                if branches:
+                    # Sort branches by health score (ascending) to get worst first
+                    problem_branches = [b for b in branches if b.get("health_score", 100) < 60 and not b.get("is_merged", False)]
+                    problem_branches.sort(key=lambda x: x.get("health_score", 100))
+
+                    if problem_branches:
+                        report.write("\n**Branches Needing Attention**:\n")
+                        for i, branch in enumerate(problem_branches[:3]):  # Show top 3
+                            name = branch.get("name", "unknown")
+                            age = branch.get("age_days", 0)
+                            behind = branch.get("behind_main", 0)
+                            ahead = branch.get("ahead_of_main", 0)
+                            score = branch.get("health_score", 0)
+
+                            report.write(f" {i+1}. `{name}` (score: {score:.0f}/100)\n")
+                            report.write(f" - Age: {age} days\n")
+                            if behind > 0:
+                                report.write(f" - Behind main: {behind} commits\n")
+                            if ahead > 0:
+                                report.write(f" - Ahead of main: {ahead} commits\n")
+
+                # Add recommendations
+                recommendations = repo_health.get("recommendations", [])
+                if recommendations:
+                    report.write("\n**Recommended Actions**:\n")
+                    for rec in recommendations[:3]:  # Show top 3 recommendations
+                        report.write(f"- {rec}\n")
+
+            report.write("\n")
+
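The contributor line above is plain percentage arithmetic over the per-project commit counts; a standalone sketch with hypothetical data:

```python
# Standalone sketch of the contributor-percentage formatting (hypothetical counts).
project_commits = 20
dev_contributions = {"alice": 12, "bob": 6, "carol": 2}

sorted_devs = sorted(dev_contributions.items(), key=lambda x: x[1], reverse=True)
contributors = [f"{name} ({count / project_commits * 100:.1f}%)" for name, count in sorted_devs]
print("- Contributors: " + ", ".join(contributors))
# - Contributors: alice (60.0%), bob (30.0%), carol (10.0%)
```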
+    def _get_week_start(self, date: datetime) -> datetime:
+        """Get Monday of the week for a given date."""
+        # Ensure consistent timezone handling - keep timezone info
+        if hasattr(date, "tzinfo") and date.tzinfo is not None:
+            # Keep timezone-aware but ensure it's UTC
+            if date.tzinfo != timezone.utc:
+                date = date.astimezone(timezone.utc)
+        else:
+            # Convert naive datetime to UTC timezone-aware
+            date = date.replace(tzinfo=timezone.utc)
+
+        days_since_monday = date.weekday()
+        monday = date - timedelta(days=days_since_monday)
+        result = monday.replace(hour=0, minute=0, second=0, microsecond=0)
+
+        return result
+
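A quick standalone check of the Monday-of-week logic above: a naive Thursday afternoon is promoted to UTC and collapses to the preceding Monday at midnight:

```python
from datetime import datetime, timedelta, timezone

d = datetime(2024, 5, 16, 15, 30)                 # naive Thursday
d = d.replace(tzinfo=timezone.utc)                # naive -> UTC-aware, as in the method
monday = (d - timedelta(days=d.weekday())).replace(hour=0, minute=0, second=0, microsecond=0)
print(monday)  # 2024-05-13 00:00:00+00:00
```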
+    def _write_development_patterns(
+        self, report: StringIO, insights: list[dict[str, Any]], focus_data: list[dict[str, Any]]
+    ) -> None:
         """Write development patterns analysis."""
         report.write("### Key Patterns Identified\n\n")
-
-        # Group insights by category
-        by_category:
+
+        # Group insights by category (handle missing category field gracefully)
+        by_category: dict[str, list[dict[str, Any]]] = {}
         for insight in insights:
-            category = insight
+            category = insight.get("category", "General")
             if category not in by_category:
                 by_category[category] = []
             by_category[category].append(insight)
-
+
         for category, category_insights in by_category.items():
             report.write(f"**{category}**:\n")
             for insight in category_insights:
-
-
+                # Handle missing fields gracefully for test compatibility
+                insight_text = insight.get("insight", insight.get("metric", "Unknown"))
+                insight_value = insight.get("value", "N/A")
+                insight_impact = insight.get("impact", "No impact specified")
+                report.write(f"- {insight_text}: {insight_value} ")
+                report.write(f"({insight_impact})\n")
             report.write("\n")
-
-        # Add focus insights
+
+        # Add focus insights (handle missing focus_score field gracefully)
         if focus_data:
-
-
-
-
-
-
-
-
-
-
-
-
+            # Use focus_ratio if focus_score is not available
+            focus_scores = []
+            for d in focus_data:
+                if "focus_score" in d:
+                    focus_scores.append(d["focus_score"])
+                elif "focus_ratio" in d:
+                    focus_scores.append(d["focus_ratio"] * 100)  # Convert ratio to percentage
+                else:
+                    focus_scores.append(50)  # Default value
+
+            if focus_scores:
+                avg_focus = sum(focus_scores) / len(focus_scores)
+                report.write(f"**Developer Focus**: Average focus score of {avg_focus:.1f}% ")
+
+                if avg_focus > 80:
+                    report.write("indicates strong project concentration\n")
+                elif avg_focus > 60:
+                    report.write("shows moderate multi-project work\n")
+                else:
+                    report.write("suggests high context switching\n")
+
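The focus-score fallback above can be condensed into one expression, sketched here on hypothetical rows (one with `focus_score`, one with only `focus_ratio`, one with neither):

```python
# Standalone sketch of the focus-score fallback (hypothetical focus_data rows).
focus_data = [
    {"developer": "alice", "focus_score": 90.0},
    {"developer": "bob", "focus_ratio": 0.55},
    {"developer": "carol"},
]

focus_scores = [
    d.get("focus_score", d.get("focus_ratio", 0.5) * 100)  # ratio -> percentage, 50 default
    for d in focus_data
]
avg_focus = sum(focus_scores) / len(focus_scores)
print(f"{avg_focus:.1f}")  # 65.0
```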
+    def _write_pr_analysis(
+        self, report: StringIO, pr_metrics: dict[str, Any], prs: list[dict[str, Any]]
+    ) -> None:
         """Write pull request analysis."""
-        report.write(f"- **Total PRs Merged**: {pr_metrics
-        report.write(f"- **Average PR Size**: {pr_metrics
-
-
-
-
-
-
+        report.write(f"- **Total PRs Merged**: {pr_metrics.get('total_prs', 0)}\n")
+        report.write(f"- **Average PR Size**: {pr_metrics.get('avg_pr_size', 0):.0f} lines\n")
+
+        # Handle optional metrics gracefully
+        if "avg_pr_lifetime_hours" in pr_metrics:
+            report.write(
+                f"- **Average PR Lifetime**: {pr_metrics['avg_pr_lifetime_hours']:.1f} hours\n"
+            )
+
+        if "story_point_coverage" in pr_metrics:
+            report.write(f"- **Story Point Coverage**: {pr_metrics['story_point_coverage']:.1f}%\n")
+
+        total_comments = pr_metrics.get("total_review_comments", 0)
+        if total_comments > 0:
+            report.write(f"- **Total Review Comments**: {total_comments}\n")
+            total_prs = pr_metrics.get("total_prs", 1)
+            avg_comments = total_comments / total_prs if total_prs > 0 else 0
             report.write(f"- **Average Comments per PR**: {avg_comments:.1f}\n")
-
-    def _write_ticket_tracking(
-
-        report
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
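A hypothetical `pr_metrics` dict in the shape the method above reads; the keys are taken from its `.get()`/`[]` accesses, the values are made up:

```python
# Hypothetical pr_metrics input (keys from the accesses above, values invented).
pr_metrics = {
    "total_prs": 42,
    "avg_pr_size": 183.4,              # lines
    "avg_pr_lifetime_hours": 26.5,     # optional
    "story_point_coverage": 71.0,      # optional
    "total_review_comments": 105,
}

avg_comments = pr_metrics["total_review_comments"] / pr_metrics["total_prs"]
print(f"{avg_comments:.1f}")  # 2.5
```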
+    def _write_ticket_tracking(
+        self,
+        report: StringIO,
+        ticket_analysis: dict[str, Any],
+        developer_stats: list[dict[str, Any]],
+    ) -> None:
+        """Write ticket tracking analysis with simplified platform usage section."""
+        # Simplified platform usage matching old report format
+        ticket_summary = ticket_analysis.get("ticket_summary", {})
+        total_tickets = sum(ticket_summary.values()) if ticket_summary else 0
+
+        if total_tickets > 0:
+            report.write("### Platform Usage\n\n")
+            for platform, count in sorted(ticket_summary.items(), key=lambda x: x[1], reverse=True):
+                pct = count / total_tickets * 100 if total_tickets > 0 else 0
+                report.write(f"- **{platform.title()}**: {count} tickets ({pct:.1f}%)\n")
+
+        report.write("\n### Coverage Analysis\n\n")
+
+        # Handle missing fields gracefully
+        commits_with_tickets = ticket_analysis.get("commits_with_tickets", 0)
+        total_commits = ticket_analysis.get("total_commits", 0)
+        coverage_pct = ticket_analysis.get("commit_coverage_pct", 0)
+
+        # Debug logging for ticket coverage issues
+        logger.debug(f"Ticket coverage analysis - commits_with_tickets: {commits_with_tickets}, total_commits: {total_commits}, coverage_pct: {coverage_pct}")
+        if commits_with_tickets == 0 and total_commits > 0:
+            logger.warning(f"No commits found with ticket references out of {total_commits} total commits")
+            # Log sample of ticket_analysis structure for debugging
+            if "ticket_summary" in ticket_analysis:
+                logger.debug(f"Ticket summary: {ticket_analysis['ticket_summary']}")
+            if "ticket_platforms" in ticket_analysis:
+                logger.debug(f"Ticket platforms: {ticket_analysis['ticket_platforms']}")
+
+        report.write(f"- **Commits with Tickets**: {commits_with_tickets} ")
+        report.write(f"of {total_commits} ")
+        report.write(f"({coverage_pct:.1f}%)\n")
+
+        # Enhanced untracked commits reporting
+        untracked_commits = ticket_analysis.get("untracked_commits", [])
+        if untracked_commits:
+            self._write_enhanced_untracked_analysis(
+                report, untracked_commits, ticket_analysis, developer_stats
+            )
+
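The platform-usage loop above reduces to a sort plus a percentage; a standalone sketch with hypothetical ticket counts:

```python
# Standalone sketch of the platform-usage percentages (hypothetical counts).
ticket_summary = {"jira": 30, "github": 15, "clickup": 5}
total_tickets = sum(ticket_summary.values())

for platform, count in sorted(ticket_summary.items(), key=lambda x: x[1], reverse=True):
    print(f"- **{platform.title()}**: {count} tickets ({count / total_tickets * 100:.1f}%)")
# - **Jira**: 30 tickets (60.0%)
# - **Github**: 15 tickets (30.0%)
# - **Clickup**: 5 tickets (10.0%)
```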
+    def _write_enhanced_untracked_analysis(
+        self,
+        report: StringIO,
+        untracked_commits: list[dict[str, Any]],
+        ticket_analysis: dict[str, Any],
+        developer_stats: list[dict[str, Any]],
+    ) -> None:
+        """Write comprehensive untracked commits analysis.
+
+        WHY: Enhanced untracked analysis provides actionable insights into what
+        types of work are happening outside the tracked process, helping identify
+        process improvements and training opportunities.
+        """
+        report.write("\n### Untracked Work Analysis\n\n")
+
+        total_untracked = len(untracked_commits)
+        total_commits = ticket_analysis.get("total_commits", 0)
+        untracked_pct = (total_untracked / total_commits * 100) if total_commits > 0 else 0
+
+        report.write(
+            f"**Summary**: {total_untracked} commits ({untracked_pct:.1f}% of total) lack ticket references.\n\n"
+        )
+
+        # Analyze categories
+        categories = {}
+        contributors = {}
+        projects = {}
+
+        for commit in untracked_commits:
+            # Category analysis
+            category = commit.get("category", "other")
+            if category not in categories:
+                categories[category] = {"count": 0, "lines": 0, "examples": []}
+            categories[category]["count"] += 1
+            categories[category]["lines"] += commit.get("lines_changed", 0)
+            if len(categories[category]["examples"]) < 2:
+                categories[category]["examples"].append(
+                    {
+                        "hash": commit.get("hash", ""),
+                        "message": commit.get("message", ""),
+                        "author": commit.get("author", ""),
+                    }
+                )
+
+            # Contributor analysis
+            author = commit.get("author", "Unknown")
+            if author not in contributors:
+                contributors[author] = {"count": 0, "categories": set()}
+            contributors[author]["count"] += 1
+            contributors[author]["categories"].add(category)
+
+            # Project analysis
+            project = commit.get("project_key", "UNKNOWN")
+            if project not in projects:
+                projects[project] = {"count": 0, "categories": set()}
+            projects[project]["count"] += 1
+            projects[project]["categories"].add(category)
+
+        # Write category breakdown
+        if categories:
+            report.write("#### Work Categories\n\n")
+            sorted_categories = sorted(
+                categories.items(), key=lambda x: x[1]["count"], reverse=True
+            )
+
+            for category, data in sorted_categories[:8]:  # Show top 8 categories
+                pct = (data["count"] / total_untracked) * 100
+                avg_size = data["lines"] / data["count"] if data["count"] > 0 else 0
+
+                # Categorize the impact
+                if category in ["style", "documentation", "maintenance"]:
+                    impact_note = " *(acceptable untracked)*"
+                elif category in ["feature", "bug_fix"]:
+                    impact_note = " *(should be tracked)*"
+                else:
+                    impact_note = ""
+
+                report.write(f"- **{category.replace('_', ' ').title()}**: ")
+                report.write(f"{data['count']} commits ({pct:.1f}%), ")
+                report.write(f"avg {avg_size:.0f} lines{impact_note}\n")
+
+                # Add examples
+                if data["examples"]:
+                    for example in data["examples"]:
+                        report.write(f" - `{example['hash']}`: {example['message'][:80]}...\n")
+            report.write("\n")
+
+        # Write top contributors to untracked work with enhanced percentage analysis
+        if contributors:
+            report.write("#### Top Contributors (Untracked Work)\n\n")
+
+            # Create developer lookup for total commits
+            dev_lookup = {}
+            for dev in developer_stats:
+                # Map canonical_id to developer data
+                dev_lookup[dev["canonical_id"]] = dev
+                # Also map primary name and primary email as fallbacks
+                dev_lookup[dev["primary_name"]] = dev
+                dev_lookup[dev["primary_email"]] = dev
+
+            sorted_contributors = sorted(
+                contributors.items(), key=lambda x: x[1]["count"], reverse=True
+            )
+
+            for author, data in sorted_contributors[:5]:  # Show top 5
+                untracked_count = data["count"]
+                pct_of_untracked = (untracked_count / total_untracked) * 100
+
+                # Find developer's total commits to calculate percentage of their work that's untracked
+                dev_data = dev_lookup.get(author)
+                if dev_data:
+                    total_dev_commits = dev_data["total_commits"]
+                    pct_of_dev_work = (
+                        (untracked_count / total_dev_commits) * 100 if total_dev_commits > 0 else 0
+                    )
+                    dev_context = f", {pct_of_dev_work:.1f}% of their work"
+                else:
+                    dev_context = ""
+
+                categories_list = list(data["categories"])
+                categories_str = ", ".join(categories_list[:3])  # Show up to 3 categories
+                if len(categories_list) > 3:
+                    categories_str += f" (+{len(categories_list) - 3} more)"
+
+                report.write(f"- **{author}**: {untracked_count} commits ")
+                report.write(f"({pct_of_untracked:.1f}% of untracked{dev_context}) - ")
+                report.write(f"*{categories_str}*\n")
+            report.write("\n")
+
+        # Write project breakdown
+        if len(projects) > 1:
+            report.write("#### Projects with Untracked Work\n\n")
+            sorted_projects = sorted(projects.items(), key=lambda x: x[1]["count"], reverse=True)
+
+            for project, data in sorted_projects:
+                pct = (data["count"] / total_untracked) * 100
+                categories_list = list(data["categories"])
+                report.write(f"- **{project}**: {data['count']} commits ({pct:.1f}%)\n")
+            report.write("\n")
+
+        # Write recent examples (configurable limit, default 15 for better visibility)
+        if untracked_commits:
+            report.write("#### Recent Untracked Commits\n\n")
+
+            # Show configurable number of recent commits (increased from 10 to 15)
+            max_recent_commits = 15
+
+            # Safe timestamp sorting that handles mixed timezone types
+            def safe_timestamp_key(commit):
+                ts = commit.get("timestamp")
+                if ts is None:
+                    return datetime.min.replace(tzinfo=timezone.utc)
+                # If it's a datetime object, handle timezone issues
+                if hasattr(ts, "tzinfo"):
+                    # Make timezone-naive datetime UTC-aware for consistent comparison
+                    if ts.tzinfo is None:
+                        ts = ts.replace(tzinfo=timezone.utc)
+                    return ts
+                # If it's a string or other type, try to parse or use as-is
+                return ts
+
+            recent_commits = sorted(
+                untracked_commits, key=safe_timestamp_key, reverse=True
+            )[:max_recent_commits]
+
+            if len(untracked_commits) > max_recent_commits:
+                report.write(
+                    f"*Showing {max_recent_commits} most recent of {len(untracked_commits)} untracked commits*\n\n"
+                )
+
+            for commit in recent_commits:
+                # Format date
+                timestamp = commit.get("timestamp")
+                if timestamp and hasattr(timestamp, "strftime"):
+                    date_str = timestamp.strftime("%Y-%m-%d")
+                else:
+                    date_str = "unknown date"
+
+                report.write(f"- `{commit.get('hash', '')}` ({date_str}) ")
+                report.write(f"**{commit.get('author', 'Unknown')}** ")
+                report.write(f"[{commit.get('category', 'other')}]: ")
+                report.write(f"{commit.get('message', '')[:100]}")
+                if len(commit.get("message", "")) > 100:
+                    report.write("...")
+                report.write(f" *({commit.get('files_changed', 0)} files, ")
+                report.write(f"{commit.get('lines_changed', 0)} lines)*\n")
+            report.write("\n")
+
+        # Add recommendations based on untracked analysis
+        self._write_untracked_recommendations(
+            report, categories, contributors, total_untracked, total_commits
+        )
+
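The `safe_timestamp_key` helper above, shown in isolation: naive datetimes are promoted to UTC so they compare cleanly against aware ones, and missing timestamps sort first (hypothetical commit dicts):

```python
from datetime import datetime, timezone

def safe_timestamp_key(commit):
    ts = commit.get("timestamp")
    if ts is None:
        return datetime.min.replace(tzinfo=timezone.utc)   # sorts missing timestamps first
    if hasattr(ts, "tzinfo") and ts.tzinfo is None:
        ts = ts.replace(tzinfo=timezone.utc)               # naive -> UTC-aware
    return ts

commits = [
    {"timestamp": datetime(2024, 1, 2, 12, 0)},                       # naive
    {"timestamp": datetime(2024, 1, 1, 12, 0, tzinfo=timezone.utc)},  # aware
    {},                                                               # missing
]
print([safe_timestamp_key(c).day for c in sorted(commits, key=safe_timestamp_key)])
# [1, 1, 2]  (datetime.min, then Jan 1, then Jan 2)
```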
+    def _write_untracked_recommendations(
+        self,
+        report: StringIO,
+        categories: dict[str, Any],
+        contributors: dict[str, Any],
+        total_untracked: int,
+        total_commits: int,
+    ) -> None:
+        """Write specific recommendations based on untracked commit analysis."""
+        report.write("#### Recommendations for Untracked Work\n\n")
+
+        recommendations = []
+
+        # Category-based recommendations
+        feature_count = categories.get("feature", {}).get("count", 0)
+        bug_fix_count = categories.get("bug_fix", {}).get("count", 0)
+        maintenance_count = categories.get("maintenance", {}).get("count", 0)
+        docs_count = categories.get("documentation", {}).get("count", 0)
+        style_count = categories.get("style", {}).get("count", 0)
+
+        if feature_count > total_untracked * 0.2:
+            recommendations.append(
+                "🎫 **Require tickets for features**: Many feature developments lack ticket references. "
+                "Consider enforcing ticket creation for new functionality."
+            )
+
+        if bug_fix_count > total_untracked * 0.15:
+            recommendations.append(
+                "🐛 **Track bug fixes**: Bug fixes should be linked to issue tickets for better "
+                "visibility and follow-up."
+            )
+
+        # Positive recognition for appropriate untracked work
+        acceptable_count = maintenance_count + docs_count + style_count
+        if acceptable_count > total_untracked * 0.6:
+            recommendations.append(
+                "✅ **Good process balance**: Most untracked work consists of maintenance, "
+                "documentation, and style improvements - this is acceptable and shows good "
+                "development hygiene."
+            )
+
+        # Coverage recommendations
+        untracked_pct = (total_untracked / total_commits * 100) if total_commits > 0 else 0
+        if untracked_pct > 50:
+            recommendations.append(
+                "📈 **Improve overall tracking**: Over 50% of commits lack ticket references. "
+                "Consider team training on linking commits to work items."
+            )
+        elif untracked_pct < 20:
+            recommendations.append(
+                "🎯 **Excellent tracking**: Less than 20% of commits are untracked - "
+                "the team shows strong process adherence."
+            )
+
+        # Developer-specific recommendations
+        if len(contributors) > 1:
+            max_contributor_pct = max(
+                (data["count"] / total_untracked * 100) for data in contributors.values()
+            )
+            if max_contributor_pct > 40:
+                recommendations.append(
+                    "👥 **Targeted training**: Some developers need additional guidance on "
+                    "ticket referencing practices. Consider peer mentoring or process review."
+                )
+
+        if not recommendations:
+            recommendations.append(
+                "✅ **Balanced approach**: Untracked work appears well-balanced between "
+                "necessary maintenance and tracked development work."
+            )
+
+        for rec in recommendations:
+            report.write(f"{rec}\n\n")
+
+    def _write_recommendations(
+        self,
+        report: StringIO,
+        insights: list[dict[str, Any]],
+        ticket_analysis: dict[str, Any],
+        focus_data: list[dict[str, Any]],
+    ) -> None:
         """Write recommendations based on analysis."""
         recommendations = []
-
+
         # Ticket coverage recommendations
-        coverage = ticket_analysis[
+        coverage = ticket_analysis["commit_coverage_pct"]
         if coverage < 50:
             recommendations.append(
                 "🎫 **Improve ticket tracking**: Current coverage is below 50%. "
                 "Consider enforcing ticket references in commit messages or PR descriptions."
             )
-
-        # Work distribution recommendations
+
+        # Work distribution recommendations (handle missing insight field gracefully)
         for insight in insights:
-
-
+            insight_text = insight.get("insight", insight.get("metric", ""))
+            if insight_text == "Work distribution":
+                insight_value = str(insight.get("value", ""))
+                if "unbalanced" in insight_value.lower():
                     recommendations.append(
                         "⚖️ **Balance workload**: Work is concentrated among few developers. "
                         "Consider distributing tasks more evenly or adding team members."
                     )
-
-        # Focus recommendations
+
+        # Focus recommendations (handle missing focus_score field gracefully)
         if focus_data:
-            low_focus = [
+            low_focus = []
+            for d in focus_data:
+                focus_score = d.get("focus_score", d.get("focus_ratio", 0.5) * 100)
+                if focus_score < 50:
+                    low_focus.append(d)
             if len(low_focus) > len(focus_data) / 2:
                 recommendations.append(
                     "🎯 **Reduce context switching**: Many developers work across multiple projects. "
                     "Consider more focused project assignments to improve efficiency."
                 )
-
-        # Branching strategy
+
+        # Branching strategy (handle missing insight field gracefully)
         for insight in insights:
-
+            insight_text = insight.get("insight", insight.get("metric", ""))
+            insight_value = str(insight.get("value", ""))
+            if insight_text == "Branching strategy" and "Heavy" in insight_value:
                 recommendations.append(
                     "🌿 **Review branching strategy**: High percentage of merge commits suggests "
                     "complex branching. Consider simplifying the Git workflow."
                 )
-
+
         if recommendations:
             for rec in recommendations:
                 report.write(f"{rec}\n\n")
         else:
             report.write("✅ The team shows healthy development patterns. ")
-            report.write("Continue current practices while monitoring for changes.\n")
+            report.write("Continue current practices while monitoring for changes.\n")
+
+    def _write_commit_classification_analysis(
+        self, report: StringIO, ticket_analysis: dict[str, Any]
+    ) -> None:
+        """Write commit classification analysis section.
+
+        WHY: This section provides insights into automated commit categorization
+        quality and distribution, helping teams understand their development patterns
+        and the effectiveness of ML-based categorization.
+
+        Args:
+            report: StringIO buffer to write to
+            ticket_analysis: Ticket analysis data containing ML classification results
+        """
+        ml_analysis = ticket_analysis.get("ml_analysis", {})
+        if not ml_analysis.get("enabled", False):
+            return
+
+        report.write("The team's commit patterns reveal the following automated classification insights:\n\n")
+
+        # Overall classification statistics
+        total_ml_predictions = ml_analysis.get("total_ml_predictions", 0)
+        total_rule_predictions = ml_analysis.get("total_rule_predictions", 0)
+        total_cached_predictions = ml_analysis.get("total_cached_predictions", 0)
+        total_predictions = total_ml_predictions + total_rule_predictions + total_cached_predictions
+
+        if total_predictions > 0:
+            report.write("### Classification Method Distribution\n\n")
+
+            # Calculate percentages
+            ml_pct = (total_ml_predictions / total_predictions) * 100
+            rules_pct = (total_rule_predictions / total_predictions) * 100
+            cached_pct = (total_cached_predictions / total_predictions) * 100
+
+            report.write(f"- **ML-based Classifications**: {total_ml_predictions} commits ({ml_pct:.1f}%)\n")
+            report.write(f"- **Rule-based Classifications**: {total_rule_predictions} commits ({rules_pct:.1f}%)\n")
+            report.write(f"- **Cached Results**: {total_cached_predictions} commits ({cached_pct:.1f}%)\n\n")
+
+        # Classification confidence analysis
+        avg_confidence = ml_analysis.get("avg_confidence", 0)
+        confidence_dist = ml_analysis.get("confidence_distribution", {})
+
+        if confidence_dist:
+            report.write("### Classification Confidence\n\n")
+            report.write(f"- **Average Confidence**: {avg_confidence:.1%} across all classifications\n")
+
+            high_conf = confidence_dist.get("high", 0)
+            medium_conf = confidence_dist.get("medium", 0)
+            low_conf = confidence_dist.get("low", 0)
+            total_conf_items = high_conf + medium_conf + low_conf
+
+            if total_conf_items > 0:
+                high_pct = (high_conf / total_conf_items) * 100
+                medium_pct = (medium_conf / total_conf_items) * 100
+                low_pct = (low_conf / total_conf_items) * 100
+
+                report.write(f"- **High Confidence** (≥80%): {high_conf} commits ({high_pct:.1f}%)\n")
+                report.write(f"- **Medium Confidence** (60-79%): {medium_conf} commits ({medium_pct:.1f}%)\n")
+                report.write(f"- **Low Confidence** (<60%): {low_conf} commits ({low_pct:.1f}%)\n\n")
+
+        # Category confidence breakdown
+        category_confidence = ml_analysis.get("category_confidence", {})
+        if category_confidence:
+            report.write("### Classification Categories\n\n")
+
+            # Sort categories by count (descending)
+            sorted_categories = sorted(
+                category_confidence.items(),
+                key=lambda x: x[1].get("count", 0),
+                reverse=True
+            )
+
+            # Calculate total commits for percentages
+            total_categorized = sum(data.get("count", 0) for data in category_confidence.values())
+
+            for category, data in sorted_categories:
+                count = data.get("count", 0)
+                avg_conf = data.get("avg", 0)
|
|
2256
|
+
|
|
2257
|
+
if count > 0:
|
|
2258
|
+
category_pct = (count / total_categorized) * 100
|
|
2259
|
+
category_display = category.replace("_", " ").title()
|
|
2260
|
+
report.write(f"- **{category_display}**: {count} commits ({category_pct:.1f}%, avg confidence: {avg_conf:.1%})\n")
|
|
2261
|
+
|
|
2262
|
+
report.write("\n")
|
|
2263
|
+
|
|
2264
|
+
# Performance metrics
|
|
2265
|
+
processing_stats = ml_analysis.get("processing_time_stats", {})
|
|
2266
|
+
if processing_stats.get("total_ms", 0) > 0:
|
|
2267
|
+
avg_ms = processing_stats.get("avg_ms", 0)
|
|
2268
|
+
total_ms = processing_stats.get("total_ms", 0)
|
|
2269
|
+
|
|
2270
|
+
report.write("### Processing Performance\n\n")
|
|
2271
|
+
report.write(f"- **Average Processing Time**: {avg_ms:.1f}ms per commit\n")
|
|
2272
|
+
report.write(f"- **Total Processing Time**: {total_ms:.0f}ms ({total_ms/1000:.1f} seconds)\n\n")
|
|
2273
|
+
|
|
2274
|
+
|
|
2275
|
+
else:
|
|
2276
|
+
report.write("No classification data available for analysis.\n\n")
|
|
2277
|
+
|
|
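The confidence bands this method reports (high ≥ 80%, medium 60-79%, low < 60%) arrive pre-aggregated in `ml_analysis["confidence_distribution"]`; the bucketing itself happens upstream in the classifier. A hedged sketch of that aggregation, assuming a hypothetical list of per-commit confidence scores (the cutoffs are taken from the report strings above; the rest is illustration):

```python
# Hypothetical per-commit confidences; in the real pipeline the classifier
# supplies the aggregated counts, not this raw list.
confidences = [0.93, 0.81, 0.74, 0.66, 0.58, 0.41]

dist = {"high": 0, "medium": 0, "low": 0}
for c in confidences:
    if c >= 0.80:          # high band, per the ≥80% label above
        dist["high"] += 1
    elif c >= 0.60:        # medium band, 60-79%
        dist["medium"] += 1
    else:                  # low band, <60%
        dist["low"] += 1

total = sum(dist.values())
for bucket, count in dist.items():
    print(f"{bucket}: {count} commits ({count / total * 100:.1f}%)")
# Each band gets 2 of 6 commits here, i.e. 33.3%.
```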
2278
|
+
def _write_pm_insights(self, report: StringIO, pm_data: dict[str, Any]) -> None:
|
|
2279
|
+
"""Write PM platform integration insights.
|
|
2280
|
+
|
|
2281
|
+
WHY: PM platform integration provides valuable insights into work item
|
|
2282
|
+
tracking, story point accuracy, and development velocity that complement
|
|
2283
|
+
Git-based analytics. This section highlights the value of PM integration.
|
|
2284
|
+
"""
|
|
2285
|
+
metrics = pm_data.get("metrics", {})
|
|
2286
|
+
|
|
2287
|
+
# Platform overview
|
|
2288
|
+
platform_coverage = metrics.get("platform_coverage", {})
|
|
2289
|
+
total_issues = metrics.get("total_pm_issues", 0)
|
|
2290
|
+
correlations = len(pm_data.get("correlations", []))
|
|
2291
|
+
|
|
2292
|
+
report.write(f"The team has integrated **{len(platform_coverage)} PM platforms** ")
|
|
2293
|
+
report.write(
|
|
2294
|
+
f"tracking **{total_issues:,} issues** with **{correlations} commit correlations**.\n\n"
|
|
2295
|
+
)
|
|
2296
|
+
|
|
2297
|
+
# Story point analysis
|
|
2298
|
+
story_analysis = metrics.get("story_point_analysis", {})
|
|
2299
|
+
pm_story_points = story_analysis.get("pm_total_story_points", 0)
|
|
2300
|
+
git_story_points = story_analysis.get("git_total_story_points", 0)
|
|
2301
|
+
coverage_pct = story_analysis.get("story_point_coverage_pct", 0)
|
|
2302
|
+
|
|
2303
|
+
if pm_story_points > 0:
|
|
2304
|
+
report.write("### Story Point Tracking\n\n")
|
|
2305
|
+
report.write(f"- **PM Platform Story Points**: {pm_story_points:,}\n")
|
|
2306
|
+
report.write(f"- **Git Extracted Story Points**: {git_story_points:,}\n")
|
|
2307
|
+
report.write(
|
|
2308
|
+
f"- **Story Point Coverage**: {coverage_pct:.1f}% of issues have story points\n"
|
|
2309
|
+
)
|
|
2310
|
+
|
|
2311
|
+
if git_story_points > 0:
|
|
2312
|
+
accuracy = min(git_story_points / pm_story_points, 1.0) * 100
|
|
2313
|
+
report.write(
|
|
2314
|
+
f"- **Extraction Accuracy**: {accuracy:.1f}% of PM story points found in Git\n"
|
|
2315
|
+
)
|
|
2316
|
+
report.write("\n")
|
|
2317
|
+
|
|
2318
|
+
# Issue type distribution
|
|
2319
|
+
issue_types = metrics.get("issue_type_distribution", {})
|
|
2320
|
+
if issue_types:
|
|
2321
|
+
report.write("### Work Item Types\n\n")
|
|
2322
|
+
sorted_types = sorted(issue_types.items(), key=lambda x: x[1], reverse=True)
|
|
2323
|
+
total_typed_issues = sum(issue_types.values())
|
|
2324
|
+
|
|
2325
|
+
for issue_type, count in sorted_types[:5]: # Top 5 types
|
|
2326
|
+
pct = (count / total_typed_issues * 100) if total_typed_issues > 0 else 0
|
|
2327
|
+
report.write(f"- **{issue_type.title()}**: {count} issues ({pct:.1f}%)\n")
|
|
2328
|
+
report.write("\n")
|
|
2329
|
+
|
|
2330
|
+
# Platform-specific insights
|
|
2331
|
+
if platform_coverage:
|
|
2332
|
+
report.write("### Platform Coverage\n\n")
|
|
2333
|
+
for platform, coverage_data in platform_coverage.items():
|
|
2334
|
+
platform_issues = coverage_data.get("total_issues", 0)
|
|
2335
|
+
linked_issues = coverage_data.get("linked_issues", 0)
|
|
2336
|
+
coverage_percentage = coverage_data.get("coverage_percentage", 0)
|
|
2337
|
+
|
|
2338
|
+
report.write(f"**{platform.title()}**: ")
|
|
2339
|
+
report.write(f"{platform_issues} issues, {linked_issues} linked to commits ")
|
|
2340
|
+
report.write(f"({coverage_percentage:.1f}% coverage)\n")
|
|
2341
|
+
report.write("\n")
|
|
2342
|
+
|
|
2343
|
+
# Correlation quality
|
|
2344
|
+
correlation_quality = metrics.get("correlation_quality", {})
|
|
2345
|
+
if correlation_quality.get("total_correlations", 0) > 0:
|
|
2346
|
+
avg_confidence = correlation_quality.get("average_confidence", 0)
|
|
2347
|
+
high_confidence = correlation_quality.get("high_confidence_correlations", 0)
|
|
2348
|
+
correlation_methods = correlation_quality.get("correlation_methods", {})
|
|
2349
|
+
|
|
2350
|
+
report.write("### Correlation Quality\n\n")
|
|
2351
|
+
report.write(f"- **Average Confidence**: {avg_confidence:.2f} (0.0-1.0 scale)\n")
|
|
2352
|
+
report.write(f"- **High Confidence Matches**: {high_confidence} correlations\n")
|
|
2353
|
+
|
|
2354
|
+
if correlation_methods:
|
|
2355
|
+
report.write("- **Methods Used**: ")
|
|
2356
|
+
method_list = [
|
|
2357
|
+
f"{method.replace('_', ' ').title()} ({count})"
|
|
2358
|
+
for method, count in correlation_methods.items()
|
|
2359
|
+
]
|
|
2360
|
+
report.write(", ".join(method_list))
|
|
2361
|
+
report.write("\n")
|
|
2362
|
+
report.write("\n")
|
|
2363
|
+
|
|
2364
|
+
# Key insights
|
|
2365
|
+
report.write("### Key Insights\n\n")
|
|
2366
|
+
|
|
2367
|
+
if coverage_pct > 80:
|
|
2368
|
+
report.write(
|
|
2369
|
+
"✅ **Excellent story point coverage** - Most issues have effort estimates\n"
|
|
2370
|
+
)
|
|
2371
|
+
elif coverage_pct > 50:
|
|
2372
|
+
report.write(
|
|
2373
|
+
"⚠️ **Moderate story point coverage** - Consider improving estimation practices\n"
|
|
2374
|
+
)
|
|
2375
|
+
else:
|
|
2376
|
+
report.write(
|
|
2377
|
+
"❌ **Low story point coverage** - Story point tracking needs improvement\n"
|
|
2378
|
+
)
|
|
2379
|
+
|
|
2380
|
+
if correlations > total_issues * 0.5:
|
|
2381
|
+
report.write(
|
|
2382
|
+
"✅ **Strong commit-issue correlation** - Good traceability between work items and code\n"
|
|
2383
|
+
)
|
|
2384
|
+
elif correlations > total_issues * 0.2:
|
|
2385
|
+
report.write(
|
|
2386
|
+
"⚠️ **Moderate commit-issue correlation** - Some work items lack code links\n"
|
|
2387
|
+
)
|
|
2388
|
+
else:
|
|
2389
|
+
report.write(
|
|
2390
|
+
"❌ **Weak commit-issue correlation** - Improve ticket referencing in commits\n"
|
|
2391
|
+
)
|
|
2392
|
+
|
|
2393
|
+
if len(platform_coverage) > 1:
|
|
2394
|
+
report.write(
|
|
2395
|
+
"📊 **Multi-platform integration** - Comprehensive work item tracking across tools\n"
|
|
2396
|
+
)
|
|
2397
|
+
|
|
2398
|
+
report.write("\n")
|
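For reference, the Key Insights section above maps two ratios onto three-level bands: story-point coverage (>80 / >50 / else) and commit-issue correlation (>50% / >20% / else of total issues). A self-contained sketch of that banding plus the capped extraction-accuracy formula, using a hypothetical `pm_data` payload whose shape is inferred from the reads in the method; all values are invented:

```python
# Hypothetical payload in the shape _write_pm_insights reads.
pm_data = {
    "metrics": {
        "total_pm_issues": 100,
        "story_point_analysis": {
            "pm_total_story_points": 120,
            "git_total_story_points": 90,
        },
    },
    "correlations": [f"c{i}" for i in range(30)],  # 30 commit correlations
}

metrics = pm_data["metrics"]
total_issues = metrics["total_pm_issues"]
correlations = len(pm_data["correlations"])

# Extraction accuracy: Git-extracted points over PM points, capped at 100%.
sp = metrics["story_point_analysis"]
accuracy = min(sp["git_total_story_points"] / sp["pm_total_story_points"], 1.0) * 100
print(f"extraction accuracy: {accuracy:.1f}%")  # 90/120 -> 75.0%

# Correlation banding: >50% of issues strong, >20% moderate, else weak.
if correlations > total_issues * 0.5:
    band = "strong"
elif correlations > total_issues * 0.2:
    band = "moderate"
else:
    band = "weak"
print(f"{correlations}/{total_issues} correlations: {band}")  # 30/100 -> moderate
```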