gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4108 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +904 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +441 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1193 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
gitflow_analytics/reports/narrative_writer.py
@@ -1,269 +1,2398 @@
 """Narrative report generation in Markdown format."""
-from datetime import datetime
-from pathlib import Path
-from typing import List, Dict, Any, Set
+
+import logging
+from datetime import datetime, timedelta, timezone
 from io import StringIO
+from pathlib import Path
+from typing import Any
+
+from ..metrics.activity_scoring import ActivityScorer
+
+# Get logger for this module
+logger = logging.getLogger(__name__)
 
 
 class NarrativeReportGenerator:
     """Generate human-readable narrative reports in Markdown."""
-
+
     def __init__(self) -> None:
         """Initialize narrative report generator."""
+        self.activity_scorer = ActivityScorer()
         self.templates = {
-            'high_performer': "{name} led development with {commits} commits ({pct}% of total activity)",
-            'multi_project': "{name} worked across {count} projects, primarily on {primary} ({primary_pct}%)",
-            'focused_developer': "{name} showed strong focus on {project} with {pct}% of their time",
-            'ticket_coverage': "The team maintained {coverage}% ticket coverage, indicating {quality} process adherence",
-            'work_distribution': "Work distribution shows a {distribution} pattern with a Gini coefficient of {gini}"
+            "high_performer": "{name} led development with {commits} commits ({pct}% of total activity)",
+            "multi_project": "{name} worked across {count} projects, primarily on {primary} ({primary_pct}%)",
+            "focused_developer": "{name} showed strong focus on {project} with {pct}% of their time",
+            "ticket_coverage": "The team maintained {coverage}% ticket coverage, indicating {quality} process adherence",
+            "work_distribution": "Work distribution shows a {distribution} pattern with a Gini coefficient of {gini}",
         }
-
-    def generate_narrative_report(self,
-                                  commits: List[Dict[str, Any]],
-                                  prs: List[Dict[str, Any]],
-                                  developer_stats: List[Dict[str, Any]],
-                                  activity_dist: List[Dict[str, Any]],
-                                  focus_data: List[Dict[str, Any]],
-                                  insights: List[Dict[str, Any]],
-                                  ticket_analysis: Dict[str, Any],
-                                  pr_metrics: Dict[str, Any],
-                                  output_path: Path,
-                                  weeks: int) -> Path:
+
+    def _filter_excluded_authors(self, data_list: list[dict[str, Any]], exclude_authors: list[str]) -> list[dict[str, Any]]:
+        """
+        Filter out excluded authors from any data list using canonical_id and enhanced bot detection.
+
+        WHY: Bot exclusion happens in Phase 2 (reporting) instead of Phase 1 (data collection)
+        to ensure manual identity mappings work correctly. This allows the system to see
+        consolidated bot identities via canonical_id instead of just original author_email/author_name.
+
+        ENHANCEMENT: Added enhanced bot pattern matching to catch bots that weren't properly
+        consolidated via manual mappings, preventing bot leakage in reports.
+
+        Args:
+            data_list: List of data dictionaries containing canonical_id field
+            exclude_authors: List of author identifiers to exclude (checked against canonical_id)
+
+        Returns:
+            Filtered list with excluded authors removed
+        """
+        if not exclude_authors:
+            return data_list
+
+        logger.debug(f"DEBUG EXCLUSION: Starting filter with {len(exclude_authors)} excluded authors: {exclude_authors}")
+        logger.debug(f"DEBUG EXCLUSION: Filtering {len(data_list)} items from data list")
+
+        excluded_lower = [author.lower() for author in exclude_authors]
+        logger.debug(f"DEBUG EXCLUSION: Excluded authors (lowercase): {excluded_lower}")
+
+        # Separate explicit excludes from bot patterns
+        explicit_excludes = []
+        bot_patterns = []
+
+        for exclude in excluded_lower:
+            if '[bot]' in exclude or 'bot' in exclude.split():
+                bot_patterns.append(exclude)
+            else:
+                explicit_excludes.append(exclude)
+
+        logger.debug(f"DEBUG EXCLUSION: Explicit excludes: {explicit_excludes}")
+        logger.debug(f"DEBUG EXCLUSION: Bot patterns: {bot_patterns}")
+
+        filtered_data = []
+        excluded_count = 0
+
+        # Sample first 5 items to see data structure
+        for i, item in enumerate(data_list[:5]):
+            logger.debug(f"DEBUG EXCLUSION: Sample item {i}: canonical_id='{item.get('canonical_id', '')}', "
+                         f"author_email='{item.get('author_email', '')}', author_name='{item.get('author_name', '')}', "
+                         f"author='{item.get('author', '')}', primary_name='{item.get('primary_name', '')}', "
+                         f"name='{item.get('name', '')}'")
+
+        for item in data_list:
+            canonical_id = item.get("canonical_id", "")
+            # Also check original author fields as fallback for data without canonical_id
+            author_email = item.get("author_email", "")
+            author_name = item.get("author_name", "")
+
+            # Check all possible author fields
+            author = item.get("author", "")
+            primary_name = item.get("primary_name", "")
+            name = item.get("name", "")
+
+            # Collect all identity fields for checking
+            identity_fields = [
+                canonical_id,
+                item.get("primary_email", ""),
+                author_email,
+                author_name,
+                author,
+                primary_name,
+                name
+            ]
+
+            should_exclude = False
+            exclusion_reason = ""
+
+            # Check for exact matches with explicit excludes first
+            for field in identity_fields:
+                if field and field.lower() in explicit_excludes:
+                    should_exclude = True
+                    exclusion_reason = f"exact match with '{field}' in explicit excludes"
+                    break
+
+            # If not explicitly excluded, check for bot patterns
+            if not should_exclude:
+                for field in identity_fields:
+                    if not field:
+                        continue
+                    field_lower = field.lower()
+
+                    # Enhanced bot detection: check if any field contains bot-like patterns
+                    for bot_pattern in bot_patterns:
+                        if bot_pattern in field_lower:
+                            should_exclude = True
+                            exclusion_reason = f"bot pattern '{bot_pattern}' matches field '{field}'"
+                            break
+
+                    # Additional bot detection: check for common bot patterns not in explicit list
+                    if not should_exclude:
+                        bot_indicators = ['[bot]', 'bot@', '-bot', 'automated', 'github-actions', 'dependabot', 'renovate']
+                        for indicator in bot_indicators:
+                            if indicator in field_lower:
+                                # Only exclude if this bot-like pattern matches something in our exclude list
+                                for exclude in excluded_lower:
+                                    if indicator.replace('[', '').replace(']', '') in exclude or exclude in field_lower:
+                                        should_exclude = True
+                                        exclusion_reason = f"bot indicator '{indicator}' in field '{field}' matches exclude pattern '{exclude}'"
+                                        break
+                                if should_exclude:
+                                    break
+
+                    if should_exclude:
+                        break
+
+            if should_exclude:
+                excluded_count += 1
+                logger.debug(f"DEBUG EXCLUSION: EXCLUDING item - {exclusion_reason}")
+                logger.debug(f" canonical_id='{canonical_id}', primary_email='{item.get('primary_email', '')}', "
+                             f"author_email='{author_email}', author_name='{author_name}', author='{author}', "
+                             f"primary_name='{primary_name}', name='{name}'")
+            else:
+                filtered_data.append(item)
+
+        logger.debug(f"DEBUG EXCLUSION: Excluded {excluded_count} items, kept {len(filtered_data)} items")
+        return filtered_data
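
The Phase 2 exclusion filter above is easiest to see on concrete records. A minimal sketch of calling the helper directly (the class and module path are real; the sample records and exclude list are invented for illustration):

```python
from gitflow_analytics.reports.narrative_writer import NarrativeReportGenerator

gen = NarrativeReportGenerator()
commits = [
    {"canonical_id": "dev@example.com", "author_name": "Dev One"},      # hypothetical human author
    {"canonical_id": "renovate[bot]", "author_name": "renovate[bot]"},  # hypothetical bot identity
]
# "renovate[bot]" contains "[bot]", so the helper routes it into bot_patterns
# rather than explicit_excludes, and it substring-matches the bot record's fields.
kept = gen._filter_excluded_authors(commits, ["renovate[bot]"])
assert [c["canonical_id"] for c in kept] == ["dev@example.com"]
```

Calling the private helper directly is only for illustration; in the package it runs inside `generate_narrative_report` when `exclude_authors` is configured.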
+
+    def generate_narrative_report(
+        self,
+        commits: list[dict[str, Any]],
+        prs: list[dict[str, Any]],
+        developer_stats: list[dict[str, Any]],
+        activity_dist: list[dict[str, Any]],
+        focus_data: list[dict[str, Any]],
+        insights: list[dict[str, Any]],
+        ticket_analysis: dict[str, Any],
+        pr_metrics: dict[str, Any],
+        output_path: Path,
+        weeks: int,
+        pm_data: dict[str, Any] = None,
+        chatgpt_summary: str = None,
+        branch_health_metrics: dict[str, dict[str, Any]] = None,
+        exclude_authors: list[str] = None,
+        analysis_start_date: datetime = None,
+        analysis_end_date: datetime = None,
+    ) -> Path:
         """Generate comprehensive narrative report."""
-        report = StringIO()
+        # Store analysis period for use in weekly trends calculation
+        self._analysis_start_date = analysis_start_date
+        self._analysis_end_date = analysis_end_date
+
+        logger.debug(f"DEBUG NARRATIVE: Starting report generation with exclude_authors: {exclude_authors}")
+        logger.debug(f"DEBUG NARRATIVE: Analysis period: {analysis_start_date} to {analysis_end_date}")
+        logger.debug(f"DEBUG NARRATIVE: Input data sizes - commits: {len(commits)}, developer_stats: {len(developer_stats)}, "
+                     f"activity_dist: {len(activity_dist)}, focus_data: {len(focus_data)}")
+
+        # Sample some developer_stats to see their structure
+        if developer_stats:
+            for i, dev in enumerate(developer_stats[:3]):
+                logger.debug(f"DEBUG NARRATIVE: Sample developer_stats[{i}]: canonical_id='{dev.get('canonical_id', '')}', "
+                             f"primary_name='{dev.get('primary_name', '')}', name='{dev.get('name', '')}', "
+                             f"primary_email='{dev.get('primary_email', '')}'")
+
+        # Filter out excluded authors in Phase 2 using canonical_id
+        if exclude_authors:
+            logger.debug(f"DEBUG NARRATIVE: Applying exclusion filter with {len(exclude_authors)} excluded authors")
+
+            original_commits = len(commits)
+            commits = self._filter_excluded_authors(commits, exclude_authors)
+            filtered_commits = original_commits - len(commits)
+
+            # Filter other data structures too
+            logger.debug(f"DEBUG NARRATIVE: Filtering developer_stats (original: {len(developer_stats)})")
+            developer_stats = self._filter_excluded_authors(developer_stats, exclude_authors)
+            logger.debug(f"DEBUG NARRATIVE: After filtering developer_stats: {len(developer_stats)}")
+
+            activity_dist = self._filter_excluded_authors(activity_dist, exclude_authors)
+            focus_data = self._filter_excluded_authors(focus_data, exclude_authors)
+
+            if filtered_commits > 0:
+                logger.info(f"Filtered out {filtered_commits} commits from {len(exclude_authors)} excluded authors in narrative report")
+
+            # Log remaining developers after filtering
+            if developer_stats:
+                remaining_devs = [dev.get('primary_name', dev.get('name', 'Unknown')) for dev in developer_stats]
+                logger.debug(f"DEBUG NARRATIVE: Remaining developers after filtering: {remaining_devs}")
+        else:
+            logger.debug("DEBUG NARRATIVE: No exclusion filter applied")
 
+        report = StringIO()
+
         # Header
         report.write("# GitFlow Analytics Report\n\n")
-        report.write(f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
+
+        # Log datetime formatting
+        now = datetime.now()
+        logger.debug(
+            f"Formatting current datetime for report header: {now} (tzinfo: {getattr(now, 'tzinfo', 'N/A')})"
+        )
+        formatted_time = now.strftime("%Y-%m-%d %H:%M:%S")
+        logger.debug(f" Formatted time: {formatted_time}")
+
+        report.write(f"**Generated**: {formatted_time}\n")
         report.write(f"**Analysis Period**: Last {weeks} weeks\n\n")
-
+
         # Executive Summary
         report.write("## Executive Summary\n\n")
-        self._write_executive_summary(report, commits, developer_stats, ticket_analysis)
-
+        self._write_executive_summary(report, commits, developer_stats, ticket_analysis, prs, branch_health_metrics, pm_data)
+
+        # Add ChatGPT qualitative insights if available
+        if chatgpt_summary:
+            report.write("\n## Qualitative Analysis\n\n")
+            report.write(chatgpt_summary)
+            report.write("\n")
+
         # Team Composition
         report.write("\n## Team Composition\n\n")
-        self._write_team_composition(report, developer_stats, focus_data)
-
+        self._write_team_composition(report, developer_stats, focus_data, commits, prs, ticket_analysis, weeks)
+
         # Project Activity
         report.write("\n## Project Activity\n\n")
-        self._write_project_activity(report, activity_dist, commits)
-
+        self._write_project_activity(report, activity_dist, commits, branch_health_metrics, ticket_analysis, weeks)
+
+
         # Development Patterns
         report.write("\n## Development Patterns\n\n")
         self._write_development_patterns(report, insights, focus_data)
-
+
+        # Commit Classification Analysis (if ML analysis is available)
+        if ticket_analysis.get("ml_analysis", {}).get("enabled", False):
+            report.write("\n## Commit Classification Analysis\n\n")
+            self._write_commit_classification_analysis(report, ticket_analysis)
+
         # Pull Request Analysis (if available)
-        if pr_metrics and pr_metrics.get('total_prs', 0) > 0:
+        if pr_metrics and pr_metrics.get("total_prs", 0) > 0:
             report.write("\n## Pull Request Analysis\n\n")
             self._write_pr_analysis(report, pr_metrics, prs)
-
+
         # Ticket Tracking
         report.write("\n## Issue Tracking\n\n")
-        self._write_ticket_tracking(report, ticket_analysis)
-
+        self._write_ticket_tracking(report, ticket_analysis, developer_stats)
+
+        # PM Platform Insights
+        if pm_data and "metrics" in pm_data:
+            report.write("\n## PM Platform Integration\n\n")
+            self._write_pm_insights(report, pm_data)
+
         # Recommendations
         report.write("\n## Recommendations\n\n")
         self._write_recommendations(report, insights, ticket_analysis, focus_data)
-
+
         # Write to file
-        with open(output_path, 'w') as f:
+        with open(output_path, "w") as f:
             f.write(report.getvalue())
-
+
         return output_path
-
-    def _write_executive_summary(self, report: StringIO, commits: List[Dict[str, Any]],
-                                 developer_stats: List[Dict[str, Any]],
-                                 ticket_analysis: Dict[str, Any]) -> None:
+
+    def _write_executive_summary(
+        self,
+        report: StringIO,
+        commits: list[dict[str, Any]],
+        developer_stats: list[dict[str, Any]],
+        ticket_analysis: dict[str, Any],
+        prs: list[dict[str, Any]],
+        branch_health_metrics: dict[str, dict[str, Any]] = None,
+        pm_data: dict[str, Any] = None,
+    ) -> None:
         """Write executive summary section."""
         total_commits = len(commits)
         total_developers = len(developer_stats)
         total_lines = sum(
-            c.get('filtered_insertions', c.get('insertions', 0)) +
-            c.get('filtered_deletions', c.get('deletions', 0))
+            c.get("filtered_insertions", c.get("insertions", 0))
+            + c.get("filtered_deletions", c.get("deletions", 0))
            for c in commits
        )
-
+
         report.write(f"- **Total Commits**: {total_commits:,}\n")
         report.write(f"- **Active Developers**: {total_developers}\n")
         report.write(f"- **Lines Changed**: {total_lines:,}\n")
         report.write(f"- **Ticket Coverage**: {ticket_analysis['commit_coverage_pct']:.1f}%\n")
 
-        # Projects worked on
-        projects = set(c.get('project_key', 'UNKNOWN') for c in commits)
-        report.write(f"- **Active Projects**: {len(projects)} ({', '.join(sorted(projects))})\n")
+        # PM Platform Story Points (if available)
+        if pm_data and "metrics" in pm_data:
+            metrics = pm_data.get("metrics", {})
+            story_analysis = metrics.get("story_point_analysis", {})
+            pm_story_points = story_analysis.get("pm_total_story_points", 0)
+            git_story_points = story_analysis.get("git_total_story_points", 0)
+
+            if pm_story_points > 0 or git_story_points > 0:
+                report.write(f"- **PM Story Points**: {pm_story_points:,} (platform) / {git_story_points:,} (commit-linked)\n")
 
-        # Top contributor
-        if developer_stats:
-            top_dev = developer_stats[0]
-            report.write(f"- **Top Contributor**: {top_dev['primary_name']} ")
-            report.write(f"({top_dev['total_commits']} commits)\n")
+        # Add repository branch health summary
+        if branch_health_metrics:
+            # Aggregate branch health across all repositories
+            total_branches = 0
+            total_stale = 0
+            overall_health_scores = []
+
+            for _repo_name, metrics in branch_health_metrics.items():
+                summary = metrics.get("summary", {})
+                health_indicators = metrics.get("health_indicators", {})
+
+                total_branches += summary.get("total_branches", 0)
+                total_stale += summary.get("stale_branches", 0)
+
+                if health_indicators.get("overall_health_score") is not None:
+                    overall_health_scores.append(health_indicators["overall_health_score"])
+
+            # Calculate average health score
+            avg_health_score = sum(overall_health_scores) / len(overall_health_scores) if overall_health_scores else 0
+
+            # Determine health status
+            if avg_health_score >= 80:
+                health_status = "Excellent"
+            elif avg_health_score >= 60:
+                health_status = "Good"
+            elif avg_health_score >= 40:
+                health_status = "Fair"
+            else:
+                health_status = "Needs Attention"
+
+            report.write(f"- **Branch Health**: {health_status} ({avg_health_score:.0f}/100) - "
+                         f"{total_branches} branches, {total_stale} stale\n")
+
+        # Projects worked on - show full list instead of just count
+        projects = set(c.get("project_key", "UNKNOWN") for c in commits)
+        projects_list = sorted(projects)
+        report.write(f"- **Active Projects**: {', '.join(projects_list)}\n")
+
+        # Top contributor with proper format matching old report
+        if developer_stats and commits:
+            # BUGFIX: Calculate period-specific commit counts instead of using all-time totals
+            period_commit_counts = {}
+            for commit in commits:
+                canonical_id = commit.get("canonical_id", "")
+                period_commit_counts[canonical_id] = period_commit_counts.get(canonical_id, 0) + 1
+
+            # Find the developer with most commits in this period
+            if period_commit_counts:
+                top_canonical_id = max(period_commit_counts, key=period_commit_counts.get)
+                top_period_commits = period_commit_counts[top_canonical_id]
+
+                # Find the developer stats entry for this canonical_id
+                top_dev = None
+                for dev in developer_stats:
+                    if dev.get("canonical_id") == top_canonical_id:
+                        top_dev = dev
+                        break
+
+                if top_dev:
+                    # Handle both 'primary_name' (production) and 'name' (tests) for backward compatibility
+                    dev_name = top_dev.get("primary_name", top_dev.get("name", "Unknown Developer"))
+                    report.write(
+                        f"- **Top Contributor**: {dev_name} with {top_period_commits} commits\n"
+                    )
+                elif developer_stats:
+                    # Fallback: use first developer but with 0 commits (shouldn't happen with proper filtering)
+                    top_dev = developer_stats[0]
+                    dev_name = top_dev.get("primary_name", top_dev.get("name", "Unknown Developer"))
+                    report.write(
+                        f"- **Top Contributor**: {dev_name} with 0 commits\n"
+                    )
+
+        # Calculate team average activity
+        if commits:
+            # Quick activity score calculation for executive summary
+            # total_prs = len(prs) if prs else 0 # Not used yet
+            total_lines = sum(
+                c.get("filtered_insertions", c.get("insertions", 0))
+                + c.get("filtered_deletions", c.get("deletions", 0))
+                for c in commits
+            )
+
+            # BUGFIX: Basic team activity assessment using only active developers in period
+            active_devs_in_period = len(period_commit_counts) if period_commit_counts else 0
+            avg_commits_per_dev = len(commits) / active_devs_in_period if active_devs_in_period > 0 else 0
+            if avg_commits_per_dev >= 10:
+                activity_assessment = "high activity"
+            elif avg_commits_per_dev >= 5:
+                activity_assessment = "moderate activity"
+            else:
+                activity_assessment = "low activity"
+
+            report.write(
+                f"- **Team Activity**: {activity_assessment} (avg {avg_commits_per_dev:.1f} commits/developer)\n"
+            )
+
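
The period-specific top-contributor counting above is plain dictionary tallying; `collections.Counter` expresses the same computation compactly. A standalone sketch with invented commits, reusing the same activity thresholds as the executive summary:

```python
from collections import Counter

commits = [  # hypothetical commits already filtered to the reporting period
    {"canonical_id": "alice"}, {"canonical_id": "bob"}, {"canonical_id": "alice"},
]
period_commit_counts = Counter(c.get("canonical_id", "") for c in commits)
top_canonical_id, top_period_commits = period_commit_counts.most_common(1)[0]
print(top_canonical_id, top_period_commits)  # alice 2

# Same thresholds as the team-activity assessment above:
avg = len(commits) / len(period_commit_counts)  # 1.5 commits/developer
assessment = "high activity" if avg >= 10 else "moderate activity" if avg >= 5 else "low activity"
print(assessment)  # low activity
```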
+    def _aggregate_commit_classifications(
+        self,
+        ticket_analysis: dict[str, Any],
+        commits: list[dict[str, Any]] = None,
+        developer_stats: list[dict[str, Any]] = None
+    ) -> dict[str, dict[str, int]]:
+        """Aggregate commit classifications per developer.
+
+        WHY: This method provides detailed breakdown of commit types per developer,
+        replacing simple commit counts with actionable insights into what types of
+        work each developer is doing. This helps identify patterns and training needs.
+
+        DESIGN DECISION: Classify ALL commits (tracked and untracked) into proper
+        categories (feature, bug_fix, refactor, etc.) rather than using 'tracked_work'
+        as a category. For tracked commits, use ticket information to enhance accuracy.
+
+        Args:
+            ticket_analysis: Ticket analysis data containing classification info
+            commits: Optional list of all commits for complete categorization
+            developer_stats: Developer statistics for mapping canonical IDs
+
+        Returns:
+            Dictionary mapping developer canonical_id to category counts:
+            {
+                'dev_canonical_id': {
+                    'feature': 15,
+                    'bug_fix': 8,
+                    'maintenance': 5,
+                    ...
+                }
+            }
+        """
+        # Defensive type checking
+        if not isinstance(ticket_analysis, dict):
+            return {}
+
+        if commits is not None and not isinstance(commits, list):
+            # Log the error and continue without commits data
+            import logging
+            logger = logging.getLogger(__name__)
+            logger.warning(f"Expected commits to be list or None, got {type(commits)}: {commits}")
+            commits = None
+
+        if developer_stats is not None and not isinstance(developer_stats, list):
+            developer_stats = None
+
+        classifications = {}
+
+        # If we have full commits data, classify ALL commits properly
+        if commits and isinstance(commits, list):
+            # Import the ticket extractor for classification
+            try:
+                from ..extractors.ml_tickets import MLTicketExtractor
+                extractor = MLTicketExtractor(enable_ml=True)
+            except Exception:
+                # Fallback to basic ticket extractor
+                from ..extractors.tickets import TicketExtractor
+                extractor = TicketExtractor()
+
+            # Classify all commits
+            for commit in commits:
+                canonical_id = commit.get("canonical_id", "Unknown")
+                message = commit.get("message", "")
+
+                # Get files_changed in proper format for classification
+                files_changed = commit.get("files_changed", [])
+                if isinstance(files_changed, int):
+                    # If files_changed is just a count, we can't provide file names
+                    files_changed = []
+                elif not isinstance(files_changed, list):
+                    files_changed = []
+
+                # Use ticket information to enhance classification for tracked commits
+                ticket_refs = commit.get("ticket_references", [])
+
+                if ticket_refs and hasattr(extractor, 'categorize_commit_with_confidence'):
+                    # Use ML categorization with confidence for tracked commits
+                    try:
+                        result = extractor.categorize_commit_with_confidence(message, files_changed)
+                        category = result['category']
+                        # For tracked commits with ticket info, try to infer better category from ticket type
+                        category = self._enhance_category_with_ticket_info(category, ticket_refs, message)
+                    except Exception:
+                        # Fallback to basic categorization
+                        category = extractor.categorize_commit(message)
+                else:
+                    # Use basic categorization for untracked commits
+                    category = extractor.categorize_commit(message)
+
+                # Initialize developer classification if not exists
+                if canonical_id not in classifications:
+                    classifications[canonical_id] = {}
+
+                # Initialize category count if not exists
+                if category not in classifications[canonical_id]:
+                    classifications[canonical_id][category] = 0
+
+                # Increment category count
+                classifications[canonical_id][category] += 1
+
+        else:
+            # Fallback: Only process untracked commits (legacy behavior)
+            untracked_commits = ticket_analysis.get("untracked_commits", [])
+
+            # Process untracked commits (these have category information)
+            for commit in untracked_commits:
+                author = commit.get("author", "Unknown")
+                category = commit.get("category", "other")
+
+                # Map author to canonical_id if developer_stats is available
+                canonical_id = author # fallback
+                if developer_stats:
+                    for dev in developer_stats:
+                        # Check multiple possible name mappings
+                        if (dev.get("primary_name") == author or
+                                dev.get("primary_email") == author or
+                                dev.get("canonical_id") == author):
+                            canonical_id = dev.get("canonical_id", author)
+                            break
+
+                if canonical_id not in classifications:
+                    classifications[canonical_id] = {}
+
+                if category not in classifications[canonical_id]:
+                    classifications[canonical_id][category] = 0
+
+                classifications[canonical_id][category] += 1
+
+        return classifications
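
The returned mapping is an ordinary nested dict, so report code can total or rank categories with plain dict operations. A sketch with invented values:

```python
classifications = {  # hypothetical return value of _aggregate_commit_classifications
    "alice": {"feature": 15, "bug_fix": 8, "maintenance": 5},
    "bob": {"bug_fix": 12, "refactor": 3},
}
for dev_id, counts in classifications.items():
    total = sum(counts.values())
    top_category = max(counts, key=counts.get)
    print(f"{dev_id}: {total} commits, mostly {top_category}")
# alice: 28 commits, mostly feature
# bob: 15 commits, mostly bug_fix
```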
 
-    def _write_team_composition(self, report: StringIO, developer_stats: List[Dict[str, Any]],
-                                focus_data: List[Dict[str, Any]]) -> None:
-        """Write team composition analysis."""
-        report.write("### Developer Profiles\n\n")
+    def _enhance_category_with_ticket_info(self, category: str, ticket_refs: list, message: str) -> str:
+        """Enhance commit categorization using ticket reference information.
+
+        WHY: For tracked commits, we can often infer better categories by examining
+        the ticket references and message content. This improves classification accuracy
+        for tracked work versus relying purely on message patterns.
+
+        Args:
+            category: Base category from ML/rule-based classification
+            ticket_refs: List of ticket references for this commit
+            message: Commit message
+
+        Returns:
+            Enhanced category, potentially refined based on ticket information
+        """
+        if not ticket_refs:
+            return category
+
+        # Try to extract insights from ticket references and message
+        message_lower = message.lower()
+
+        # Look for ticket type patterns in the message or ticket IDs
+        # These patterns suggest specific categories regardless of base classification
+        if any(pattern in message_lower for pattern in ['hotfix', 'critical', 'urgent', 'prod', 'production']):
+            return 'bug_fix' # Production/critical issues are typically bug fixes
+
+        if any(pattern in message_lower for pattern in ['feature', 'epic', 'story', 'user story']):
+            return 'feature' # Explicitly mentioned features
+
+        # Look for JIRA/GitHub issue patterns that might indicate bug fixes
+        for ticket_ref in ticket_refs:
+            if isinstance(ticket_ref, dict):
+                ticket_id = ticket_ref.get('id', '').lower()
+            else:
+                ticket_id = str(ticket_ref).lower()
+
+            # Common bug fix patterns in ticket IDs
+            if any(pattern in ticket_id for pattern in ['bug', 'fix', 'issue', 'defect']):
+                return 'bug_fix'
+
+            # Feature patterns in ticket IDs
+            if any(pattern in ticket_id for pattern in ['feat', 'feature', 'epic', 'story']):
+                return 'feature'
+
+        # If no specific enhancement found, return original category
+        return category
+
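
The override rules above are simple keyword checks; two direct calls show the intended effect (messages and ticket IDs invented):

```python
from gitflow_analytics.reports.narrative_writer import NarrativeReportGenerator

gen = NarrativeReportGenerator()

# "hotfix" in the message forces bug_fix even if the base classifier said "maintenance".
print(gen._enhance_category_with_ticket_info(
    "maintenance", ["OPS-101"], "hotfix: rollback prod config"))  # bug_fix

# A ticket ID containing "feat" promotes the commit to feature.
print(gen._enhance_category_with_ticket_info(
    "other", ["FEAT-42"], "implement export button"))  # feature
```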
+    def _get_project_classifications(
+        self, project: str, commits: list[dict[str, Any]], ticket_analysis: dict[str, Any]
+    ) -> dict[str, int]:
+        """Get commit classification breakdown for a specific project.
+
+        WHY: This method filters classification data to show only commits belonging
+        to a specific project, enabling project-specific classification insights
+        in the project activity section.
+
+        DESIGN DECISION: Classify ALL commits (tracked and untracked) for this project
+        into proper categories rather than lumping tracked commits as 'tracked_work'.
+
+        Args:
+            project: Project key to filter by
+            commits: List of all commits for mapping
+            ticket_analysis: Ticket analysis data containing classifications
+
+        Returns:
+            Dictionary mapping category names to commit counts for this project:
+            {'feature': 15, 'bug_fix': 8, 'refactor': 5, ...}
+        """
+        if not isinstance(ticket_analysis, dict):
+            return {}
+
+        project_classifications = {}
+
+        # First, try to use already classified untracked commits
+        untracked_commits = ticket_analysis.get("untracked_commits", [])
+        for commit in untracked_commits:
+            commit_project = commit.get("project_key", "UNKNOWN")
+            if commit_project == project:
+                category = commit.get("category", "other")
+                if category not in project_classifications:
+                    project_classifications[category] = 0
+                project_classifications[category] += 1
+
+        # If we have classifications from untracked commits, use those
+        if project_classifications:
+            return project_classifications
+
+        # Fallback: If no untracked commits data, classify all commits for this project
+        if isinstance(commits, list):
+            # Import the ticket extractor for classification
+            try:
+                from ..extractors.ml_tickets import MLTicketExtractor
+                extractor = MLTicketExtractor(enable_ml=True)
+            except Exception:
+                # Fallback to basic ticket extractor
+                from ..extractors.tickets import TicketExtractor
+                extractor = TicketExtractor()
+
+            # Classify all commits for this project
+            for commit in commits:
+                commit_project = commit.get("project_key", "UNKNOWN")
+                if commit_project == project:
+                    message = commit.get("message", "")
+
+                    # Get files_changed in proper format for classification
+                    files_changed = commit.get("files_changed", [])
+                    if isinstance(files_changed, int):
+                        # If files_changed is just a count, we can't provide file names
+                        files_changed = []
+                    elif not isinstance(files_changed, list):
+                        files_changed = []
+
+                    # Use ticket information to enhance classification for tracked commits
+                    ticket_refs = commit.get("ticket_references", [])
+
+                    if ticket_refs and hasattr(extractor, 'categorize_commit_with_confidence'):
+                        # Use ML categorization with confidence for tracked commits
+                        try:
+                            result = extractor.categorize_commit_with_confidence(message, files_changed)
+                            category = result['category']
+                            # For tracked commits with ticket info, try to infer better category from ticket type
+                            category = self._enhance_category_with_ticket_info(category, ticket_refs, message)
+                        except Exception:
+                            # Fallback to basic categorization
+                            category = extractor.categorize_commit(message)
+                    else:
+                        # Use basic categorization for untracked commits
+                        category = extractor.categorize_commit(message)
+
+                    # Initialize category count if not exists
+                    if category not in project_classifications:
+                        project_classifications[category] = 0
+
+                    # Increment category count
+                    project_classifications[category] += 1
+
+        return project_classifications
+
+    def _format_category_name(self, category: str) -> str:
+        """Convert internal category names to user-friendly display names.
+
+        Args:
+            category: Internal category name (e.g., 'bug_fix', 'feature', 'refactor')
+
+        Returns:
+            User-friendly display name (e.g., 'Bug Fixes', 'Features', 'Refactoring')
+        """
+        category_mapping = {
+            'bug_fix': 'Bug Fixes',
+            'feature': 'Features',
+            'refactor': 'Refactoring',
+            'documentation': 'Documentation',
+            'maintenance': 'Maintenance',
+            'test': 'Testing',
+            'style': 'Code Style',
+            'build': 'Build/CI',
+            'other': 'Other'
+        }
+        return category_mapping.get(category, category.replace('_', ' ').title())
+
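
A per-project breakdown from `_get_project_classifications` combines naturally with `_format_category_name` when rendering a summary line. A sketch with an invented result dict:

```python
from gitflow_analytics.reports.narrative_writer import NarrativeReportGenerator

project_counts = {"feature": 15, "bug_fix": 8, "refactor": 5}  # hypothetical result
gen = NarrativeReportGenerator()
parts = [
    f"{gen._format_category_name(cat)}: {n}"
    for cat, n in sorted(project_counts.items(), key=lambda kv: -kv[1])
]
print(", ".join(parts))  # Features: 15, Bug Fixes: 8, Refactoring: 5
```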
+    def _calculate_weekly_classification_percentages(
+        self,
+        commits: list[dict[str, Any]],
+        developer_id: str = None,
+        project_key: str = None,
+        weeks: int = 4,
+        analysis_start_date: datetime = None,
+        analysis_end_date: datetime = None
+    ) -> list[dict[str, Any]]:
+        """Calculate weekly classification percentages for trend lines.
+
+        WHY: This method creates detailed week-by-week breakdown of commit classifications
+        showing how work type distribution changes over time, providing granular insights
+        into development patterns and workload shifts.
+
+        DESIGN DECISION: Only show weeks that contain actual commit activity within the
+        analysis period. This prevents phantom "No activity" weeks for periods outside
+        the actual data collection range, providing more accurate and meaningful reports.
+
+        Args:
+            commits: List of all commits with timestamps and classifications
+            developer_id: Optional canonical developer ID to filter by
+            project_key: Optional project key to filter by
+            weeks: Total analysis period in weeks
+            analysis_start_date: Analysis period start (from CLI)
+            analysis_end_date: Analysis period end (from CLI)
+
+        Returns:
+            List of weekly data dictionaries:
+            [
+                {
+                    'week_start': datetime,
+                    'week_display': 'Jul 7-13',
+                    'classifications': {'Features': 45.0, 'Bug Fixes': 30.0, 'Maintenance': 25.0},
+                    'changes': {'Features': 5.0, 'Bug Fixes': -5.0, 'Maintenance': 0.0},
+                    'has_activity': True
+                },
+                ...
+            ]
+        """
+        if not commits or weeks < 1:
+            return []
+
+        # Filter commits by developer or project if specified
+        filtered_commits = []
+        for commit in commits:
+            if developer_id and commit.get('canonical_id') != developer_id:
+                continue
+            if project_key and commit.get('project_key') != project_key:
+                continue
+            filtered_commits.append(commit)
+
+        # If no commits match the filter, return empty
+        if not filtered_commits:
+            return []
+
+        # Determine the analysis period bounds
+        if analysis_start_date and analysis_end_date:
+            # Use the exact analysis period from the CLI
+            analysis_start = analysis_start_date
+            analysis_end = analysis_end_date
+        else:
+            # Fallback: Use the actual date range of the filtered commits
+            # This ensures we only show weeks that have potential for activity
+            filtered_timestamps = []
+            for commit in filtered_commits:
+                timestamp = commit.get('timestamp')
+                if timestamp:
+                    # Ensure timezone consistency
+                    if hasattr(timestamp, 'tzinfo'):
+                        if timestamp.tzinfo is None:
+                            timestamp = timestamp.replace(tzinfo=timezone.utc)
+                        elif timestamp.tzinfo != timezone.utc:
+                            timestamp = timestamp.astimezone(timezone.utc)
+                    filtered_timestamps.append(timestamp)
+
+            if not filtered_timestamps:
+                return []
+
+            # Use the actual range of commits for this developer/project
+            analysis_start = min(filtered_timestamps)
+            analysis_end = max(filtered_timestamps)
+
+        # Generate ALL weeks in the analysis period (not just weeks with commits)
+        # This ensures complete week coverage from start to end
+        # FIX: Only include complete weeks (Monday-Sunday) within the analysis period
+        analysis_weeks = []
+        current_week_start = self._get_week_start(analysis_start)
+
+        # Only include weeks where the entire week (including Sunday) is within the analysis period
+        while current_week_start <= analysis_end:
+            week_end = current_week_start + timedelta(days=6, hours=23, minutes=59, seconds=59)
+            # Only include this week if it ends before or on the analysis end date
+            if week_end <= analysis_end:
+                analysis_weeks.append(current_week_start)
+            current_week_start += timedelta(weeks=1)
+
+        # Group commits by week
+        weekly_commits = {}
+        for week_start in analysis_weeks:
+            weekly_commits[week_start] = []
+
+        for commit in filtered_commits:
+            timestamp = commit.get('timestamp')
+            if not timestamp:
+                continue
+
+            # Ensure timezone consistency
+            if hasattr(timestamp, 'tzinfo'):
+                if timestamp.tzinfo is None:
+                    timestamp = timestamp.replace(tzinfo=timezone.utc)
+                elif timestamp.tzinfo != timezone.utc:
+                    timestamp = timestamp.astimezone(timezone.utc)
+
+            # Only include commits within the analysis period bounds
+            if analysis_start_date and analysis_end_date and not (analysis_start <= timestamp <= analysis_end):
+                continue
+
+            # Get week start (Monday) for this commit
+            commit_week_start = self._get_week_start(timestamp)
+
+            # Only include commits in weeks we're tracking
+            if commit_week_start in weekly_commits:
+                weekly_commits[commit_week_start].append(commit)
+
+        # Import classifiers
+        try:
+            from ..extractors.ml_tickets import MLTicketExtractor
+            extractor = MLTicketExtractor(enable_ml=True)
+        except Exception:
+            from ..extractors.tickets import TicketExtractor
+            extractor = TicketExtractor()
+
+        # Calculate classifications for each week in the analysis period
+        # This includes both weeks with activity and weeks with no commits
+        weekly_data = []
+        previous_percentages = {}
+
+        for week_start in analysis_weeks:
+            week_commits = weekly_commits[week_start]
+            has_activity = len(week_commits) > 0
+
+            # Classify commits for this week
+            week_classifications = {}
+            week_percentages = {}
+
+            if has_activity:
+                for commit in week_commits:
+                    message = commit.get('message', '')
+                    files_changed = commit.get('files_changed', [])
+                    if isinstance(files_changed, int) or not isinstance(files_changed, list):
+                        files_changed = []
+
+                    ticket_refs = commit.get('ticket_references', [])
+
+                    if ticket_refs and hasattr(extractor, 'categorize_commit_with_confidence'):
+                        try:
+                            result = extractor.categorize_commit_with_confidence(message, files_changed)
+                            category = result['category']
+                            category = self._enhance_category_with_ticket_info(category, ticket_refs, message)
+                        except Exception:
+                            category = extractor.categorize_commit(message)
+                    else:
+                        category = extractor.categorize_commit(message)
+
+                    if category not in week_classifications:
+                        week_classifications[category] = 0
+                    week_classifications[category] += 1
+
+                # Calculate percentages for weeks with activity
+                total_commits = sum(week_classifications.values())
+                if total_commits > 0:
+                    for category, count in week_classifications.items():
+                        percentage = (count / total_commits) * 100
+                        if percentage >= 5.0: # Only include significant categories
+                            display_name = self._format_category_name(category)
+                            week_percentages[display_name] = percentage
+
+            # Calculate changes from previous week
+            changes = {}
+            if previous_percentages and week_percentages:
+                for category in set(week_percentages.keys()) | set(previous_percentages.keys()):
+                    current_pct = week_percentages.get(category, 0.0)
+                    prev_pct = previous_percentages.get(category, 0.0)
+                    change = current_pct - prev_pct
+                    if abs(change) >= 1.0: # Only show changes >= 1%
+                        changes[category] = change
+
+            # Format week display
+            week_end = week_start + timedelta(days=6)
+            week_display = f"{week_start.strftime('%b %d')}-{week_end.strftime('%d')}"
+
+            # Calculate ticket coverage stats for this week
+            total_commits_week = len(week_commits)
+            commits_with_tickets = sum(1 for commit in week_commits if commit.get('ticket_references'))
+            ticket_coverage_pct = (commits_with_tickets / total_commits_week * 100) if total_commits_week > 0 else 0
+
+            # Calculate activity score for this week
+            week_activity_score = 0.0
+            if total_commits_week > 0:
+                # Aggregate weekly metrics for activity score
+                total_lines_added = sum(commit.get('lines_added', 0) for commit in week_commits)
+                total_lines_deleted = sum(commit.get('lines_deleted', 0) for commit in week_commits)
+                total_files_changed = sum(commit.get('files_changed_count', 0) for commit in week_commits)
+
+                week_metrics = {
+                    'commits': total_commits_week,
+                    'prs_involved': 0, # PR data not available in commit data
+                    'lines_added': total_lines_added,
+                    'lines_removed': total_lines_deleted,
+                    'files_changed_count': total_files_changed,
+                    'complexity_delta': 0 # Complexity data not available
+                }
+
+                activity_result = self.activity_scorer.calculate_activity_score(week_metrics)
+                week_activity_score = activity_result.get('normalized_score', 0.0)
+
+            weekly_data.append({
+                'week_start': week_start,
+                'week_display': week_display,
+                'classifications': week_percentages,
+                'classification_counts': week_classifications, # Absolute counts
+                'changes': changes,
+                'has_activity': has_activity,
+                'total_commits': total_commits_week,
+                'commits_with_tickets': commits_with_tickets,
+                'ticket_coverage': ticket_coverage_pct,
+                'activity_score': week_activity_score
+            })
+
+            # Update previous percentages only if there was activity
+            if has_activity and week_percentages:
+                previous_percentages = week_percentages.copy()
+
+        return weekly_data
+
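
The weekly bucketing relies on `self._get_week_start`, which is called above but defined outside this hunk. Its call sites only require flooring a timestamp to Monday 00:00 UTC; a minimal stand-in under that assumption (not the package's actual implementation):

```python
from datetime import datetime, timedelta, timezone

def get_week_start(ts: datetime) -> datetime:
    """Floor a timestamp to Monday 00:00 UTC (assumed contract of _get_week_start)."""
    if ts.tzinfo is None:
        ts = ts.replace(tzinfo=timezone.utc)
    monday = ts - timedelta(days=ts.weekday())  # weekday() is 0 for Monday
    return monday.replace(hour=0, minute=0, second=0, microsecond=0)

print(get_week_start(datetime(2024, 7, 10, 15, 30, tzinfo=timezone.utc)))
# 2024-07-08 00:00:00+00:00  (Wednesday Jul 10 floors to Monday Jul 8)
```

Any implementation with this contract makes the `week_end <= analysis_end` check above select only complete Monday-to-Sunday weeks.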
+    def _calculate_classification_trends(
+        self,
+        commits: list[dict[str, Any]],
+        developer_id: str = None,
+        project_key: str = None,
+        weeks: int = 4
+    ) -> dict[str, float]:
+        """Calculate week-over-week changes in classification percentages.
+
+        WHY: This method provides trend analysis showing how development patterns
+        change over time, helping identify shifts in work type distribution.
+
+        DESIGN DECISION: Compare the most recent half of the analysis period
+        with the earlier half to show meaningful trends. For shorter periods,
+        compare week-to-week. Use percentage point changes for clarity.
+
+        Args:
+            commits: List of all commits with timestamps and classifications
+            developer_id: Optional canonical developer ID to filter by
+            project_key: Optional project key to filter by
+            weeks: Total analysis period in weeks
+
+        Returns:
+            Dictionary mapping category names to percentage point changes:
+            {'Features': 15.2, 'Bug Fixes': -8.1, 'Refactoring': 3.4}
+            Positive values indicate increases, negative indicate decreases.
+        """
+        if not commits or len(commits) < 2:
+            return {}
+
+        # Filter commits by developer or project if specified
+        filtered_commits = []
+        for commit in commits:
+            if developer_id and commit.get('canonical_id') != developer_id:
+                continue
+            if project_key and commit.get('project_key') != project_key:
+                continue
+            filtered_commits.append(commit)
+
+        if len(filtered_commits) < 2:
+            return {}
+
+        # Sort commits by timestamp
+        def safe_timestamp_key(commit):
+            ts = commit.get('timestamp')
+            if ts is None:
+                return datetime.min.replace(tzinfo=timezone.utc)
+            if hasattr(ts, 'tzinfo'):
+                if ts.tzinfo is None:
+                    ts = ts.replace(tzinfo=timezone.utc)
+                return ts
+            return ts
+
+        sorted_commits = sorted(filtered_commits, key=safe_timestamp_key)
+
+        if len(sorted_commits) < 4: # Need at least 4 commits for meaningful trend
+            return {}
+
+        # Determine time split strategy based on analysis period
+        if weeks <= 2:
+            # For short periods (1-2 weeks), compare last 3 days vs previous 3+ days
+            cutoff_days = 3
+        elif weeks <= 4:
+            # For 3-4 week periods, compare last week vs previous weeks
+            cutoff_days = 7
+        else:
+            # For longer periods, compare recent half vs older half
+            cutoff_days = (weeks * 7) // 2
+
+        # Calculate cutoff timestamp
+        latest_timestamp = safe_timestamp_key(sorted_commits[-1])
+        cutoff_timestamp = latest_timestamp - timedelta(days=cutoff_days)
+
+        # Split commits into recent and previous periods
+        recent_commits = [c for c in sorted_commits if safe_timestamp_key(c) >= cutoff_timestamp]
+        previous_commits = [c for c in sorted_commits if safe_timestamp_key(c) < cutoff_timestamp]
+
+        if not recent_commits or not previous_commits:
+            return {}
+
+        # Classify commits for both periods
+        def get_period_classifications(period_commits):
+            period_classifications = {}
+
+            # Import classifiers
+            try:
+                from ..extractors.ml_tickets import MLTicketExtractor
+                extractor = MLTicketExtractor(enable_ml=True)
+            except Exception:
+                from ..extractors.tickets import TicketExtractor
+                extractor = TicketExtractor()
+
+            for commit in period_commits:
+                message = commit.get('message', '')
+                files_changed = commit.get('files_changed', [])
+                if isinstance(files_changed, int) or not isinstance(files_changed, list):
+                    files_changed = []
+
+                # Get ticket info for enhancement
+                ticket_refs = commit.get('ticket_references', [])
+
+                if ticket_refs and hasattr(extractor, 'categorize_commit_with_confidence'):
+                    try:
+                        result = extractor.categorize_commit_with_confidence(message, files_changed)
+                        category = result['category']
+                        category = self._enhance_category_with_ticket_info(category, ticket_refs, message)
+                    except Exception:
+                        category = extractor.categorize_commit(message)
+                else:
+                    category = extractor.categorize_commit(message)
+
+                if category not in period_classifications:
+                    period_classifications[category] = 0
+                period_classifications[category] += 1
+
+            return period_classifications
+
+        recent_classifications = get_period_classifications(recent_commits)
+        previous_classifications = get_period_classifications(previous_commits)
+
+        # Calculate percentage changes
+        trends = {}
+        all_categories = set(recent_classifications.keys()) | set(previous_classifications.keys())
+
+        total_recent = sum(recent_classifications.values())
+        total_previous = sum(previous_classifications.values())
+
+        if total_recent == 0 or total_previous == 0:
+            return {}
+
+        for category in all_categories:
+            recent_count = recent_classifications.get(category, 0)
+            previous_count = previous_classifications.get(category, 0)
+
+            recent_pct = (recent_count / total_recent) * 100
+            previous_pct = (previous_count / total_previous) * 100
+
+            change = recent_pct - previous_pct
+
+            # Only include significant changes (>= 5% absolute change)
+            if abs(change) >= 5.0:
+                display_name = self._format_category_name(category)
+                trends[display_name] = change
+
+        return trends
+
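
The trend values are percentage-point deltas between the recent and earlier periods, not ratios. Worked numbers with invented counts, using the same 5-point significance cutoff:

```python
recent = {"feature": 6, "bug_fix": 4}    # hypothetical recent-half counts
previous = {"feature": 3, "bug_fix": 7}  # hypothetical earlier-half counts

total_recent, total_previous = sum(recent.values()), sum(previous.values())
for cat in set(recent) | set(previous):
    change = recent.get(cat, 0) / total_recent * 100 - previous.get(cat, 0) / total_previous * 100
    if abs(change) >= 5.0:  # same cutoff as above
        print(cat, round(change, 1))
# feature 30.0   (60% recent vs 30% previous)
# bug_fix -30.0  (40% recent vs 70% previous)
```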
+    def _format_trend_line(self, trends: dict[str, float], prefix: str = "📈 Trends") -> str:
+        """Format trend data into a readable line with appropriate icons.
+
+        WHY: This method provides consistent formatting for trend display across
+        different sections of the report, using visual indicators to highlight
+        increases, decreases, and overall patterns.
+
+        Args:
+            trends: Dictionary of category name to percentage change
+            prefix: Text prefix for the trend line
+
+        Returns:
+            Formatted trend line string, or empty string if no significant trends
+        """
+        if not trends:
+            return ""
+
+        # Sort by absolute change magnitude (largest first)
+        sorted_trends = sorted(trends.items(), key=lambda x: abs(x[1]), reverse=True)
+
+        trend_parts = []
+        for category, change in sorted_trends[:4]: # Show top 4 trends
+            if change > 0:
+                icon = "⬆️"
+                sign = "+"
+            else:
+                icon = "⬇️"
+                sign = ""
+
+            trend_parts.append(f"{category} {icon}{sign}{change:.0f}%")
+
+        if trend_parts:
+            return f"{prefix}: {', '.join(trend_parts)}"
+
+        return ""
+
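
Feeding `_calculate_classification_trends` output into `_format_trend_line` produces the one-line summary used by the report sections. Example with invented trend values:

```python
from gitflow_analytics.reports.narrative_writer import NarrativeReportGenerator

gen = NarrativeReportGenerator()
line = gen._format_trend_line({"Features": 15.2, "Bug Fixes": -8.1, "Refactoring": 3.4})
print(line)
# 📈 Trends: Features ⬆️+15%, Bug Fixes ⬇️-8%, Refactoring ⬆️+3%
```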
+ def _write_weekly_trend_lines(
1123
+ self,
1124
+ report: StringIO,
1125
+ weekly_trends: list[dict[str, Any]],
1126
+ prefix: str = ""
1127
+ ) -> None:
1128
+ """Write weekly trend lines showing week-by-week classification changes.
106
1129
 
1130
+ WHY: This method provides detailed weekly breakdown of work patterns,
1131
+ showing how development focus shifts over time with specific percentages
1132
+ and change indicators from previous weeks. Shows ALL weeks in the analysis
1133
+ period, including weeks with no activity for complete timeline coverage.
1134
+
1135
+ Args:
1136
+ report: StringIO buffer to write to
1137
+ weekly_trends: List of weekly classification data (all weeks in period)
1138
+ prefix: Optional prefix for the trend section (e.g., "Project ")
1139
+ """
1140
+ if not weekly_trends:
1141
+ return
1142
+
1143
+ report.write(f"- {prefix}Weekly Trends:\n")
1144
+
1145
+ for i, week_data in enumerate(weekly_trends):
1146
+ week_display = week_data['week_display']
1147
+ classifications = week_data['classifications']
1148
+ changes = week_data['changes']
1149
+ has_activity = week_data.get('has_activity', True)
1150
+
1151
+ # Get additional data from week_data
1152
+ classification_counts = week_data.get('classification_counts', {})
1153
+ total_commits = week_data.get('total_commits', 0)
1154
+ commits_with_tickets = week_data.get('commits_with_tickets', 0)
1155
+ ticket_coverage = week_data.get('ticket_coverage', 0)
1156
+ activity_score = week_data.get('activity_score', 0.0)
1157
+
1158
+ # Handle weeks with no activity
1159
+ if not classifications and not has_activity:
1160
+ report.write(f" - Week {i+1} ({week_display}): No activity\n")
1161
+ continue
1162
+ elif not classifications:
1163
+ # Should not happen, but handle gracefully
1164
+ continue
1165
+
1166
+ # Format classifications with absolute numbers and percentages
1167
+ classification_parts = []
1168
+ for category in sorted(classifications.keys()):
1169
+ percentage = classifications[category]
1170
+
1171
+ # Find the count for this formatted category name by reverse mapping
1172
+ count = 0
1173
+ for raw_category, raw_count in classification_counts.items():
1174
+ if self._format_category_name(raw_category) == category:
1175
+ count = raw_count
1176
+ break
1177
+
1178
+ change = changes.get(category, 0.0)
1179
+
1180
+ if i == 0 or abs(change) < 1.0:
1181
+ # First week or no significant change - show count and percentage
1182
+ classification_parts.append(f"{category} {count} ({percentage:.0f}%)")
1183
+ else:
1184
+ # Show change from previous week
1185
+ change_indicator = f"(+{change:.0f}%)" if change > 0 else f"({change:.0f}%)"
1186
+ classification_parts.append(f"{category} {count} ({percentage:.0f}% {change_indicator})")
1187
+
1188
+ if classification_parts:
1189
+ classifications_text = ", ".join(classification_parts)
1190
+ # Add total commits, ticket coverage, and activity score to the week summary
1191
+ if total_commits > 0:
1192
+ ticket_info = f" | {commits_with_tickets}/{total_commits} tickets ({ticket_coverage:.0f}%)" if commits_with_tickets > 0 else f" | 0/{total_commits} tickets (0%)"
1193
+ activity_info = f" | Activity: {activity_score:.1f}/100"
1194
+ report.write(f" - Week {i+1} ({week_display}): {classifications_text}{ticket_info}{activity_info}\n")
1195
+ else:
1196
+ report.write(f" - Week {i+1} ({week_display}): {classifications_text}\n")
1197
+ else:
1198
+ # Fallback in case classifications exist but are empty
1199
+ report.write(f" - Week {i+1} ({week_display}): No significant activity\n")
1200
+
1201
+        # Trailing spacing after the trend lines is intentionally left to the caller.
1203
+
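# --- Illustrative rendering (editorial sketch, invented data; the exact
# week_display format comes from the upstream weekly-trend calculator):
#
#   - Weekly Trends:
#     - Week 1 (2024-01-08): Bug Fixes 3 (30%), Features 5 (50%) | 6/10 tickets (60%) | Activity: 42.3/100
#     - Week 2 (2024-01-15): No activity
#     - Week 3 (2024-01-22): Features 4 (57% (+7%)) | 3/7 tickets (43%) | Activity: 38.0/100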
1204
+ def _write_team_composition(
1205
+ self,
1206
+ report: StringIO,
1207
+ developer_stats: list[dict[str, Any]],
1208
+ focus_data: list[dict[str, Any]],
1209
+        commits: list[dict[str, Any]] | None = None,
+        prs: list[dict[str, Any]] | None = None,
+        ticket_analysis: dict[str, Any] | None = None,
1212
+ weeks: int = 4,
1213
+ ) -> None:
1214
+ """Write team composition analysis with activity scores and commit classifications.
1215
+
1216
+ WHY: Enhanced team composition shows not just how much each developer commits,
1217
+ but what types of work they're doing. This provides actionable insights into
1218
+ developer specializations, training needs, and work distribution patterns.
1219
+ """
1220
+ report.write("### Developer Profiles\n\n")
1221
+
107
1222
  # Create developer lookup for focus data
108
- focus_lookup = {d['developer']: d for d in focus_data}
1223
+ focus_lookup = {d["developer"]: d for d in focus_data}
1224
+
1225
+ # Calculate activity scores for all developers
1226
+ activity_scores = {}
1227
+ dev_metrics = {} # Initialize outside if block to ensure it's always defined
1228
+
1229
+ if commits:
1230
+ # Aggregate metrics by developer
1231
+ for commit in commits:
1232
+ canonical_id = commit.get("canonical_id", "")
1233
+ if canonical_id not in dev_metrics:
1234
+ dev_metrics[canonical_id] = {
1235
+ "commits": 0,
1236
+ "lines_added": 0,
1237
+ "lines_removed": 0,
1238
+ "files_changed": set(),
1239
+ "complexity_delta": 0,
1240
+ "prs_involved": 0,
1241
+ }
1242
+
1243
+ metrics = dev_metrics[canonical_id]
1244
+ metrics["commits"] += 1
1245
+ metrics["lines_added"] += commit.get(
1246
+ "filtered_insertions", commit.get("insertions", 0)
1247
+ ) or 0
1248
+ metrics["lines_removed"] += commit.get(
1249
+ "filtered_deletions", commit.get("deletions", 0)
1250
+ ) or 0
1251
+ metrics["complexity_delta"] += commit.get("complexity_delta", 0) or 0
1252
+
1253
+ # Track unique files
1254
+ files = commit.get("files_changed", [])
1255
+ if isinstance(files, list):
1256
+ # Only update if metrics["files_changed"] is still a set
1257
+ if isinstance(metrics["files_changed"], set):
1258
+ metrics["files_changed"].update(files)
1259
+                    else:
+                        # Already collapsed to an int; the original file names are
+                        # gone, so add this commit's unique-file count rather than
+                        # resetting the aggregate to an empty set
+                        metrics["files_changed"] += len(set(files))
1263
+ elif isinstance(files, int):
1264
+ # If it's already aggregated, just add the count
1265
+ if isinstance(metrics["files_changed"], set):
1266
+ metrics["files_changed"] = len(metrics["files_changed"]) + files
1267
+ else:
1268
+ metrics["files_changed"] += files
1269
+
1270
+ # Count PRs per developer
1271
+ if prs:
1272
+ for pr in prs:
1273
+ author = pr.get("author", "")
1274
+ # Map PR author to canonical ID - need to look up in developer_stats
1275
+ for dev in developer_stats:
1276
+ if (
1277
+ dev.get("github_username") == author
1278
+ or dev.get("primary_name") == author
1279
+ ):
1280
+ canonical_id = dev.get("canonical_id")
1281
+ if canonical_id in dev_metrics:
1282
+ dev_metrics[canonical_id]["prs_involved"] += 1
1283
+ break
1284
+
1285
+ # Calculate scores
1286
+ raw_scores_for_curve = {}
1287
+ for canonical_id, metrics in dev_metrics.items():
1288
+ # Convert set to count
1289
+ if isinstance(metrics["files_changed"], set):
1290
+ metrics["files_changed"] = len(metrics["files_changed"])
1291
+
1292
+ score_result = self.activity_scorer.calculate_activity_score(metrics)
1293
+ activity_scores[canonical_id] = score_result
1294
+ raw_scores_for_curve[canonical_id] = score_result["raw_score"]
1295
+
1296
+ # Apply curve normalization
1297
+ curve_normalized = self.activity_scorer.normalize_scores_on_curve(raw_scores_for_curve)
1298
+
1299
+ # Update activity scores with curve data
1300
+ for canonical_id, curve_data in curve_normalized.items():
1301
+ if canonical_id in activity_scores:
1302
+ activity_scores[canonical_id]["curve_data"] = curve_data
1303
+
1304
+ # Calculate team scores for relative ranking
1305
+ all_scores = [score["raw_score"] for score in activity_scores.values()]
1306
+
1307
+ # Consolidate developer_stats by canonical_id to avoid duplicates from identity aliasing
1308
+ consolidated_devs = {}
1309
+ for dev in developer_stats:
1310
+ canonical_id = dev.get("canonical_id")
1311
+ if canonical_id and canonical_id not in consolidated_devs:
1312
+ consolidated_devs[canonical_id] = dev
1313
+
1314
+ # BUGFIX: Only include developers who have commits in the analysis period
1315
+ # Filter using dev_metrics (period-specific) instead of developer_stats (all-time)
1316
+ active_devs = {}
109
1317
 
110
- for dev in developer_stats[:10]: # Top 10 developers
111
- name = dev['primary_name']
112
- commits = dev['total_commits']
1318
+ # Only process developers if we have commit data for the period
1319
+ for canonical_id, dev in consolidated_devs.items():
1320
+ # Only include developers who have commits in the current analysis period
1321
+ if canonical_id in dev_metrics:
1322
+ active_devs[canonical_id] = dev
1323
+ # If no commits in period, no developers will be shown
1324
+ # (This handles the case where all commits are outside the analysis period)
1325
+
1326
+ for canonical_id, dev in active_devs.items(): # Only developers with commits in period
1327
+ # Handle both 'primary_name' (production) and 'name' (tests) for backward compatibility
1328
+ name = dev.get("primary_name", dev.get("name", "Unknown Developer"))
113
1329
 
1330
+ # BUGFIX: Use period-specific commit count instead of all-time total
1331
+ # Safety check: dev_metrics should exist if we got here, but be defensive
1332
+ if canonical_id in dev_metrics:
1333
+ period_commits = dev_metrics[canonical_id]["commits"]
1334
+ total_commits = period_commits # For backward compatibility with existing logic
1335
+ else:
1336
+ # Fallback (shouldn't happen with the filtering above)
1337
+ total_commits = 0
1338
+
114
1339
  report.write(f"**{name}**\n")
115
- report.write(f"- Commits: {commits}\n")
116
1340
 
1341
+            # Commit classification breakdown, with a simple count as fallback
+            wrote_classification_breakdown = False
+            if ticket_analysis:
+                classifications = self._aggregate_commit_classifications(
+                    ticket_analysis, commits, developer_stats
+                )
+                dev_classifications = classifications.get(canonical_id, {})
+                total_classified = sum(dev_classifications.values())
+
+                if dev_classifications and total_classified > 0:
+                    # Sort categories by count (descending) and format as
+                    # "Features: 15 (45%), Bug Fixes: 8 (24%), ..."
+                    sorted_categories = sorted(
+                        dev_classifications.items(), key=lambda x: x[1], reverse=True
+                    )
+                    category_parts = []
+                    for category, count in sorted_categories:
+                        pct = (count / total_classified) * 100
+                        display_name = self._format_category_name(category)
+                        category_parts.append(f"{display_name}: {count} ({pct:.0f}%)")
+
+                    # Show top categories only, to keep the line readable
+                    max_categories = 5
+                    if len(category_parts) > max_categories:
+                        shown_parts = category_parts[:max_categories]
+                        remaining = len(category_parts) - max_categories
+                        shown_parts.append(f"({remaining} more)")
+                        category_display = ", ".join(shown_parts)
+                    else:
+                        category_display = ", ".join(category_parts)
+
+                    report.write(f"- Commits: {category_display}\n")
+                    wrote_classification_breakdown = True
+
+            if not wrote_classification_breakdown:
+                # No usable classification data for this developer
+                report.write(f"- Commits: {total_commits}\n")
+
+            if ticket_analysis:
+                # Ticket coverage is only known when ticket analysis ran
+                ticket_coverage_pct = dev.get("ticket_coverage_pct", 0)
+                report.write(f"- Ticket Coverage: {ticket_coverage_pct:.1f}%\n")
+
+            # Weekly trend lines, written once for every branch above
+            if commits:
+                weekly_trends = self._calculate_weekly_classification_percentages(
+                    commits, developer_id=canonical_id, weeks=weeks,
+                    analysis_start_date=self._analysis_start_date,
+                    analysis_end_date=self._analysis_end_date,
+                )
+                if weekly_trends:
+                    self._write_weekly_trend_lines(report, weekly_trends)
+                else:
+                    # Fall back to simple period-over-period trends
+                    trends = self._calculate_classification_trends(
+                        commits, developer_id=canonical_id, weeks=weeks
+                    )
+                    trend_line = self._format_trend_line(trends)
+                    if trend_line:
+                        report.write(f"- {trend_line}\n")
1465
+
1466
+ # Add activity score if available
1467
+ if canonical_id and canonical_id in activity_scores:
1468
+ score_data = activity_scores[canonical_id]
1469
+
1470
+ # Use curve data if available, otherwise fall back to relative scoring
1471
+ if "curve_data" in score_data:
1472
+ curve_data = score_data["curve_data"]
1473
+ report.write(
1474
+ f"- Activity Score: {curve_data['curved_score']:.1f}/100 "
1475
+ f"({curve_data['activity_level']}, {curve_data['level_description']})\n"
1476
+ )
1477
+ else:
1478
+ relative_data = self.activity_scorer.calculate_team_relative_score(
1479
+ score_data["raw_score"], all_scores
1480
+ )
1481
+ report.write(
1482
+ f"- Activity Score: {score_data['normalized_score']:.1f}/100 "
1483
+ f"({score_data['activity_level']}, {relative_data['percentile']:.0f}th percentile)\n"
1484
+ )
1485
+
117
1486
  # Add focus data if available
118
1487
  if name in focus_lookup:
119
1488
  focus = focus_lookup[name]
120
- report.write(f"- Primary Project: {focus['primary_project']} ")
121
- report.write(f"({focus['primary_project_pct']:.1f}% of time)\n")
1489
+
1490
+ # Get all projects for this developer - check for both naming patterns
1491
+ project_percentages = []
1492
+
1493
+                # First try the "<project>_dev_pct" pattern; the 0.05% floor keeps
+                # small-but-real allocations while filtering out noise
+                for key in focus:
+                    if key.endswith("_dev_pct") and focus[key] > 0.05:
1496
+ project_name = key.replace("_dev_pct", "")
1497
+ project_percentages.append((project_name, focus[key]))
1498
+
1499
+ # If no _dev_pct found, try _pct pattern
1500
+ if not project_percentages:
1501
+ for key in focus:
1502
+ if (
1503
+ key.endswith("_pct")
1504
+ and not key.startswith("primary_")
1505
+ and focus[key] > 0.05
1506
+ ):
1507
+ project_name = key.replace("_pct", "")
1508
+ project_percentages.append((project_name, focus[key]))
1509
+
1510
+ # Sort by percentage descending
1511
+ project_percentages.sort(key=lambda x: x[1], reverse=True)
1512
+
1513
+ # Build projects string - show all projects above threshold with percentages
1514
+ if project_percentages:
1515
+ projects_str = ", ".join(
1516
+ f"{proj} ({pct:.1f}%)" for proj, pct in project_percentages
1517
+ )
1518
+ report.write(f"- Projects: {projects_str}\n")
1519
+ else:
1520
+                    # Fall back to the primary project; show it even when its
+                    # percentage is below the threshold so the line is never empty
+                    primary_project = focus.get("primary_project", "UNKNOWN")
+                    primary_pct = focus.get("primary_project_pct", 0)
+                    report.write(f"- Projects: {primary_project} ({primary_pct:.1f}%)\n")
1528
+
122
1529
  report.write(f"- Work Style: {focus['work_style']}\n")
123
1530
  report.write(f"- Active Pattern: {focus['time_pattern']}\n")
124
-
1531
+
125
1532
  report.write("\n")
126
-
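# --- Editorial sketch of the curving step used in _write_team_composition,
# assuming normalize_scores_on_curve ranks raw scores into 0-100 percentiles
# (the real ActivityScorer may use a different curve):
def curve(raw_scores: dict[str, float]) -> dict[str, dict[str, object]]:
    ordered = sorted(raw_scores.values())
    curved = {}
    for dev, raw in raw_scores.items():
        pct = 100.0 * ordered.index(raw) / max(len(ordered) - 1, 1)
        level = "high" if pct >= 75 else "moderate" if pct >= 25 else "low"
        curved[dev] = {"curved_score": pct, "activity_level": level}
    return curved

# e.g. curve({"a": 12.0, "b": 30.0, "c": 7.5}) -> c 0.0, a 50.0, b 100.0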
127
- def _write_project_activity(self, report: StringIO, activity_dist: List[Dict[str, Any]],
128
- commits: List[Dict[str, Any]]) -> None:
129
- """Write project activity breakdown."""
130
- # Aggregate by project
131
- project_totals: Dict[str, Dict[str, Any]] = {}
1533
+
1534
+ def _write_project_activity(
1535
+        self,
+        report: StringIO,
+        activity_dist: list[dict[str, Any]],
+        commits: list[dict[str, Any]],
+        branch_health_metrics: dict[str, dict[str, Any]] | None = None,
+        ticket_analysis: dict[str, Any] | None = None,
+        weeks: int = 4,
1539
+ ) -> None:
1540
+ """Write project activity breakdown with commit classifications.
1541
+
1542
+ WHY: Enhanced project activity section now includes commit classification
1543
+ breakdown per project, providing insights into what types of work are
1544
+ happening in each project (features, bug fixes, refactoring, etc.).
1545
+ This helps identify project-specific development patterns.
1546
+ """
1547
+ # Aggregate by project with developer details
1548
+ project_totals: dict[str, dict[str, Any]] = {}
1549
+ project_developers: dict[str, dict[str, int]] = {}
1550
+
132
1551
  for row in activity_dist:
133
- project = row['project']
1552
+ # Handle missing fields gracefully for test compatibility
1553
+ project = row.get("project", "UNKNOWN")
1554
+ developer = row.get("developer", "Unknown Developer")
1555
+
134
1556
  if project not in project_totals:
135
- project_totals[project] = {
136
- 'commits': 0,
137
- 'lines': 0,
138
- 'developers': set()
139
- }
1557
+ project_totals[project] = {"commits": 0, "lines": 0, "developers": set()}
1558
+ project_developers[project] = {}
1559
+
140
1560
  data = project_totals[project]
141
- data['commits'] += row['commits']
142
- data['lines'] += row['lines_changed']
143
- developers_set: Set[str] = data['developers']
144
- developers_set.add(row['developer'])
145
-
1561
+ # Handle missing fields gracefully for test compatibility
1562
+ data["commits"] += row.get("commits", 1) # Default to 1 if missing
1563
+ data["lines"] += row.get("lines_changed", 0)
1564
+ developers_set: set[str] = data["developers"]
1565
+ developers_set.add(developer)
1566
+
1567
+ # Track commits per developer per project
1568
+ if developer not in project_developers[project]:
1569
+ project_developers[project][developer] = 0
1570
+ project_developers[project][developer] += row.get(
1571
+ "commits", 1
1572
+ ) # Default to 1 if missing
1573
+
146
1574
  # Sort by commits
147
- sorted_projects = sorted(project_totals.items(),
148
- key=lambda x: x[1]['commits'], reverse=True)
149
-
1575
+ sorted_projects = sorted(
1576
+ project_totals.items(), key=lambda x: x[1]["commits"], reverse=True
1577
+ )
1578
+
1579
+ # Calculate total commits across all projects in activity distribution
1580
+ total_activity_commits = sum(data["commits"] for data in project_totals.values())
1581
+
150
1582
  report.write("### Activity by Project\n\n")
151
1583
  for project, data in sorted_projects:
152
1584
  report.write(f"**{project}**\n")
153
1585
  report.write(f"- Commits: {data['commits']} ")
154
- report.write(f"({data['commits'] / len(commits) * 100:.1f}% of total)\n")
1586
+ report.write(f"({data['commits'] / total_activity_commits * 100:.1f}% of total)\n")
155
1587
  report.write(f"- Lines Changed: {data['lines']:,}\n")
156
- report.write(f"- Active Developers: {len(data['developers'])}\n\n")
157
-
158
- def _write_development_patterns(self, report: StringIO, insights: List[Dict[str, Any]],
159
- focus_data: List[Dict[str, Any]]) -> None:
1588
+
1589
+ # Get developer contributions for this project
1590
+ dev_contributions = project_developers[project]
1591
+ # Sort by commits descending
1592
+ sorted_devs = sorted(dev_contributions.items(), key=lambda x: x[1], reverse=True)
1593
+
1594
+ # Build contributors string
1595
+ contributors = []
1596
+ for dev_name, dev_commits in sorted_devs:
1597
+ dev_pct = dev_commits / data["commits"] * 100
1598
+ contributors.append(f"{dev_name} ({dev_pct:.1f}%)")
1599
+
1600
+ contributors_str = ", ".join(contributors)
1601
+ report.write(f"- Contributors: {contributors_str}\n")
1602
+
1603
+ # Add commit classification breakdown for this project
1604
+ if ticket_analysis:
1605
+ project_classifications = self._get_project_classifications(project, commits, ticket_analysis)
1606
+ if project_classifications:
1607
+ # Sort categories by count (descending)
1608
+ sorted_categories = sorted(
1609
+ project_classifications.items(),
1610
+ key=lambda x: x[1],
1611
+ reverse=True
1612
+ )
1613
+
1614
+ # Calculate total for percentages
1615
+ total_classified = sum(project_classifications.values())
1616
+ if total_classified > 0:
1617
+ category_parts = []
1618
+ for category, count in sorted_categories:
1619
+ pct = (count / total_classified) * 100
1620
+ display_name = self._format_category_name(category)
1621
+ category_parts.append(f"{display_name}: {count} ({pct:.0f}%)")
1622
+
1623
+ # Show top categories to avoid excessive length
1624
+ max_categories = 4
1625
+ if len(category_parts) > max_categories:
1626
+ shown_parts = category_parts[:max_categories]
1627
+ remaining = len(category_parts) - max_categories
1628
+ shown_parts.append(f"({remaining} more)")
1629
+ category_display = ", ".join(shown_parts)
1630
+ else:
1631
+ category_display = ", ".join(category_parts)
1632
+
1633
+ report.write(f"- Classifications: {category_display}\n")
1634
+
1635
+ # Add project-level weekly trend lines
1636
+ if commits:
1637
+ project_weekly_trends = self._calculate_weekly_classification_percentages(
1638
+ commits, project_key=project, weeks=weeks,
1639
+ analysis_start_date=self._analysis_start_date,
1640
+ analysis_end_date=self._analysis_end_date
1641
+ )
1642
+ if project_weekly_trends:
1643
+ self._write_weekly_trend_lines(report, project_weekly_trends, "Project ")
1644
+ else:
1645
+ # Fallback to simple project trend analysis
1646
+ project_trends = self._calculate_classification_trends(
1647
+ commits, project_key=project, weeks=weeks
1648
+ )
1649
+ project_trend_line = self._format_trend_line(
1650
+ project_trends, prefix="📊 Weekly Trend"
1651
+ )
1652
+ if project_trend_line:
1653
+ report.write(f"- {project_trend_line}\n")
1654
+
1655
+ # Add branch health for this project/repository if available
1656
+ if branch_health_metrics and project in branch_health_metrics:
1657
+ repo_health = branch_health_metrics[project]
1658
+ summary = repo_health.get("summary", {})
1659
+ health_indicators = repo_health.get("health_indicators", {})
1660
+ branches = repo_health.get("branches", [])
1661
+
1662
+ health_score = health_indicators.get("overall_health_score", 0)
1663
+ total_branches = summary.get("total_branches", 0)
1664
+ stale_branches = summary.get("stale_branches", 0)
1665
+ active_branches = summary.get("active_branches", 0)
1666
+ long_lived_branches = summary.get("long_lived_branches", 0)
1667
+
1668
+ # Determine health status
1669
+ if health_score >= 80:
1670
+ status_emoji = "🟢"
1671
+ status_text = "Excellent"
1672
+ elif health_score >= 60:
1673
+ status_emoji = "🟡"
1674
+ status_text = "Good"
1675
+ elif health_score >= 40:
1676
+ status_emoji = "🟠"
1677
+ status_text = "Fair"
1678
+ else:
1679
+ status_emoji = "🔴"
1680
+ status_text = "Needs Attention"
1681
+
1682
+ report.write("\n**Branch Management**\n")
1683
+ report.write(f"- Overall Health: {status_emoji} {status_text} ({health_score:.0f}/100)\n")
1684
+ report.write(f"- Total Branches: {total_branches}\n")
1685
+ report.write(f" - Active: {active_branches} branches\n")
1686
+ report.write(f" - Long-lived: {long_lived_branches} branches (>30 days)\n")
1687
+ report.write(f" - Stale: {stale_branches} branches (>90 days)\n")
1688
+
1689
+ # Show top problematic branches if any
1690
+ if branches:
1691
+ # Sort branches by health score (ascending) to get worst first
1692
+ problem_branches = [b for b in branches if b.get("health_score", 100) < 60 and not b.get("is_merged", False)]
1693
+ problem_branches.sort(key=lambda x: x.get("health_score", 100))
1694
+
1695
+ if problem_branches:
1696
+ report.write("\n**Branches Needing Attention**:\n")
1697
+ for i, branch in enumerate(problem_branches[:3]): # Show top 3
1698
+ name = branch.get("name", "unknown")
1699
+ age = branch.get("age_days", 0)
1700
+ behind = branch.get("behind_main", 0)
1701
+ ahead = branch.get("ahead_of_main", 0)
1702
+ score = branch.get("health_score", 0)
1703
+
1704
+ report.write(f" {i+1}. `{name}` (score: {score:.0f}/100)\n")
1705
+ report.write(f" - Age: {age} days\n")
1706
+ if behind > 0:
1707
+ report.write(f" - Behind main: {behind} commits\n")
1708
+ if ahead > 0:
1709
+ report.write(f" - Ahead of main: {ahead} commits\n")
1710
+
1711
+ # Add recommendations
1712
+ recommendations = repo_health.get("recommendations", [])
1713
+ if recommendations:
1714
+ report.write("\n**Recommended Actions**:\n")
1715
+ for rec in recommendations[:3]: # Show top 3 recommendations
1716
+ report.write(f"- {rec}\n")
1717
+
1718
+ report.write("\n")
1719
+
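# --- The branch-health status mapping used above, factored out for reference
# (editorial sketch; the thresholds are exactly the ones in this method):
def health_status(score: float) -> tuple[str, str]:
    if score >= 80:
        return "🟢", "Excellent"
    if score >= 60:
        return "🟡", "Good"
    if score >= 40:
        return "🟠", "Fair"
    return "🔴", "Needs Attention"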
1720
+ def _get_week_start(self, date: datetime) -> datetime:
1721
+ """Get Monday of the week for a given date."""
1722
+ # Ensure consistent timezone handling - keep timezone info
1723
+ if hasattr(date, "tzinfo") and date.tzinfo is not None:
1724
+ # Keep timezone-aware but ensure it's UTC
1725
+ if date.tzinfo != timezone.utc:
1726
+ date = date.astimezone(timezone.utc)
1727
+ else:
1728
+ # Convert naive datetime to UTC timezone-aware
1729
+ date = date.replace(tzinfo=timezone.utc)
1730
+
1731
+ days_since_monday = date.weekday()
1732
+ monday = date - timedelta(days=days_since_monday)
1733
+ result = monday.replace(hour=0, minute=0, second=0, microsecond=0)
1734
+
1735
+ return result
1736
+
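# --- Quick check of _get_week_start semantics (editorial sketch; `writer`
# is a hypothetical instance). Naive datetimes are treated as UTC, and any
# date collapses to Monday 00:00 of its week:
from datetime import datetime, timezone

assert writer._get_week_start(
    datetime(2024, 1, 10, 15, 30, tzinfo=timezone.utc)  # a Wednesday
) == datetime(2024, 1, 8, tzinfo=timezone.utc)          # that week's Monday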
1737
+ def _write_development_patterns(
1738
+ self, report: StringIO, insights: list[dict[str, Any]], focus_data: list[dict[str, Any]]
1739
+ ) -> None:
160
1740
  """Write development patterns analysis."""
161
1741
  report.write("### Key Patterns Identified\n\n")
162
-
163
- # Group insights by category
164
- by_category: Dict[str, List[Dict[str, Any]]] = {}
1742
+
1743
+ # Group insights by category (handle missing category field gracefully)
1744
+ by_category: dict[str, list[dict[str, Any]]] = {}
165
1745
  for insight in insights:
166
- category = insight['category']
1746
+ category = insight.get("category", "General")
167
1747
  if category not in by_category:
168
1748
  by_category[category] = []
169
1749
  by_category[category].append(insight)
170
-
1750
+
171
1751
  for category, category_insights in by_category.items():
172
1752
  report.write(f"**{category}**:\n")
173
1753
  for insight in category_insights:
174
- report.write(f"- {insight['insight']}: {insight['value']} ")
175
- report.write(f"({insight['impact']})\n")
1754
+ # Handle missing fields gracefully for test compatibility
1755
+ insight_text = insight.get("insight", insight.get("metric", "Unknown"))
1756
+ insight_value = insight.get("value", "N/A")
1757
+ insight_impact = insight.get("impact", "No impact specified")
1758
+ report.write(f"- {insight_text}: {insight_value} ")
1759
+ report.write(f"({insight_impact})\n")
176
1760
  report.write("\n")
177
-
178
- # Add focus insights
1761
+
1762
+ # Add focus insights (handle missing focus_score field gracefully)
179
1763
  if focus_data:
180
- avg_focus = sum(d['focus_score'] for d in focus_data) / len(focus_data)
181
- report.write(f"**Developer Focus**: Average focus score of {avg_focus:.1f}% ")
182
-
183
- if avg_focus > 80:
184
- report.write("indicates strong project concentration\n")
185
- elif avg_focus > 60:
186
- report.write("shows moderate multi-project work\n")
187
- else:
188
- report.write("suggests high context switching\n")
189
-
190
- def _write_pr_analysis(self, report: StringIO, pr_metrics: Dict[str, Any],
191
- prs: List[Dict[str, Any]]) -> None:
1764
+ # Use focus_ratio if focus_score is not available
1765
+ focus_scores = []
1766
+ for d in focus_data:
1767
+ if "focus_score" in d:
1768
+ focus_scores.append(d["focus_score"])
1769
+ elif "focus_ratio" in d:
1770
+ focus_scores.append(d["focus_ratio"] * 100) # Convert ratio to percentage
1771
+ else:
1772
+ focus_scores.append(50) # Default value
1773
+
1774
+ if focus_scores:
1775
+ avg_focus = sum(focus_scores) / len(focus_scores)
1776
+ report.write(f"**Developer Focus**: Average focus score of {avg_focus:.1f}% ")
1777
+
1778
+ if avg_focus > 80:
1779
+ report.write("indicates strong project concentration\n")
1780
+ elif avg_focus > 60:
1781
+ report.write("shows moderate multi-project work\n")
1782
+ else:
1783
+ report.write("suggests high context switching\n")
1784
+
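# --- Worked example of the focus fallback above (editorial sketch):
# focus_score is used when present, otherwise focus_ratio is scaled to a
# percentage, with 50 as the neutral default.
focus_data = [{"focus_score": 90.0}, {"focus_ratio": 0.7}]
scores = [d.get("focus_score", d.get("focus_ratio", 0.5) * 100) for d in focus_data]
avg = sum(scores) / len(scores)  # (90 + 70) / 2 = 80.0
# 80.0 is not strictly greater than 80, so this team reads as
# "shows moderate multi-project work".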
1785
+ def _write_pr_analysis(
1786
+ self, report: StringIO, pr_metrics: dict[str, Any], prs: list[dict[str, Any]]
1787
+ ) -> None:
192
1788
  """Write pull request analysis."""
193
- report.write(f"- **Total PRs Merged**: {pr_metrics['total_prs']}\n")
194
- report.write(f"- **Average PR Size**: {pr_metrics['avg_pr_size']:.0f} lines\n")
195
- report.write(f"- **Average PR Lifetime**: {pr_metrics['avg_pr_lifetime_hours']:.1f} hours\n")
196
- report.write(f"- **Story Point Coverage**: {pr_metrics['story_point_coverage']:.1f}%\n")
197
-
198
- if pr_metrics['total_review_comments'] > 0:
199
- report.write(f"- **Total Review Comments**: {pr_metrics['total_review_comments']}\n")
200
- avg_comments = pr_metrics['total_review_comments'] / pr_metrics['total_prs']
1789
+ report.write(f"- **Total PRs Merged**: {pr_metrics.get('total_prs', 0)}\n")
1790
+ report.write(f"- **Average PR Size**: {pr_metrics.get('avg_pr_size', 0):.0f} lines\n")
1791
+
1792
+ # Handle optional metrics gracefully
1793
+ if "avg_pr_lifetime_hours" in pr_metrics:
1794
+ report.write(
1795
+ f"- **Average PR Lifetime**: {pr_metrics['avg_pr_lifetime_hours']:.1f} hours\n"
1796
+ )
1797
+
1798
+ if "story_point_coverage" in pr_metrics:
1799
+ report.write(f"- **Story Point Coverage**: {pr_metrics['story_point_coverage']:.1f}%\n")
1800
+
1801
+ total_comments = pr_metrics.get("total_review_comments", 0)
1802
+ if total_comments > 0:
1803
+ report.write(f"- **Total Review Comments**: {total_comments}\n")
1804
+ total_prs = pr_metrics.get("total_prs", 1)
1805
+ avg_comments = total_comments / total_prs if total_prs > 0 else 0
201
1806
  report.write(f"- **Average Comments per PR**: {avg_comments:.1f}\n")
202
-
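# --- Illustrative input/output for _write_pr_analysis (editorial sketch,
# invented numbers):
pr_metrics = {"total_prs": 42, "avg_pr_size": 180.0, "total_review_comments": 84}
# Renders:
#   - **Total PRs Merged**: 42
#   - **Average PR Size**: 180 lines
#   - **Total Review Comments**: 84
#   - **Average Comments per PR**: 2.0
# The lifetime and story-point lines are skipped because their keys are absent.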
203
- def _write_ticket_tracking(self, report: StringIO, ticket_analysis: Dict[str, Any]) -> None:
204
- """Write ticket tracking analysis."""
205
- report.write("### Platform Usage\n\n")
206
-
207
- total_tickets = sum(ticket_analysis['ticket_summary'].values())
208
-
209
- for platform, count in sorted(ticket_analysis['ticket_summary'].items(),
210
- key=lambda x: x[1], reverse=True):
211
- pct = count / total_tickets * 100 if total_tickets > 0 else 0
212
- report.write(f"- **{platform.title()}**: {count} tickets ({pct:.1f}%)\n")
213
-
214
- report.write(f"\n### Coverage Analysis\n\n")
215
- report.write(f"- **Commits with Tickets**: {ticket_analysis['commits_with_tickets']} ")
216
- report.write(f"of {ticket_analysis['total_commits']} ")
217
- report.write(f"({ticket_analysis['commit_coverage_pct']:.1f}%)\n")
218
-
219
- if ticket_analysis['untracked_commits']:
220
- report.write(f"\n### Significant Untracked Work\n\n")
221
- for commit in ticket_analysis['untracked_commits'][:5]:
222
- report.write(f"- `{commit['hash']}`: {commit['message']} ")
223
- report.write(f"({commit['files_changed']} files)\n")
224
-
225
- def _write_recommendations(self, report: StringIO, insights: List[Dict[str, Any]],
226
- ticket_analysis: Dict[str, Any], focus_data: List[Dict[str, Any]]) -> None:
1807
+
1808
+ def _write_ticket_tracking(
1809
+ self,
1810
+ report: StringIO,
1811
+ ticket_analysis: dict[str, Any],
1812
+ developer_stats: list[dict[str, Any]],
1813
+ ) -> None:
1814
+ """Write ticket tracking analysis with simplified platform usage section."""
1815
+ # Simplified platform usage matching old report format
1816
+ ticket_summary = ticket_analysis.get("ticket_summary", {})
1817
+ total_tickets = sum(ticket_summary.values()) if ticket_summary else 0
1818
+
1819
+ if total_tickets > 0:
1820
+ report.write("### Platform Usage\n\n")
1821
+ for platform, count in sorted(ticket_summary.items(), key=lambda x: x[1], reverse=True):
1822
+ pct = count / total_tickets * 100 if total_tickets > 0 else 0
1823
+ report.write(f"- **{platform.title()}**: {count} tickets ({pct:.1f}%)\n")
1824
+
1825
+ report.write("\n### Coverage Analysis\n\n")
1826
+
1827
+ # Handle missing fields gracefully
1828
+ commits_with_tickets = ticket_analysis.get("commits_with_tickets", 0)
1829
+ total_commits = ticket_analysis.get("total_commits", 0)
1830
+ coverage_pct = ticket_analysis.get("commit_coverage_pct", 0)
1831
+
1832
+ # Debug logging for ticket coverage issues
1833
+ logger.debug(f"Ticket coverage analysis - commits_with_tickets: {commits_with_tickets}, total_commits: {total_commits}, coverage_pct: {coverage_pct}")
1834
+ if commits_with_tickets == 0 and total_commits > 0:
1835
+ logger.warning(f"No commits found with ticket references out of {total_commits} total commits")
1836
+ # Log sample of ticket_analysis structure for debugging
1837
+ if "ticket_summary" in ticket_analysis:
1838
+ logger.debug(f"Ticket summary: {ticket_analysis['ticket_summary']}")
1839
+ if "ticket_platforms" in ticket_analysis:
1840
+ logger.debug(f"Ticket platforms: {ticket_analysis['ticket_platforms']}")
1841
+
1842
+ report.write(f"- **Commits with Tickets**: {commits_with_tickets} ")
1843
+ report.write(f"of {total_commits} ")
1844
+ report.write(f"({coverage_pct:.1f}%)\n")
1845
+
1846
+ # Enhanced untracked commits reporting
1847
+ untracked_commits = ticket_analysis.get("untracked_commits", [])
1848
+ if untracked_commits:
1849
+ self._write_enhanced_untracked_analysis(
1850
+ report, untracked_commits, ticket_analysis, developer_stats
1851
+ )
1852
+
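# --- Minimal ticket_analysis payload accepted above (editorial sketch; keys
# taken from the reads in this method, values invented):
ticket_analysis = {
    "ticket_summary": {"jira": 120, "github": 30},  # Platform Usage: 80% / 20%
    "commits_with_tickets": 150,
    "total_commits": 200,
    "commit_coverage_pct": 75.0,
    "untracked_commits": [],  # an empty list skips the untracked-work analysis
}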
1853
+ def _write_enhanced_untracked_analysis(
1854
+ self,
1855
+ report: StringIO,
1856
+ untracked_commits: list[dict[str, Any]],
1857
+ ticket_analysis: dict[str, Any],
1858
+ developer_stats: list[dict[str, Any]],
1859
+ ) -> None:
1860
+ """Write comprehensive untracked commits analysis.
1861
+
1862
+ WHY: Enhanced untracked analysis provides actionable insights into what
1863
+ types of work are happening outside the tracked process, helping identify
1864
+ process improvements and training opportunities.
1865
+ """
1866
+ report.write("\n### Untracked Work Analysis\n\n")
1867
+
1868
+ total_untracked = len(untracked_commits)
1869
+ total_commits = ticket_analysis.get("total_commits", 0)
1870
+ untracked_pct = (total_untracked / total_commits * 100) if total_commits > 0 else 0
1871
+
1872
+ report.write(
1873
+ f"**Summary**: {total_untracked} commits ({untracked_pct:.1f}% of total) lack ticket references.\n\n"
1874
+ )
1875
+
1876
+ # Analyze categories
1877
+ categories = {}
1878
+ contributors = {}
1879
+ projects = {}
1880
+
1881
+ for commit in untracked_commits:
1882
+ # Category analysis
1883
+ category = commit.get("category", "other")
1884
+ if category not in categories:
1885
+ categories[category] = {"count": 0, "lines": 0, "examples": []}
1886
+ categories[category]["count"] += 1
1887
+ categories[category]["lines"] += commit.get("lines_changed", 0)
1888
+ if len(categories[category]["examples"]) < 2:
1889
+ categories[category]["examples"].append(
1890
+ {
1891
+ "hash": commit.get("hash", ""),
1892
+ "message": commit.get("message", ""),
1893
+ "author": commit.get("author", ""),
1894
+ }
1895
+ )
1896
+
1897
+ # Contributor analysis
1898
+ author = commit.get("author", "Unknown")
1899
+ if author not in contributors:
1900
+ contributors[author] = {"count": 0, "categories": set()}
1901
+ contributors[author]["count"] += 1
1902
+ contributors[author]["categories"].add(category)
1903
+
1904
+ # Project analysis
1905
+ project = commit.get("project_key", "UNKNOWN")
1906
+ if project not in projects:
1907
+ projects[project] = {"count": 0, "categories": set()}
1908
+ projects[project]["count"] += 1
1909
+ projects[project]["categories"].add(category)
1910
+
1911
+ # Write category breakdown
1912
+ if categories:
1913
+ report.write("#### Work Categories\n\n")
1914
+ sorted_categories = sorted(
1915
+ categories.items(), key=lambda x: x[1]["count"], reverse=True
1916
+ )
1917
+
1918
+ for category, data in sorted_categories[:8]: # Show top 8 categories
1919
+ pct = (data["count"] / total_untracked) * 100
1920
+ avg_size = data["lines"] / data["count"] if data["count"] > 0 else 0
1921
+
1922
+ # Categorize the impact
1923
+ if category in ["style", "documentation", "maintenance"]:
1924
+ impact_note = " *(acceptable untracked)*"
1925
+ elif category in ["feature", "bug_fix"]:
1926
+ impact_note = " *(should be tracked)*"
1927
+ else:
1928
+ impact_note = ""
1929
+
1930
+ report.write(f"- **{category.replace('_', ' ').title()}**: ")
1931
+ report.write(f"{data['count']} commits ({pct:.1f}%), ")
1932
+ report.write(f"avg {avg_size:.0f} lines{impact_note}\n")
1933
+
1934
+ # Add examples
1935
+ if data["examples"]:
1936
+ for example in data["examples"]:
1937
+ report.write(f" - `{example['hash']}`: {example['message'][:80]}...\n")
1938
+ report.write("\n")
1939
+
1940
+ # Write top contributors to untracked work with enhanced percentage analysis
1941
+ if contributors:
1942
+ report.write("#### Top Contributors (Untracked Work)\n\n")
1943
+
1944
+ # Create developer lookup for total commits
1945
+ dev_lookup = {}
1946
+ for dev in developer_stats:
1947
+ # Map canonical_id to developer data
1948
+ dev_lookup[dev["canonical_id"]] = dev
1949
+ # Also map primary name and primary email as fallbacks
1950
+ dev_lookup[dev["primary_name"]] = dev
1951
+ dev_lookup[dev["primary_email"]] = dev
1952
+
1953
+ sorted_contributors = sorted(
1954
+ contributors.items(), key=lambda x: x[1]["count"], reverse=True
1955
+ )
1956
+
1957
+ for author, data in sorted_contributors[:5]: # Show top 5
1958
+ untracked_count = data["count"]
1959
+ pct_of_untracked = (untracked_count / total_untracked) * 100
1960
+
1961
+ # Find developer's total commits to calculate percentage of their work that's untracked
1962
+ dev_data = dev_lookup.get(author)
1963
+ if dev_data:
1964
+ total_dev_commits = dev_data["total_commits"]
1965
+ pct_of_dev_work = (
1966
+ (untracked_count / total_dev_commits) * 100 if total_dev_commits > 0 else 0
1967
+ )
1968
+ dev_context = f", {pct_of_dev_work:.1f}% of their work"
1969
+ else:
1970
+ dev_context = ""
1971
+
1972
+ categories_list = list(data["categories"])
1973
+ categories_str = ", ".join(categories_list[:3]) # Show up to 3 categories
1974
+ if len(categories_list) > 3:
1975
+ categories_str += f" (+{len(categories_list) - 3} more)"
1976
+
1977
+ report.write(f"- **{author}**: {untracked_count} commits ")
1978
+ report.write(f"({pct_of_untracked:.1f}% of untracked{dev_context}) - ")
1979
+ report.write(f"*{categories_str}*\n")
1980
+ report.write("\n")
1981
+
1982
+ # Write project breakdown
1983
+ if len(projects) > 1:
1984
+ report.write("#### Projects with Untracked Work\n\n")
1985
+ sorted_projects = sorted(projects.items(), key=lambda x: x[1]["count"], reverse=True)
1986
+
1987
+ for project, data in sorted_projects:
1988
+ pct = (data["count"] / total_untracked) * 100
1989
+ report.write(f"- **{project}**: {data['count']} commits ({pct:.1f}%)\n")
1991
+ report.write("\n")
1992
+
1993
+ # Write recent examples (configurable limit, default 15 for better visibility)
1994
+ if untracked_commits:
1995
+ report.write("#### Recent Untracked Commits\n\n")
1996
+
1997
+ # Show configurable number of recent commits (increased from 10 to 15)
1998
+ max_recent_commits = 15
1999
+
2000
+ # Safe timestamp sorting that handles mixed timezone types
2001
+ def safe_timestamp_key(commit):
2002
+ ts = commit.get("timestamp")
2003
+ if ts is None:
2004
+ return datetime.min.replace(tzinfo=timezone.utc)
2005
+ # If it's a datetime object, handle timezone issues
2006
+ if hasattr(ts, "tzinfo"):
2007
+ # Make timezone-naive datetime UTC-aware for consistent comparison
2008
+ if ts.tzinfo is None:
2009
+ ts = ts.replace(tzinfo=timezone.utc)
2010
+ return ts
2011
+ # If it's a string or other type, try to parse or use as-is
2012
+ return ts
2013
+
2014
+ recent_commits = sorted(
2015
+ untracked_commits, key=safe_timestamp_key, reverse=True
2016
+ )[:max_recent_commits]
2017
+
2018
+ if len(untracked_commits) > max_recent_commits:
2019
+ report.write(
2020
+ f"*Showing {max_recent_commits} most recent of {len(untracked_commits)} untracked commits*\n\n"
2021
+ )
2022
+
2023
+ for commit in recent_commits:
2024
+ # Format date
2025
+ timestamp = commit.get("timestamp")
2026
+ if timestamp and hasattr(timestamp, "strftime"):
2027
+ date_str = timestamp.strftime("%Y-%m-%d")
2028
+ else:
2029
+ date_str = "unknown date"
2030
+
2031
+ report.write(f"- `{commit.get('hash', '')}` ({date_str}) ")
2032
+ report.write(f"**{commit.get('author', 'Unknown')}** ")
2033
+ report.write(f"[{commit.get('category', 'other')}]: ")
2034
+ report.write(f"{commit.get('message', '')[:100]}")
2035
+ if len(commit.get("message", "")) > 100:
2036
+ report.write("...")
2037
+ report.write(f" *({commit.get('files_changed', 0)} files, ")
2038
+ report.write(f"{commit.get('lines_changed', 0)} lines)*\n")
2039
+ report.write("\n")
2040
+
2041
+ # Add recommendations based on untracked analysis
2042
+ self._write_untracked_recommendations(
2043
+ report, categories, contributors, total_untracked, total_commits
2044
+ )
2045
+
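# --- The mixed-timezone sort above, in isolation (editorial sketch): naive
# timestamps are coerced to UTC and missing ones sink to datetime.min, so
# sorting never raises "can't compare offset-naive and offset-aware datetimes".
from datetime import datetime, timezone

commits = [
    {"timestamp": datetime(2024, 1, 5)},                        # naive -> UTC
    {"timestamp": datetime(2024, 1, 7, tzinfo=timezone.utc)},   # aware
    {"timestamp": None},                                        # -> datetime.min
]

def key(c):
    ts = c.get("timestamp") or datetime.min.replace(tzinfo=timezone.utc)
    return ts.replace(tzinfo=timezone.utc) if ts.tzinfo is None else ts

newest_first = sorted(commits, key=key, reverse=True)  # Jan 7, Jan 5, None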
2046
+ def _write_untracked_recommendations(
2047
+ self,
2048
+ report: StringIO,
2049
+ categories: dict[str, Any],
2050
+ contributors: dict[str, Any],
2051
+ total_untracked: int,
2052
+ total_commits: int,
2053
+ ) -> None:
2054
+ """Write specific recommendations based on untracked commit analysis."""
2055
+ report.write("#### Recommendations for Untracked Work\n\n")
2056
+
2057
+ recommendations = []
2058
+
2059
+ # Category-based recommendations
2060
+ feature_count = categories.get("feature", {}).get("count", 0)
2061
+ bug_fix_count = categories.get("bug_fix", {}).get("count", 0)
2062
+ maintenance_count = categories.get("maintenance", {}).get("count", 0)
2063
+ docs_count = categories.get("documentation", {}).get("count", 0)
2064
+ style_count = categories.get("style", {}).get("count", 0)
2065
+
2066
+ if feature_count > total_untracked * 0.2:
2067
+ recommendations.append(
2068
+ "🎫 **Require tickets for features**: Many feature developments lack ticket references. "
2069
+ "Consider enforcing ticket creation for new functionality."
2070
+ )
2071
+
2072
+ if bug_fix_count > total_untracked * 0.15:
2073
+ recommendations.append(
2074
+ "🐛 **Track bug fixes**: Bug fixes should be linked to issue tickets for better "
2075
+ "visibility and follow-up."
2076
+ )
2077
+
2078
+ # Positive recognition for appropriate untracked work
2079
+ acceptable_count = maintenance_count + docs_count + style_count
2080
+ if acceptable_count > total_untracked * 0.6:
2081
+ recommendations.append(
2082
+ "✅ **Good process balance**: Most untracked work consists of maintenance, "
2083
+ "documentation, and style improvements - this is acceptable and shows good "
2084
+ "development hygiene."
2085
+ )
2086
+
2087
+ # Coverage recommendations
2088
+ untracked_pct = (total_untracked / total_commits * 100) if total_commits > 0 else 0
2089
+ if untracked_pct > 50:
2090
+ recommendations.append(
2091
+ "📈 **Improve overall tracking**: Over 50% of commits lack ticket references. "
2092
+ "Consider team training on linking commits to work items."
2093
+ )
2094
+ elif untracked_pct < 20:
2095
+ recommendations.append(
2096
+ "🎯 **Excellent tracking**: Less than 20% of commits are untracked - "
2097
+ "the team shows strong process adherence."
2098
+ )
2099
+
2100
+ # Developer-specific recommendations
2101
+ if len(contributors) > 1:
2102
+ max_contributor_pct = max(
2103
+ (data["count"] / total_untracked * 100) for data in contributors.values()
2104
+ )
2105
+ if max_contributor_pct > 40:
2106
+ recommendations.append(
2107
+ "👥 **Targeted training**: Some developers need additional guidance on "
2108
+ "ticket referencing practices. Consider peer mentoring or process review."
2109
+ )
2110
+
2111
+ if not recommendations:
2112
+ recommendations.append(
2113
+ "✅ **Balanced approach**: Untracked work appears well-balanced between "
2114
+ "necessary maintenance and tracked development work."
2115
+ )
2116
+
2117
+ for rec in recommendations:
2118
+ report.write(f"{rec}\n\n")
2119
+
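# --- Worked example of the thresholds above (editorial sketch, invented
# counts): with total_commits=300 and total_untracked=100 (33.3%, so neither
# coverage recommendation fires), categories of feature=25, maintenance=40,
# documentation=20, style=15 trigger both the "require tickets for features"
# note (25 > 100 * 0.2) and the positive hygiene note
# (40 + 20 + 15 = 75 > 100 * 0.6).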
2120
+ def _write_recommendations(
2121
+ self,
2122
+ report: StringIO,
2123
+ insights: list[dict[str, Any]],
2124
+ ticket_analysis: dict[str, Any],
2125
+ focus_data: list[dict[str, Any]],
2126
+ ) -> None:
227
2127
  """Write recommendations based on analysis."""
228
2128
  recommendations = []
229
-
2129
+
230
2130
  # Ticket coverage recommendations
231
- coverage = ticket_analysis['commit_coverage_pct']
2131
+ coverage = ticket_analysis["commit_coverage_pct"]
232
2132
  if coverage < 50:
233
2133
  recommendations.append(
234
2134
  "🎫 **Improve ticket tracking**: Current coverage is below 50%. "
235
2135
  "Consider enforcing ticket references in commit messages or PR descriptions."
236
2136
  )
237
-
238
- # Work distribution recommendations
2137
+
2138
+ # Work distribution recommendations (handle missing insight field gracefully)
239
2139
  for insight in insights:
240
- if insight['insight'] == 'Work distribution':
241
- if 'unbalanced' in insight['value'].lower():
2140
+ insight_text = insight.get("insight", insight.get("metric", ""))
2141
+ if insight_text == "Work distribution":
2142
+ insight_value = str(insight.get("value", ""))
2143
+ if "unbalanced" in insight_value.lower():
242
2144
  recommendations.append(
243
2145
  "⚖️ **Balance workload**: Work is concentrated among few developers. "
244
2146
  "Consider distributing tasks more evenly or adding team members."
245
2147
  )
246
-
247
- # Focus recommendations
2148
+
2149
+ # Focus recommendations (handle missing focus_score field gracefully)
248
2150
  if focus_data:
249
- low_focus = [d for d in focus_data if d['focus_score'] < 50]
2151
+ low_focus = []
2152
+ for d in focus_data:
2153
+ focus_score = d.get("focus_score", d.get("focus_ratio", 0.5) * 100)
2154
+ if focus_score < 50:
2155
+ low_focus.append(d)
250
2156
  if len(low_focus) > len(focus_data) / 2:
251
2157
  recommendations.append(
252
2158
  "🎯 **Reduce context switching**: Many developers work across multiple projects. "
253
2159
  "Consider more focused project assignments to improve efficiency."
254
2160
  )
255
-
256
- # Branching strategy
2161
+
2162
+ # Branching strategy (handle missing insight field gracefully)
257
2163
  for insight in insights:
258
- if insight['insight'] == 'Branching strategy' and 'Heavy' in insight['value']:
2164
+ insight_text = insight.get("insight", insight.get("metric", ""))
2165
+ insight_value = str(insight.get("value", ""))
2166
+ if insight_text == "Branching strategy" and "Heavy" in insight_value:
259
2167
  recommendations.append(
260
2168
  "🌿 **Review branching strategy**: High percentage of merge commits suggests "
261
2169
  "complex branching. Consider simplifying the Git workflow."
262
2170
  )
263
-
2171
+
264
2172
  if recommendations:
265
2173
  for rec in recommendations:
266
2174
  report.write(f"{rec}\n\n")
267
2175
  else:
268
2176
  report.write("✅ The team shows healthy development patterns. ")
269
- report.write("Continue current practices while monitoring for changes.\n")
2177
+ report.write("Continue current practices while monitoring for changes.\n")
2178
+
2179
+ def _write_commit_classification_analysis(
2180
+ self, report: StringIO, ticket_analysis: dict[str, Any]
2181
+ ) -> None:
2182
+ """Write commit classification analysis section.
2183
+
2184
+ WHY: This section provides insights into automated commit categorization
2185
+ quality and distribution, helping teams understand their development patterns
2186
+ and the effectiveness of ML-based categorization.
2187
+
2188
+ Args:
2189
+ report: StringIO buffer to write to
2190
+ ticket_analysis: Ticket analysis data containing ML classification results
2191
+ """
2192
+ ml_analysis = ticket_analysis.get("ml_analysis", {})
2193
+ if not ml_analysis.get("enabled", False):
2194
+ return
2195
+
2196
+ report.write("The team's commit patterns reveal the following automated classification insights:\n\n")
2197
+
2198
+ # Overall classification statistics
2199
+ total_ml_predictions = ml_analysis.get("total_ml_predictions", 0)
2200
+ total_rule_predictions = ml_analysis.get("total_rule_predictions", 0)
2201
+ total_cached_predictions = ml_analysis.get("total_cached_predictions", 0)
2202
+ total_predictions = total_ml_predictions + total_rule_predictions + total_cached_predictions
2203
+
2204
+ if total_predictions > 0:
2205
+ report.write("### Classification Method Distribution\n\n")
2206
+
2207
+ # Calculate percentages
2208
+ ml_pct = (total_ml_predictions / total_predictions) * 100
2209
+ rules_pct = (total_rule_predictions / total_predictions) * 100
2210
+ cached_pct = (total_cached_predictions / total_predictions) * 100
2211
+
2212
+ report.write(f"- **ML-based Classifications**: {total_ml_predictions} commits ({ml_pct:.1f}%)\n")
2213
+ report.write(f"- **Rule-based Classifications**: {total_rule_predictions} commits ({rules_pct:.1f}%)\n")
2214
+ report.write(f"- **Cached Results**: {total_cached_predictions} commits ({cached_pct:.1f}%)\n\n")
2215
+
2216
+ # Classification confidence analysis
2217
+ avg_confidence = ml_analysis.get("avg_confidence", 0)
2218
+ confidence_dist = ml_analysis.get("confidence_distribution", {})
2219
+
2220
+ if confidence_dist:
2221
+ report.write("### Classification Confidence\n\n")
2222
+ report.write(f"- **Average Confidence**: {avg_confidence:.1%} across all classifications\n")
2223
+
2224
+ high_conf = confidence_dist.get("high", 0)
2225
+ medium_conf = confidence_dist.get("medium", 0)
2226
+ low_conf = confidence_dist.get("low", 0)
2227
+ total_conf_items = high_conf + medium_conf + low_conf
2228
+
2229
+ if total_conf_items > 0:
2230
+ high_pct = (high_conf / total_conf_items) * 100
2231
+ medium_pct = (medium_conf / total_conf_items) * 100
2232
+ low_pct = (low_conf / total_conf_items) * 100
2233
+
2234
+ report.write(f"- **High Confidence** (≥80%): {high_conf} commits ({high_pct:.1f}%)\n")
2235
+ report.write(f"- **Medium Confidence** (60-79%): {medium_conf} commits ({medium_pct:.1f}%)\n")
2236
+ report.write(f"- **Low Confidence** (<60%): {low_conf} commits ({low_pct:.1f}%)\n\n")
2237
+
2238
+ # Category confidence breakdown
2239
+ category_confidence = ml_analysis.get("category_confidence", {})
2240
+ if category_confidence:
2241
+ report.write("### Classification Categories\n\n")
2242
+
2243
+ # Sort categories by count (descending)
2244
+ sorted_categories = sorted(
2245
+ category_confidence.items(),
2246
+ key=lambda x: x[1].get("count", 0),
2247
+ reverse=True
2248
+ )
2249
+
2250
+ # Calculate total commits for percentages
2251
+ total_categorized = sum(data.get("count", 0) for data in category_confidence.values())
2252
+
2253
+ for category, data in sorted_categories:
2254
+ count = data.get("count", 0)
2255
+ avg_conf = data.get("avg", 0)
2256
+
2257
+ if count > 0:
2258
+ category_pct = (count / total_categorized) * 100
2259
+ category_display = category.replace("_", " ").title()
2260
+ report.write(f"- **{category_display}**: {count} commits ({category_pct:.1f}%, avg confidence: {avg_conf:.1%})\n")
2261
+
2262
+ report.write("\n")
2263
+
2264
+ # Performance metrics
2265
+ processing_stats = ml_analysis.get("processing_time_stats", {})
2266
+ if processing_stats.get("total_ms", 0) > 0:
2267
+ avg_ms = processing_stats.get("avg_ms", 0)
2268
+ total_ms = processing_stats.get("total_ms", 0)
2269
+
2270
+ report.write("### Processing Performance\n\n")
2271
+ report.write(f"- **Average Processing Time**: {avg_ms:.1f}ms per commit\n")
2272
+ report.write(f"- **Total Processing Time**: {total_ms:.0f}ms ({total_ms/1000:.1f} seconds)\n\n")
2273
+
2274
+
2275
+ else:
2276
+ report.write("No classification data available for analysis.\n\n")
2277
+
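# --- Illustrative ml_analysis payload (editorial sketch, invented numbers)
# showing the arithmetic above:
ml_analysis = {
    "enabled": True,
    "total_ml_predictions": 120,     # 120 / 200 -> 60.0%
    "total_rule_predictions": 60,    #  60 / 200 -> 30.0%
    "total_cached_predictions": 20,  #  20 / 200 -> 10.0%
    "avg_confidence": 0.82,
    "confidence_distribution": {"high": 130, "medium": 50, "low": 20},  # 65/25/10%
}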
2278
+ def _write_pm_insights(self, report: StringIO, pm_data: dict[str, Any]) -> None:
2279
+ """Write PM platform integration insights.
2280
+
2281
+ WHY: PM platform integration provides valuable insights into work item
2282
+ tracking, story point accuracy, and development velocity that complement
2283
+ Git-based analytics. This section highlights the value of PM integration.
2284
+ """
2285
+ metrics = pm_data.get("metrics", {})
2286
+
2287
+ # Platform overview
2288
+ platform_coverage = metrics.get("platform_coverage", {})
2289
+ total_issues = metrics.get("total_pm_issues", 0)
2290
+ correlations = len(pm_data.get("correlations", []))
2291
+
2292
+ report.write(f"The team has integrated **{len(platform_coverage)} PM platforms** ")
2293
+ report.write(
2294
+ f"tracking **{total_issues:,} issues** with **{correlations} commit correlations**.\n\n"
2295
+ )
2296
+
2297
+ # Story point analysis
2298
+ story_analysis = metrics.get("story_point_analysis", {})
2299
+ pm_story_points = story_analysis.get("pm_total_story_points", 0)
2300
+ git_story_points = story_analysis.get("git_total_story_points", 0)
2301
+ coverage_pct = story_analysis.get("story_point_coverage_pct", 0)
2302
+
2303
+ if pm_story_points > 0:
2304
+ report.write("### Story Point Tracking\n\n")
2305
+ report.write(f"- **PM Platform Story Points**: {pm_story_points:,}\n")
2306
+ report.write(f"- **Git Extracted Story Points**: {git_story_points:,}\n")
2307
+ report.write(
2308
+ f"- **Story Point Coverage**: {coverage_pct:.1f}% of issues have story points\n"
2309
+ )
2310
+
2311
+ if git_story_points > 0:
2312
+ accuracy = min(git_story_points / pm_story_points, 1.0) * 100
2313
+ report.write(
2314
+ f"- **Extraction Accuracy**: {accuracy:.1f}% of PM story points found in Git\n"
2315
+ )
2316
+ report.write("\n")
2317
+
2318
+ # Issue type distribution
2319
+ issue_types = metrics.get("issue_type_distribution", {})
2320
+ if issue_types:
2321
+ report.write("### Work Item Types\n\n")
2322
+ sorted_types = sorted(issue_types.items(), key=lambda x: x[1], reverse=True)
2323
+ total_typed_issues = sum(issue_types.values())
2324
+
2325
+ for issue_type, count in sorted_types[:5]: # Top 5 types
2326
+ pct = (count / total_typed_issues * 100) if total_typed_issues > 0 else 0
2327
+ report.write(f"- **{issue_type.title()}**: {count} issues ({pct:.1f}%)\n")
2328
+ report.write("\n")
2329
+
2330
+ # Platform-specific insights
2331
+ if platform_coverage:
2332
+ report.write("### Platform Coverage\n\n")
2333
+ for platform, coverage_data in platform_coverage.items():
2334
+ platform_issues = coverage_data.get("total_issues", 0)
2335
+ linked_issues = coverage_data.get("linked_issues", 0)
2336
+ coverage_percentage = coverage_data.get("coverage_percentage", 0)
2337
+
2338
+ report.write(f"**{platform.title()}**: ")
2339
+ report.write(f"{platform_issues} issues, {linked_issues} linked to commits ")
2340
+ report.write(f"({coverage_percentage:.1f}% coverage)\n")
2341
+ report.write("\n")
2342
+
2343
+ # Correlation quality
2344
+ correlation_quality = metrics.get("correlation_quality", {})
2345
+ if correlation_quality.get("total_correlations", 0) > 0:
2346
+ avg_confidence = correlation_quality.get("average_confidence", 0)
2347
+ high_confidence = correlation_quality.get("high_confidence_correlations", 0)
2348
+ correlation_methods = correlation_quality.get("correlation_methods", {})
2349
+
2350
+ report.write("### Correlation Quality\n\n")
2351
+ report.write(f"- **Average Confidence**: {avg_confidence:.2f} (0.0-1.0 scale)\n")
2352
+ report.write(f"- **High Confidence Matches**: {high_confidence} correlations\n")
2353
+
2354
+ if correlation_methods:
2355
+ report.write("- **Methods Used**: ")
2356
+ method_list = [
2357
+ f"{method.replace('_', ' ').title()} ({count})"
2358
+ for method, count in correlation_methods.items()
2359
+ ]
2360
+ report.write(", ".join(method_list))
2361
+ report.write("\n")
2362
+ report.write("\n")
2363
+
2364
+ # Key insights
2365
+ report.write("### Key Insights\n\n")
2366
+
2367
+ if coverage_pct > 80:
2368
+ report.write(
2369
+ "✅ **Excellent story point coverage** - Most issues have effort estimates\n"
2370
+ )
2371
+ elif coverage_pct > 50:
2372
+ report.write(
2373
+ "⚠️ **Moderate story point coverage** - Consider improving estimation practices\n"
2374
+ )
2375
+ else:
2376
+ report.write(
2377
+ "❌ **Low story point coverage** - Story point tracking needs improvement\n"
2378
+ )
2379
+
2380
+ if correlations > total_issues * 0.5:
2381
+ report.write(
2382
+ "✅ **Strong commit-issue correlation** - Good traceability between work items and code\n"
2383
+ )
2384
+ elif correlations > total_issues * 0.2:
2385
+ report.write(
2386
+ "⚠️ **Moderate commit-issue correlation** - Some work items lack code links\n"
2387
+ )
2388
+ else:
2389
+ report.write(
2390
+ "❌ **Weak commit-issue correlation** - Improve ticket referencing in commits\n"
2391
+ )
2392
+
2393
+ if len(platform_coverage) > 1:
2394
+ report.write(
2395
+ "📊 **Multi-platform integration** - Comprehensive work item tracking across tools\n"
2396
+ )
2397
+
2398
+ report.write("\n")
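# --- Worked example of the Key Insights thresholds (editorial sketch,
# invented numbers): with total_issues=400 and correlations=250,
# 250 > 400 * 0.5 = 200 emits the "strong commit-issue correlation" insight,
# and a coverage_pct of 65 lands in the "moderate story point coverage" band
# (50 < 65 <= 80).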