gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. gitflow_analytics/__init__.py +11 -11
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/classification/__init__.py +31 -0
  4. gitflow_analytics/classification/batch_classifier.py +752 -0
  5. gitflow_analytics/classification/classifier.py +464 -0
  6. gitflow_analytics/classification/feature_extractor.py +725 -0
  7. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  8. gitflow_analytics/classification/model.py +455 -0
  9. gitflow_analytics/cli.py +4490 -378
  10. gitflow_analytics/cli_rich.py +503 -0
  11. gitflow_analytics/config/__init__.py +43 -0
  12. gitflow_analytics/config/errors.py +261 -0
  13. gitflow_analytics/config/loader.py +904 -0
  14. gitflow_analytics/config/profiles.py +264 -0
  15. gitflow_analytics/config/repository.py +124 -0
  16. gitflow_analytics/config/schema.py +441 -0
  17. gitflow_analytics/config/validator.py +154 -0
  18. gitflow_analytics/config.py +44 -398
  19. gitflow_analytics/core/analyzer.py +1320 -172
  20. gitflow_analytics/core/branch_mapper.py +132 -132
  21. gitflow_analytics/core/cache.py +1554 -175
  22. gitflow_analytics/core/data_fetcher.py +1193 -0
  23. gitflow_analytics/core/identity.py +571 -185
  24. gitflow_analytics/core/metrics_storage.py +526 -0
  25. gitflow_analytics/core/progress.py +372 -0
  26. gitflow_analytics/core/schema_version.py +269 -0
  27. gitflow_analytics/extractors/base.py +13 -11
  28. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  29. gitflow_analytics/extractors/story_points.py +77 -59
  30. gitflow_analytics/extractors/tickets.py +841 -89
  31. gitflow_analytics/identity_llm/__init__.py +6 -0
  32. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  33. gitflow_analytics/identity_llm/analyzer.py +464 -0
  34. gitflow_analytics/identity_llm/models.py +76 -0
  35. gitflow_analytics/integrations/github_integration.py +258 -87
  36. gitflow_analytics/integrations/jira_integration.py +572 -123
  37. gitflow_analytics/integrations/orchestrator.py +206 -82
  38. gitflow_analytics/metrics/activity_scoring.py +322 -0
  39. gitflow_analytics/metrics/branch_health.py +470 -0
  40. gitflow_analytics/metrics/dora.py +542 -179
  41. gitflow_analytics/models/database.py +986 -59
  42. gitflow_analytics/pm_framework/__init__.py +115 -0
  43. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  44. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  45. gitflow_analytics/pm_framework/base.py +406 -0
  46. gitflow_analytics/pm_framework/models.py +211 -0
  47. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  48. gitflow_analytics/pm_framework/registry.py +333 -0
  49. gitflow_analytics/qualitative/__init__.py +29 -0
  50. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  51. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  52. gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
  53. gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
  54. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
  55. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  56. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  57. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  58. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  59. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  60. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  61. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  62. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  63. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  64. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
  65. gitflow_analytics/qualitative/core/__init__.py +13 -0
  66. gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
  67. gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
  68. gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
  69. gitflow_analytics/qualitative/core/processor.py +673 -0
  70. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  71. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  72. gitflow_analytics/qualitative/models/__init__.py +25 -0
  73. gitflow_analytics/qualitative/models/schemas.py +306 -0
  74. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  75. gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
  76. gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
  77. gitflow_analytics/qualitative/utils/metrics.py +361 -0
  78. gitflow_analytics/qualitative/utils/text_processing.py +285 -0
  79. gitflow_analytics/reports/__init__.py +100 -0
  80. gitflow_analytics/reports/analytics_writer.py +550 -18
  81. gitflow_analytics/reports/base.py +648 -0
  82. gitflow_analytics/reports/branch_health_writer.py +322 -0
  83. gitflow_analytics/reports/classification_writer.py +924 -0
  84. gitflow_analytics/reports/cli_integration.py +427 -0
  85. gitflow_analytics/reports/csv_writer.py +1700 -216
  86. gitflow_analytics/reports/data_models.py +504 -0
  87. gitflow_analytics/reports/database_report_generator.py +427 -0
  88. gitflow_analytics/reports/example_usage.py +344 -0
  89. gitflow_analytics/reports/factory.py +499 -0
  90. gitflow_analytics/reports/formatters.py +698 -0
  91. gitflow_analytics/reports/html_generator.py +1116 -0
  92. gitflow_analytics/reports/interfaces.py +489 -0
  93. gitflow_analytics/reports/json_exporter.py +2770 -0
  94. gitflow_analytics/reports/narrative_writer.py +2289 -158
  95. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  96. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  97. gitflow_analytics/training/__init__.py +5 -0
  98. gitflow_analytics/training/model_loader.py +377 -0
  99. gitflow_analytics/training/pipeline.py +550 -0
  100. gitflow_analytics/tui/__init__.py +5 -0
  101. gitflow_analytics/tui/app.py +724 -0
  102. gitflow_analytics/tui/screens/__init__.py +8 -0
  103. gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
  104. gitflow_analytics/tui/screens/configuration_screen.py +523 -0
  105. gitflow_analytics/tui/screens/loading_screen.py +348 -0
  106. gitflow_analytics/tui/screens/main_screen.py +321 -0
  107. gitflow_analytics/tui/screens/results_screen.py +722 -0
  108. gitflow_analytics/tui/widgets/__init__.py +7 -0
  109. gitflow_analytics/tui/widgets/data_table.py +255 -0
  110. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  111. gitflow_analytics/tui/widgets/progress_widget.py +187 -0
  112. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  113. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  114. gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
  115. gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
  116. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  117. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  118. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  119. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
@@ -1,39 +1,132 @@
1
1
  """Ticket reference extraction for multiple platforms."""
2
+
3
+ import logging
2
4
  import re
3
5
  from collections import defaultdict
4
- from typing import Any, Dict, List
6
+ from datetime import timezone
7
+ from typing import Any, Optional, cast
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
+ def filter_git_artifacts(message: str) -> str:
13
+ """Filter out git artifacts from commit messages before classification.
14
+
15
+ WHY: Git-generated content like Co-authored-by lines, Signed-off-by lines,
16
+ and other metadata should not influence commit classification. This function
17
+ removes such artifacts to provide cleaner input for categorization.
18
+
19
+ Args:
20
+ message: Raw commit message that may contain git artifacts
21
+
22
+ Returns:
23
+ Cleaned commit message with git artifacts removed
24
+ """
25
+ if not message or not message.strip():
26
+ return ""
27
+
28
+ # Remove Co-authored-by lines (including standalone ones)
29
+ message = re.sub(r"^Co-authored-by:.*$", "", message, flags=re.MULTILINE | re.IGNORECASE)
30
+
31
+ # Remove Signed-off-by lines
32
+ message = re.sub(r"^Signed-off-by:.*$", "", message, flags=re.MULTILINE | re.IGNORECASE)
33
+
34
+ # Remove Reviewed-by lines (common in some workflows)
35
+ message = re.sub(r"^Reviewed-by:.*$", "", message, flags=re.MULTILINE | re.IGNORECASE)
36
+
37
+ # Remove Tested-by lines
38
+ message = re.sub(r"^Tested-by:.*$", "", message, flags=re.MULTILINE | re.IGNORECASE)
39
+
40
+ # Remove merge artifact lines (dashes, stars, or other separator patterns)
41
+ message = re.sub(r"^-+$", "", message, flags=re.MULTILINE)
42
+ message = re.sub(r"^\*\s*$", "", message, flags=re.MULTILINE)
43
+ message = re.sub(r"^#+$", "", message, flags=re.MULTILINE)
44
+
45
+ # Remove GitHub Copilot co-authorship lines
46
+ message = re.sub(
47
+ r"^Co-authored-by:.*[Cc]opilot.*$", "", message, flags=re.MULTILINE | re.IGNORECASE
48
+ )
49
+
50
+ # Remove common merge commit artifacts
51
+ message = re.sub(
52
+ r"^\s*Merge\s+(branch|pull request).*$", "", message, flags=re.MULTILINE | re.IGNORECASE
53
+ )
54
+ message = re.sub(
55
+ r"^\s*(into|from)\s+[a-zA-Z0-9/_-]+$", "", message, flags=re.MULTILINE | re.IGNORECASE
56
+ )
57
+
58
+ # Clean up whitespace while preserving meaningful blank lines
59
+ lines = message.split("\n")
60
+ cleaned_lines = []
61
+
62
+ for i, line in enumerate(lines):
63
+ stripped = line.strip()
64
+ if stripped: # Non-empty line
65
+ cleaned_lines.append(stripped)
66
+ elif (
67
+ i > 0
68
+ and i < len(lines) - 1
69
+ and any(line.strip() for line in lines[:i])
70
+ and any(line.strip() for line in lines[i + 1 :])
71
+ ): # Preserve blank lines in middle if there's content both before and after
72
+ cleaned_lines.append("")
73
+
74
+ cleaned = "\n".join(cleaned_lines)
75
+
76
+ # Handle edge cases - empty or dots-only messages
77
+ if not cleaned:
78
+ return ""
79
+
80
+ # Check if message is only dots (with any whitespace)
81
+ dots_only = re.sub(r"[.\s\n]+", "", cleaned) == ""
82
+ if dots_only and "..." in cleaned:
83
+ return ""
84
+
85
+ return cleaned.strip()
5
86
 
6
87
 
7
88
  class TicketExtractor:
8
- """Extract ticket references from various issue tracking systems."""
9
-
10
- def __init__(self, allowed_platforms=None):
89
+ """Extract ticket references from various issue tracking systems.
90
+
91
+ Enhanced to support detailed untracked commit analysis including:
92
+ - Commit categorization (maintenance, bug fix, refactor, docs, etc.)
93
+ - Configurable file change thresholds
94
+ - Extended untracked commit metadata collection
95
+ """
96
+
97
+ def __init__(
98
+ self, allowed_platforms: Optional[list[str]] = None, untracked_file_threshold: int = 1
99
+ ) -> None:
11
100
  """Initialize with patterns for different platforms.
12
-
101
+
13
102
  Args:
14
103
  allowed_platforms: List of platforms to extract tickets from.
15
104
  If None, all platforms are allowed.
105
+ untracked_file_threshold: Minimum number of files changed to consider
106
+ a commit as 'significant' for untracked analysis.
107
+ Default is 1 (all commits), previously was 3.
16
108
  """
17
109
  self.allowed_platforms = allowed_platforms
110
+ self.untracked_file_threshold = untracked_file_threshold
18
111
  self.patterns = {
19
- 'jira': [
20
- r'([A-Z]{2,10}-\d+)', # Standard JIRA format: PROJ-123
112
+ "jira": [
113
+ r"([A-Z]{2,10}-\d+)", # Standard JIRA format: PROJ-123
21
114
  ],
22
- 'github': [
23
- r'#(\d+)', # GitHub issues: #123
24
- r'GH-(\d+)', # Alternative format: GH-123
25
- r'(?:fix|fixes|fixed|close|closes|closed|resolve|resolves|resolved)\s+#(\d+)',
115
+ "github": [
116
+ r"#(\d+)", # GitHub issues: #123
117
+ r"GH-(\d+)", # Alternative format: GH-123
118
+ r"(?:fix|fixes|fixed|close|closes|closed|resolve|resolves|resolved)\s+#(\d+)",
26
119
  ],
27
- 'clickup': [
28
- r'CU-([a-z0-9]+)', # ClickUp: CU-abc123
29
- r'#([a-z0-9]{6,})', # ClickUp short format
120
+ "clickup": [
121
+ r"CU-([a-z0-9]+)", # ClickUp: CU-abc123
122
+ r"#([a-z0-9]{6,})", # ClickUp short format
123
+ ],
124
+ "linear": [
125
+ r"([A-Z]{2,5}-\d+)", # Linear: ENG-123, similar to JIRA
126
+ r"LIN-(\d+)", # Alternative: LIN-123
30
127
  ],
31
- 'linear': [
32
- r'([A-Z]{2,5}-\d+)', # Linear: ENG-123, similar to JIRA
33
- r'LIN-(\d+)', # Alternative: LIN-123
34
- ]
35
128
  }
36
-
129
+
37
130
  # Compile patterns only for allowed platforms
38
131
  self.compiled_patterns = {}
39
132
  for platform, patterns in self.patterns.items():
@@ -41,126 +134,785 @@ class TicketExtractor:
41
134
  if self.allowed_platforms and platform not in self.allowed_platforms:
42
135
  continue
43
136
  self.compiled_patterns[platform] = [
44
- re.compile(pattern, re.IGNORECASE if platform != 'jira' else 0)
137
+ re.compile(pattern, re.IGNORECASE if platform != "jira" else 0)
45
138
  for pattern in patterns
46
139
  ]
47
-
48
- def extract_from_text(self, text: str) -> List[Dict[str, str]]:
140
+
141
+ # Commit categorization patterns
142
+ self.category_patterns = {
143
+ "bug_fix": [
144
+ r"^fix:",
145
+ r"\b(fix|bug|error|issue|problem|crash|exception|failure)\b",
146
+ r"\b(resolve|solve|repair|correct|corrected|address)\b",
147
+ r"\b(hotfix|bugfix|patch|quickfix)\b",
148
+ r"\b(broken|failing|failed|fault|defect)\b",
149
+ r"\b(prevent|stop|avoid)\s+(error|bug|issue|crash)\b",
150
+ r"\b(fixes|resolves|solves)\s+(bug|issue|error|problem)\b",
151
+ r"\b(beacon|beacons)\b.*\b(fix|fixes|issue|problem)\b",
152
+ r"\bmissing\s+(space|field|data|property)\b",
153
+ r"\b(counting|allowing|episodes)\s+(was|not|issue)\b",
154
+ r"^fixes\s+\b(beacon|beacons|combo|issue|problem)\b",
155
+ ],
156
+ "feature": [
157
+ r"^(feat|feature):",
158
+ r"\b(add|new|feature|implement|create|build)\b",
159
+ r"\b(introduce|enhance|extend|expand)\b",
160
+ r"\b(functionality|capability|support|enable)\b",
161
+ r"\b(initial|first)\s+(implementation|version)\b",
162
+ r"\b(addition|initialize|prepare)\b",
163
+ r"added?\s+(new|feature|functionality|capability)\b",
164
+ r"added?\s+(column|field|property|thumbnail)\b",
165
+ r"\b(homilists?|homily|homilies)\b",
166
+ r"\b(sticky|column)\s+(feature|functionality)\b",
167
+ r"adds?\s+(data|localization|beacon)\b",
168
+ r"\b(episode|episodes|audio|video)\s+(feature|support|implementation)\b",
169
+ r"\b(beacon)\s+(implementation|for|tracking)\b",
170
+ r"\b(localization)\s+(data|structure)\b",
171
+ ],
172
+ "refactor": [
173
+ r"\b(refactor|restructure|reorganize|cleanup|clean up)\b",
174
+ r"\b(optimize|improve|simplify|streamline)\b",
175
+ r"\b(rename|move|extract|consolidate)\b",
176
+ r"\b(modernize|redesign|rework|rewrite)\b",
177
+ r"\b(code\s+quality|tech\s+debt|legacy)\b",
178
+ r"\b(refine|ensure|replace)\b",
179
+ r"improves?\s+(performance|efficiency|structure)\b",
180
+ r"improves?\s+(combo|box|focus|behavior)\b",
181
+ r"using\s+\w+\s+instead\s+of\s+\w+\b", # "using X instead of Y" pattern
182
+ ],
183
+ "documentation": [
184
+ r"\b(doc|docs|documentation|readme|comment|comments)\b",
185
+ r"\b(javadoc|jsdoc|docstring|sphinx)\b",
186
+ r"\b(manual|guide|tutorial|how-to|howto)\b",
187
+ r"\b(explain|clarify|describe)\b",
188
+ r"\b(changelog|notes|examples)\b",
189
+ ],
190
+ "deployment": [
191
+ r"^deploy:",
192
+ r"\b(deploy|deployment|publish|rollout)\b",
193
+ r"\b(production|prod|staging|live)\b",
194
+ r"\b(go\s+live|launch|ship)\b",
195
+ r"\b(promote|migration|migrate)\b",
196
+ r"\brelease\s+(v\d+\.\d+|\d+\.\d+\.\d+)?\s+(to|on)\s+(production|staging|live)\b",
197
+ ],
198
+ "configuration": [
199
+ r"\b(config|configure|configuration|setup|settings)\b",
200
+ r"\b(env|environment|parameter|option)\b",
201
+ r"\b(property|properties|yaml|json|xml)\b",
202
+ r"\b(database\s+config|db\s+config|connection)\b",
203
+ r"\.env|\.config|\.yaml|\.json",
204
+ r"\b(setup|configure)\s+(new|for)\b",
205
+ r"\b(user|role|permission|access)\s+(change|update|configuration)\b",
206
+ r"\b(api|service|system)\s+(config|configuration|setup)\b",
207
+ r"\b(role|permission|access)\s+(update|change|management)\b",
208
+ r"\b(schema|model)\s+(update|change|addition)\b",
209
+ r"changing\s+(user|role|permission)\s+(roles?|settings?)\b",
210
+ r"\b(schema)\b(?!.*\b(test|spec)\b)", # Schema but not test schemas
211
+ r"\bsanity\s+schema\b",
212
+ r"changing\s+(some)?\s*(user|role)\s+(roles?|permissions?)\b",
213
+ ],
214
+ "content": [
215
+ r"\b(content|copy|text|wording|messaging)\b",
216
+ r"\b(translation|i18n|l10n|locale|localize)\b",
217
+ r"\b(language|multilingual|international)\b",
218
+ r"\b(strings|labels|captions|titles)\b",
219
+ r"\b(typo|spelling|grammar|proofreading)\b",
220
+ r"\b(typo|spelling)\s+(in|on|for)\b",
221
+ r"\b(spanish|translations?)\b",
222
+ r"\b(blast|banner|video|media)\s+(content|update)\b",
223
+ r"added?\s+(spanish|translation|text|copy|label)\b",
224
+ r"\b(label|message)\s+(change|update|fix)\b",
225
+ ],
226
+ "ui": [
227
+ r"\b(ui|ux|design|layout|styling|visual)\b",
228
+ r"\b(css|scss|sass|less|style)\b",
229
+ r"\b(responsive|mobile|desktop|tablet)\b",
230
+ r"\b(theme|color|font|icon|image)\b",
231
+ r"\b(component|widget|element|button|form)\b",
232
+ r"\b(frontend|front-end|client-side)\b",
233
+ r"\b(sticky|column)\b(?!.*\b(database|table)\b)", # UI sticky, not database
234
+ r"\b(focus|behavior)\b.*\b(combo|box)\b",
235
+ ],
236
+ "infrastructure": [
237
+ r"\b(infra|infrastructure|aws|azure|gcp|cloud)\b",
238
+ r"\b(docker|k8s|kubernetes|container|pod)\b",
239
+ r"\b(terraform|ansible|chef|puppet)\b",
240
+ r"\b(server|hosting|network|load\s+balancer)\b",
241
+ r"\b(monitoring|logging|alerting|metrics)\b",
242
+ ],
243
+ "security": [
244
+ r"\b(security|vulnerability|cve|exploit)\b",
245
+ r"\b(auth|authentication|authorization|permission)\b",
246
+ r"\b(ssl|tls|https|certificate|cert)\b",
247
+ r"\b(encrypt|decrypt|hash|token|oauth)\b",
248
+ r"\b(access\s+control|rbac|cors|xss|csrf)\b",
249
+ r"\b(secure|safety|protect|prevent)\b",
250
+ ],
251
+ "performance": [
252
+ r"\b(perf|performance|optimize|speed|faster)\b",
253
+ r"\b(cache|caching|memory|cpu|disk)\b",
254
+ r"\b(slow|lag|delay|timeout|bottleneck)\b",
255
+ r"\b(efficient|efficiency|throughput|latency)\b",
256
+ r"\b(load\s+time|response\s+time|benchmark)\b",
257
+ r"\b(improve|better)\s+(load|performance|speed)\b",
258
+ ],
259
+ "chore": [
260
+ r"^chore:",
261
+ r"\b(chore|cleanup|housekeeping|maintenance)\b",
262
+ r"\b(routine|regular|scheduled)\b",
263
+ r"\b(lint|linting|format|formatting|prettier)\b",
264
+ r"\b(gitignore|ignore\s+file|artifacts)\b",
265
+ r"\b(console|debug|log|logging)\s+(removal?|clean)\b",
266
+ r"\b(sync|auto-sync)\b",
267
+ r"\b(script\s+update|merge\s+main)\b",
268
+ r"removes?\s+(console|debug|log)\b",
269
+ ],
270
+ "wip": [
271
+ r"\b(wip|work\s+in\s+progress|temp|temporary|tmp)\b",
272
+ r"\b(draft|unfinished|partial|incomplete)\b",
273
+ r"\b(placeholder|todo|fixme)\b",
274
+ r"^wip:",
275
+ r"\b(experiment|experimental|poc|proof\s+of\s+concept)\b",
276
+ r"\b(temporary|temp)\s+(fix|solution|workaround)\b",
277
+ ],
278
+ "version": [
279
+ r"\b(version|bump|tag)\b",
280
+ r"\b(v\d+\.\d+|version\s+\d+|\d+\.\d+\.\d+)\b",
281
+ r"\b(major|minor|patch)\s+(version|release|bump)\b",
282
+ r"^(version|bump):",
283
+ r"\b(prepare\s+for\s+release|pre-release)\b",
284
+ ],
285
+ "maintenance": [
286
+ r"\b(update|upgrade|bump|maintenance|maint)\b",
287
+ r"\b(dependency|dependencies|package|packages)\b",
288
+ r"\b(npm\s+update|pip\s+install|yarn\s+upgrade)\b",
289
+ r"\b(deprecated|obsolete|outdated)\b",
290
+ r"package\.json|requirements\.txt|pom\.xml|Gemfile",
291
+ r"\b(combo|beacon)\s+(hacking|fixes?)\b",
292
+ r"\b(temp|temporary|hack|hacking)\b",
293
+ r"\b(test|testing)\s+(change|update|fix)\b",
294
+ r"\b(more|only)\s+(combo|beacon)\s+(hacking|fires?)\b",
295
+ r"adds?\s+(console|debug|log)\b",
296
+ ],
297
+ "test": [
298
+ r"^test:",
299
+ r"\b(test|testing|spec|unit\s+test|integration\s+test)\b",
300
+ r"\b(junit|pytest|mocha|jest|cypress|selenium)\b",
301
+ r"\b(mock|stub|fixture|factory)\b",
302
+ r"\b(e2e|end-to-end|acceptance|smoke)\b",
303
+ r"\b(coverage|assert|expect|should)\b",
304
+ ],
305
+ "style": [
306
+ r"^style:",
307
+ r"\b(format|formatting|style|lint|linting)\b",
308
+ r"\b(prettier|eslint|black|autopep8|rubocop)\b",
309
+ r"\b(whitespace|indentation|spacing|tabs)\b",
310
+ r"\b(code\s+style|consistent|standardize)\b",
311
+ ],
312
+ "build": [
313
+ r"^build:",
314
+ r"\b(build|compile|bundle|webpack|rollup)\b",
315
+ r"\b(ci|cd|pipeline|workflow|github\s+actions)\b",
316
+ r"\b(docker|dockerfile|makefile|npm\s+scripts)\b",
317
+ r"\b(jenkins|travis|circleci|gitlab)\b",
318
+ r"\b(artifact|binary|executable|jar|war)\b",
319
+ ],
320
+ "integration": [
321
+ r"\b(integrate|integration)\s+(with|posthog|iubenda|auth0)\b",
322
+ r"\b(posthog|iubenda|auth0|oauth|third-party|external)\b",
323
+ r"\b(api|endpoint|service)\s+(integration|connection|setup)\b",
324
+ r"\b(connect|linking|sync)\s+(with|to)\s+[a-z]+(hog|enda|auth)\b",
325
+ r"implement\s+(posthog|iubenda|auth0|api)\b",
326
+ r"adding\s+(posthog|auth|integration)\b",
327
+ r"\b(third-party|external)\s+(service|integration|api)\b",
328
+ r"\bniveles\s+de\s+acceso\s+a\s+la\s+api\b", # Spanish: API access levels
329
+ r"\b(implementation|removing)\s+(iubenda|posthog|auth0)\b",
330
+ ],
331
+ }
332
+
333
+ # Compile categorization patterns
334
+ self.compiled_category_patterns = {}
335
+ for category, patterns in self.category_patterns.items():
336
+ self.compiled_category_patterns[category] = [
337
+ re.compile(pattern, re.IGNORECASE) for pattern in patterns
338
+ ]
339
+
340
+ def extract_from_text(self, text: str) -> list[dict[str, str]]:
49
341
  """Extract all ticket references from text."""
50
342
  if not text:
51
343
  return []
52
-
344
+
53
345
  tickets = []
54
346
  seen = set() # Avoid duplicates
55
-
347
+
56
348
  for platform, patterns in self.compiled_patterns.items():
57
349
  for pattern in patterns:
58
350
  matches = pattern.findall(text)
59
351
  for match in matches:
60
352
  ticket_id = match if isinstance(match, str) else match[0]
61
-
353
+
62
354
  # Normalize ticket ID
63
- if platform == 'jira' or platform == 'linear':
355
+ if platform == "jira" or platform == "linear":
64
356
  ticket_id = ticket_id.upper()
65
-
357
+
66
358
  # Create unique key
67
359
  key = f"{platform}:{ticket_id}"
68
360
  if key not in seen:
69
361
  seen.add(key)
70
- tickets.append({
71
- 'platform': platform,
72
- 'id': ticket_id,
73
- 'full_id': self._format_ticket_id(platform, ticket_id)
74
- })
75
-
362
+ tickets.append(
363
+ {
364
+ "platform": platform,
365
+ "id": ticket_id,
366
+ "full_id": self._format_ticket_id(platform, ticket_id),
367
+ }
368
+ )
369
+
76
370
  return tickets
77
-
78
- def extract_by_platform(self, text: str) -> Dict[str, List[str]]:
371
+
372
+ def extract_by_platform(self, text: str) -> dict[str, list[str]]:
79
373
  """Extract tickets grouped by platform."""
80
374
  tickets = self.extract_from_text(text)
81
-
375
+
82
376
  by_platform = defaultdict(list)
83
377
  for ticket in tickets:
84
- by_platform[ticket['platform']].append(ticket['id'])
85
-
378
+ by_platform[ticket["platform"]].append(ticket["id"])
379
+
86
380
  return dict(by_platform)
87
-
88
- def analyze_ticket_coverage(self, commits: List[Dict[str, Any]],
89
- prs: List[Dict[str, Any]]) -> Dict[str, Any]:
381
+
382
+ def analyze_ticket_coverage(
383
+ self, commits: list[dict[str, Any]], prs: list[dict[str, Any]]
384
+ ) -> dict[str, Any]:
90
385
  """Analyze ticket reference coverage across commits and PRs."""
386
+ ticket_platforms: defaultdict[str, int] = defaultdict(int)
387
+ untracked_commits: list[dict[str, Any]] = []
388
+ ticket_summary: defaultdict[str, set[str]] = defaultdict(set)
389
+
91
390
  results = {
92
- 'total_commits': len(commits),
93
- 'total_prs': len(prs),
94
- 'commits_with_tickets': 0,
95
- 'prs_with_tickets': 0,
96
- 'ticket_platforms': defaultdict(int),
97
- 'untracked_commits': [],
98
- 'ticket_summary': defaultdict(set)
391
+ "total_commits": len(commits),
392
+ "total_prs": len(prs),
393
+ "commits_with_tickets": 0,
394
+ "prs_with_tickets": 0,
395
+ "ticket_platforms": ticket_platforms,
396
+ "untracked_commits": untracked_commits,
397
+ "ticket_summary": ticket_summary,
99
398
  }
100
-
399
+
101
400
  # Analyze commits
401
+ commits_analyzed = 0
402
+ commits_with_ticket_refs = 0
403
+
102
404
  for commit in commits:
103
- ticket_refs = commit.get('ticket_references', [])
405
+ # Debug: check if commit is actually a dictionary
406
+ if not isinstance(commit, dict):
407
+ logger.error(f"Expected commit to be dict, got {type(commit)}: {commit}")
408
+ continue
409
+
410
+ commits_analyzed += 1
411
+ ticket_refs = commit.get("ticket_references", [])
412
+
413
+ # Debug logging for the first few commits
414
+ if commits_analyzed <= 5:
415
+ logger.debug(
416
+ f"Commit {commits_analyzed}: hash={commit.get('hash', 'N/A')[:8]}, ticket_refs={ticket_refs}"
417
+ )
418
+
104
419
  if ticket_refs:
105
- results['commits_with_tickets'] += 1
420
+ commits_with_ticket_refs += 1
421
+ commits_with_tickets = cast(int, results["commits_with_tickets"])
422
+ results["commits_with_tickets"] = commits_with_tickets + 1
106
423
  for ticket in ticket_refs:
107
424
  if isinstance(ticket, dict):
108
- platform = ticket.get('platform', 'unknown')
109
- ticket_id = ticket.get('id', '')
425
+ platform = ticket.get("platform", "unknown")
426
+ ticket_id = ticket.get("id", "")
110
427
  else:
111
428
  # Legacy format - assume JIRA
112
- platform = 'jira'
429
+ platform = "jira"
113
430
  ticket_id = ticket
114
-
115
- results['ticket_platforms'][platform] += 1
116
- results['ticket_summary'][platform].add(ticket_id)
431
+
432
+ platform_count = ticket_platforms[platform]
433
+ ticket_platforms[platform] = platform_count + 1
434
+ ticket_summary[platform].add(ticket_id)
117
435
  else:
118
- # Track significant untracked commits
119
- if (not commit.get('is_merge') and
120
- commit.get('files_changed', 0) > 3):
121
- results['untracked_commits'].append({
122
- 'hash': commit['hash'][:7],
123
- 'message': commit['message'].split('\n')[0][:60],
124
- 'files_changed': commit.get('files_changed', 0)
125
- })
126
-
436
+ # Track untracked commits with configurable threshold and enhanced data
437
+ files_changed = self._get_files_changed_count(commit)
438
+ if not commit.get("is_merge") and files_changed >= self.untracked_file_threshold:
439
+ # Categorize the commit
440
+ category = self.categorize_commit(commit.get("message", ""))
441
+
442
+ # Extract enhanced commit data
443
+ commit_data = {
444
+ "hash": commit.get("hash", "")[:7],
445
+ "full_hash": commit.get("hash", ""),
446
+ "message": commit.get("message", "").split("\n")[0][
447
+ :100
448
+ ], # Increased from 60 to 100
449
+ "full_message": commit.get("message", ""),
450
+ "author": commit.get("author_name", "Unknown"),
451
+ "author_email": commit.get("author_email", ""),
452
+ "canonical_id": commit.get("canonical_id", commit.get("author_email", "")),
453
+ "timestamp": commit.get("timestamp"),
454
+ "project_key": commit.get("project_key", "UNKNOWN"),
455
+ "files_changed": files_changed,
456
+ "lines_added": commit.get("insertions", 0),
457
+ "lines_removed": commit.get("deletions", 0),
458
+ "lines_changed": (commit.get("insertions", 0) + commit.get("deletions", 0)),
459
+ "category": category,
460
+ "is_merge": commit.get("is_merge", False),
461
+ }
462
+
463
+ untracked_commits.append(commit_data)
464
+
127
465
  # Analyze PRs
128
466
  for pr in prs:
129
467
  # Extract tickets from PR title and description
130
468
  pr_text = f"{pr.get('title', '')} {pr.get('description', '')}"
131
469
  tickets = self.extract_from_text(pr_text)
132
-
470
+
133
471
  if tickets:
134
- results['prs_with_tickets'] += 1
472
+ prs_with_tickets = cast(int, results["prs_with_tickets"])
473
+ results["prs_with_tickets"] = prs_with_tickets + 1
135
474
  for ticket in tickets:
136
- platform = ticket['platform']
137
- results['ticket_platforms'][platform] += 1
138
- results['ticket_summary'][platform].add(ticket['id'])
139
-
475
+ platform = ticket["platform"]
476
+ platform_count = ticket_platforms[platform]
477
+ ticket_platforms[platform] = platform_count + 1
478
+ ticket_summary[platform].add(ticket["id"])
479
+
140
480
  # Calculate coverage percentages
141
- results['commit_coverage_pct'] = (
142
- results['commits_with_tickets'] / results['total_commits'] * 100
143
- if results['total_commits'] > 0 else 0
481
+ total_commits = cast(int, results["total_commits"])
482
+ commits_with_tickets_count = cast(int, results["commits_with_tickets"])
483
+ results["commit_coverage_pct"] = (
484
+ commits_with_tickets_count / total_commits * 100 if total_commits > 0 else 0
144
485
  )
145
-
146
- results['pr_coverage_pct'] = (
147
- results['prs_with_tickets'] / results['total_prs'] * 100
148
- if results['total_prs'] > 0 else 0
486
+
487
+ total_prs = cast(int, results["total_prs"])
488
+ prs_with_tickets_count = cast(int, results["prs_with_tickets"])
489
+ results["pr_coverage_pct"] = (
490
+ prs_with_tickets_count / total_prs * 100 if total_prs > 0 else 0
149
491
  )
150
-
492
+
151
493
  # Convert sets to counts for summary
152
- results['ticket_summary'] = {
153
- platform: len(tickets)
154
- for platform, tickets in results['ticket_summary'].items()
494
+ results["ticket_summary"] = {
495
+ platform: len(tickets) for platform, tickets in ticket_summary.items()
155
496
  }
156
-
497
+
498
+ # Sort untracked commits by timestamp (most recent first)
499
+ # Handle timezone-aware and timezone-naive datetimes
500
+ def safe_timestamp_key(commit):
501
+ ts = commit.get("timestamp")
502
+ if ts is None:
503
+ return ""
504
+ # If it's a datetime object, handle timezone issues
505
+ if hasattr(ts, "tzinfo") and ts.tzinfo is None:
506
+ # Make timezone-naive datetime UTC-aware for consistent comparison
507
+ ts = ts.replace(tzinfo=timezone.utc)
508
+ return ts
509
+
510
+ untracked_commits.sort(key=safe_timestamp_key, reverse=True)
511
+
512
+ # Debug logging for ticket coverage analysis
513
+ final_commits_with_tickets = cast(int, results["commits_with_tickets"])
514
+ logger.debug(
515
+ f"Ticket coverage analysis complete: {commits_analyzed} commits analyzed, {commits_with_ticket_refs} had ticket_refs, {final_commits_with_tickets} counted as with tickets"
516
+ )
517
+ if commits_analyzed > 0 and final_commits_with_tickets == 0:
518
+ logger.warning(
519
+ f"Zero commits with tickets found out of {commits_analyzed} commits analyzed"
520
+ )
521
+
157
522
  return results
158
-
523
+
524
def calculate_developer_ticket_coverage(
    self, commits: list[dict[str, Any]]
) -> dict[str, float]:
    """Compute, per developer, the percentage of commits that reference a ticket.

    WHY: Reports need an individual coverage figure for each developer in
    addition to the overall figure.

    DESIGN DECISION: A developer is keyed by canonical_id when it is set
    and truthy, otherwise by author_email (defaulting to "unknown").
    Merge commits are ignored, as are commits whose changed-file count is
    below self.untracked_file_threshold.

    Args:
        commits: Commit dictionaries; the keys read here are is_merge,
            canonical_id, author_email and ticket_references.

    Returns:
        Mapping of developer identifier to coverage percentage, rounded
        to one decimal place. Empty when no commits are supplied.
    """
    if not commits:
        return {}

    totals = {}
    ticketed = {}

    for entry in commits:
        # Merge commits are excluded from coverage entirely.
        if entry.get("is_merge"):
            continue

        # Small commits below the configured file threshold are not counted.
        if self._get_files_changed_count(entry) < self.untracked_file_threshold:
            continue

        dev = entry.get("canonical_id") or entry.get("author_email", "unknown")

        totals[dev] = totals.get(dev, 0) + 1
        has_ticket = 1 if entry.get("ticket_references", []) else 0
        ticketed[dev] = ticketed.get(dev, 0) + has_ticket

    coverage_by_developer = {
        dev: round((ticketed[dev] / count) * 100, 1) if count > 0 else 0.0
        for dev, count in totals.items()
    }

    logger.debug(f"Calculated ticket coverage for {len(coverage_by_developer)} developers")
    return coverage_by_developer
592
+
593
+ def _get_files_changed_count(self, commit: dict[str, Any]) -> int:
594
+ """Extract the number of files changed from commit data.
595
+
596
+ WHY: Commit data can have files_changed as either an integer count
597
+ or a list of file paths. This method handles both cases correctly
598
+ and provides a consistent integer count for analysis.
599
+
600
+ DESIGN DECISION: Priority order is:
601
+ 1. files_changed_count (if present, use directly)
602
+ 2. files_changed as integer (use directly)
603
+ 3. files_changed as list (use length)
604
+ 4. Default to 0 if none available
605
+
606
+ Args:
607
+ commit: Commit data dictionary
608
+
609
+ Returns:
610
+ Integer count of files changed
611
+ """
612
+ # First priority: explicit count field
613
+ if "files_changed_count" in commit:
614
+ return commit["files_changed_count"]
615
+
616
+ # Second priority: files_changed field
617
+ files_changed = commit.get("files_changed")
618
+ if files_changed is not None:
619
+ if isinstance(files_changed, int):
620
+ return files_changed
621
+ elif isinstance(files_changed, list):
622
+ return len(files_changed)
623
+
624
+ # Default fallback
625
+ return 0
626
+
627
def categorize_commit(self, message: str) -> str:
    """Assign a category label to a commit message.

    WHY: Knowing what kind of work a commit represents makes it possible
    to report which kinds of work tend to go untracked.

    DESIGN DECISION: Matching happens in two stages on the lower-cased
    message after git artifacts and ticket references are removed:
    1. Conventional-commit prefixes anchored at the start of the message
       ("fix:", "feat:", "chore:", ...) win outright.
    2. Otherwise the patterns in self.compiled_category_patterns are
       tried category by category in a fixed priority order, and the
       first category with a matching pattern is returned.

    Args:
        message: The commit message to categorize.

    Returns:
        The matched category name, or "other" when the message is empty,
        is empty after artifact filtering, or matches nothing.
    """
    if not message:
        return "other"

    stripped = filter_git_artifacts(message)
    if not stripped:
        return "other"

    # Ticket references say nothing about the type of work, so drop them
    # before matching and compare case-insensitively.
    message_lower = self._remove_ticket_references(stripped).lower()

    # Stage 1: conventional-commit prefixes have absolute priority.
    prefix_rules = (
        ("chore", r"^chore:"),
        ("style", r"^style:"),
        ("bug_fix", r"^fix:"),
        ("feature", r"^(feat|feature):"),
        ("test", r"^test:"),
        ("build", r"^build:"),
        ("deployment", r"^deploy:"),
        ("wip", r"^wip:"),
        ("version", r"^(version|bump):"),
    )
    for label, prefix_regex in prefix_rules:
        if re.match(prefix_regex, message_lower):
            return label

    # Stage 2: configured patterns, tried in this fixed order.
    ordered_labels = (
        "wip",
        "chore",
        "style",
        "bug_fix",
        "feature",
        "test",
        "build",
        "deployment",
        "version",
        "security",
        "performance",
        "infrastructure",
        "integration",
        "configuration",
        "content",
        "ui",
        "documentation",
        "refactor",
        "maintenance",
    )
    configured = (
        label for label in ordered_labels if label in self.compiled_category_patterns
    )
    for label in configured:
        if any(rx.search(message_lower) for rx in self.compiled_category_patterns[label]):
            return label

    return "other"
709
+
710
+ def _remove_ticket_references(self, message: str) -> str:
711
+ """Remove ticket references from commit message to focus on content analysis.
712
+
713
+ WHY: Ticket references like 'RMVP-941' or '[CNA-482]' don't indicate the type
714
+ of work being done. We need to analyze the actual description to properly
715
+ categorize commits with ticket references.
716
+
717
+ Args:
718
+ message: The commit message possibly containing ticket references
719
+
720
+ Returns:
721
+ Message with ticket references removed, focusing on the actual description
722
+ """
723
+ if not message:
724
+ return ""
725
+
726
+ # Remove common ticket patterns at the start of messages
727
+ patterns_to_remove = [
728
+ # JIRA-style patterns
729
+ r"^[A-Z]{2,10}-\d+:?\s*", # RMVP-941: or RMVP-941
730
+ r"^\[[A-Z]{2,10}-\d+\]\s*", # [CNA-482]
731
+ # GitHub issue patterns
732
+ r"^#\d+:?\s*", # #123: or #123
733
+ r"^GH-\d+:?\s*", # GH-123:
734
+ # ClickUp patterns
735
+ r"^CU-[a-z0-9]+:?\s*", # CU-abc123:
736
+ # Linear patterns
737
+ r"^[A-Z]{2,5}-\d+:?\s*", # ENG-123:
738
+ r"^LIN-\d+:?\s*", # LIN-123:
739
+ # GitHub PR patterns in messages
740
+ r"\(#\d+\)$", # (#115) at end
741
+ r"\(#\d+\)\s*\(#\d+\)*\s*$", # (#131) (#133) (#134) at end
742
+ # Other ticket-like patterns
743
+ r"^[A-Z]{2,10}\s+\d+\s*", # NEWS 206
744
+ ]
745
+
746
+ cleaned_message = message
747
+ for pattern in patterns_to_remove:
748
+ cleaned_message = re.sub(pattern, "", cleaned_message, flags=re.IGNORECASE).strip()
749
+
750
+ # If we removed everything, return the original message
751
+ # This handles cases where the entire message was just a ticket reference
752
+ if not cleaned_message.strip():
753
+ return message
754
+
755
+ return cleaned_message
756
+
757
def analyze_untracked_patterns(self, untracked_commits: list[dict[str, Any]]) -> dict[str, Any]:
    """Summarize untracked commits by category, contributor and project.

    WHY: Understanding patterns in untracked work helps identify:
    - Common types of work that bypass ticket tracking
    - Developers who need process guidance
    - Categories of work that should be tracked vs. allowed to be untracked

    DESIGN DECISION: A contributor is keyed by canonical_id when it is
    set and truthy, otherwise by author_email, otherwise "Unknown". This
    matches the fallback used for per-developer ticket coverage, so a
    commit carrying canonical_id=None is attributed to its email instead
    of a None contributor. A lines_changed of None counts as 0.

    Args:
        untracked_commits: List of untracked commit data.

    Returns:
        Dictionary with total_untracked, categories, top_contributors
        (up to five (contributor, count) pairs, highest count first),
        projects, avg_commit_size (one decimal place) and recommendations.
    """
    if not untracked_commits:
        return {
            "total_untracked": 0,
            "categories": {},
            "top_contributors": [],
            "projects": {},
            "avg_commit_size": 0,
            "recommendations": [],
        }

    categories = {}
    contributors = {}
    projects = {}
    total_lines = 0

    # One pass collects all three breakdowns plus the line total.
    for commit in untracked_commits:
        category = commit.get("category", "other")
        lines_changed = commit.get("lines_changed", 0) or 0
        total_lines += lines_changed

        category_data = categories.setdefault(
            category, {"count": 0, "lines_changed": 0, "examples": []}
        )
        category_data["count"] += 1
        category_data["lines_changed"] += lines_changed
        # Keep at most three sample commits per category.
        if len(category_data["examples"]) < 3:
            category_data["examples"].append(
                {
                    "hash": commit.get("hash", ""),
                    "message": commit.get("message", ""),
                    "author": commit.get("author", ""),
                }
            )

        author = commit.get("canonical_id") or commit.get("author_email") or "Unknown"
        author_data = contributors.setdefault(author, {"count": 0, "categories": set()})
        author_data["count"] += 1
        author_data["categories"].add(category)

        project = commit.get("project_key", "UNKNOWN")
        project_data = projects.setdefault(project, {"count": 0, "categories": set()})
        project_data["count"] += 1
        project_data["categories"].add(category)

    # Convert sets to lists for JSON serialization
    for breakdown in (contributors, projects):
        for data in breakdown.values():
            data["categories"] = list(data["categories"])

    top_contributors = sorted(
        ((author, data["count"]) for author, data in contributors.items()),
        key=lambda item: item[1],
        reverse=True,
    )[:5]

    avg_commit_size = total_lines / len(untracked_commits)

    recommendations = self._generate_untracked_recommendations(
        categories, contributors, projects, len(untracked_commits)
    )

    return {
        "total_untracked": len(untracked_commits),
        "categories": categories,
        "top_contributors": top_contributors,
        "projects": projects,
        "avg_commit_size": round(avg_commit_size, 1),
        "recommendations": recommendations,
    }
848
+
849
+ def _generate_untracked_recommendations(
850
+ self,
851
+ categories: dict[str, Any],
852
+ contributors: dict[str, Any],
853
+ projects: dict[str, Any],
854
+ total_untracked: int,
855
+ ) -> list[dict[str, str]]:
856
+ """Generate recommendations based on untracked commit patterns."""
857
+ recommendations = []
858
+
859
+ # Category-based recommendations
860
+ if categories.get("feature", {}).get("count", 0) > total_untracked * 0.2:
861
+ recommendations.append(
862
+ {
863
+ "type": "process",
864
+ "title": "Track Feature Development",
865
+ "description": "Many feature commits lack ticket references. Consider requiring tickets for new features.",
866
+ "priority": "high",
867
+ }
868
+ )
869
+
870
+ if categories.get("bug_fix", {}).get("count", 0) > total_untracked * 0.15:
871
+ recommendations.append(
872
+ {
873
+ "type": "process",
874
+ "title": "Improve Bug Tracking",
875
+ "description": "Bug fixes should be tracked through issue management systems.",
876
+ "priority": "high",
877
+ }
878
+ )
879
+
880
+ # Allow certain categories to be untracked
881
+ low_priority_categories = ["style", "documentation", "maintenance"]
882
+ low_priority_count = sum(
883
+ categories.get(cat, {}).get("count", 0) for cat in low_priority_categories
884
+ )
885
+
886
+ if low_priority_count > total_untracked * 0.6:
887
+ recommendations.append(
888
+ {
889
+ "type": "positive",
890
+ "title": "Appropriate Untracked Work",
891
+ "description": "Most untracked commits are maintenance/style/docs - this is acceptable.",
892
+ "priority": "low",
893
+ }
894
+ )
895
+
896
+ # Contributor-based recommendations
897
+ if len(contributors) > 1:
898
+ max_contributor_count = max(data["count"] for data in contributors.values())
899
+ if max_contributor_count > total_untracked * 0.5:
900
+ recommendations.append(
901
+ {
902
+ "type": "team",
903
+ "title": "Provide Process Training",
904
+ "description": "Some developers need guidance on ticket referencing practices.",
905
+ "priority": "medium",
906
+ }
907
+ )
908
+
909
+ return recommendations
910
+
159
911
  def _format_ticket_id(self, platform: str, ticket_id: str) -> str:
160
912
  """Format ticket ID for display."""
161
- if platform == 'github':
913
+ if platform == "github":
162
914
  return f"#{ticket_id}"
163
- elif platform == 'clickup':
164
- return f"CU-{ticket_id}" if not ticket_id.startswith('CU-') else ticket_id
915
+ elif platform == "clickup":
916
+ return f"CU-{ticket_id}" if not ticket_id.startswith("CU-") else ticket_id
165
917
  else:
166
- return ticket_id
918
+ return ticket_id