gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4108 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +904 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +441 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1193 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
gitflow_analytics/pm_framework/orchestrator.py (new file)
@@ -0,0 +1,652 @@
+"""PM Framework Orchestrator for multi-platform data collection and correlation.
+
+This module provides the main orchestration layer that coordinates data collection
+across multiple PM platforms and correlates issues with Git commits for unified
+analytics.
+"""
+
+import logging
+from datetime import datetime
+from typing import Any, Optional
+
+from .base import BasePlatformAdapter
+from .models import UnifiedIssue, UnifiedProject
+from .registry import PlatformRegistry
+
+# Configure logger for orchestrator
+logger = logging.getLogger(__name__)
+
+
+class PMFrameworkOrchestrator:
+    """Orchestrates data collection across multiple PM platforms.
+
+    WHY: Different organizations use different combinations of PM tools.
+    The orchestrator provides a unified interface for collecting data from
+    multiple platforms simultaneously and correlating that data with Git
+    commits for comprehensive analytics.
+
+    DESIGN DECISION: Use the orchestrator pattern to coordinate multiple adapters
+    rather than requiring callers to manage individual adapters. This provides
+    a clean API and handles cross-platform data correlation logic.
+    """
+
+    def __init__(self, config: dict[str, Any]):
+        """Initialize the PM framework orchestrator.
+
+        Args:
+            config: Configuration dictionary containing PM platform settings.
+                Expected format:
+                {
+                    'pm_platforms': {
+                        'platform_name': {
+                            'enabled': bool,
+                            'config_key': 'config_value',
+                            ...
+                        }
+                    },
+                    'analysis': {
+                        'pm_integration': {
+                            'enabled': bool,
+                            'primary_platform': str,
+                            'correlation': {...}
+                        }
+                    }
+                }
+        """
+        logger.info("Initializing PM Framework Orchestrator...")
+        logger.debug(f"PM platforms in config: {list(config.get('pm_platforms', {}).keys())}")
+
+        self.config = config
+        # Create a new registry instance for this orchestrator so we don't
+        # fall back to the default registry, which may lack credentials
+        self.registry = PlatformRegistry()
+        self.adapters: dict[str, BasePlatformAdapter] = {}
+
+        # Register built-in adapters (currently JIRA; more platforms planned)
+        self._register_builtin_adapters()
+
+        # Configuration for PM integration
+        pm_config = config.get("analysis", {}).get("pm_integration", {})
+        self.pm_integration_enabled = pm_config.get("enabled", False)
+        self.primary_platform = pm_config.get("primary_platform", None)
+
+        logger.info(f"PM integration enabled: {self.pm_integration_enabled}")
+        if self.primary_platform:
+            logger.info(f"Primary platform: {self.primary_platform}")
+
+        # Correlation settings
+        correlation_config = pm_config.get("correlation", {})
+        self.fuzzy_matching_enabled = correlation_config.get("fuzzy_matching", True)
+        self.temporal_window_hours = correlation_config.get("temporal_window_hours", 72)
+        self.confidence_threshold = correlation_config.get("confidence_threshold", 0.8)
+
+        logger.info("PM Framework Orchestrator initialized")
+        if logger.isEnabledFor(logging.DEBUG):
+            # Record where the orchestrator was constructed; useful when
+            # chasing duplicate initialization, silent unless DEBUG is on.
+            import traceback
+
+            logger.debug(
+                "PM Framework init stack trace:\n%s",
+                "".join(traceback.format_stack()[-5:-1]),
+            )
+
+        # Initialize configured platforms if PM integration is enabled
+        if self.pm_integration_enabled:
+            self._initialize_platforms()
+        else:
+            logger.info("PM integration disabled, skipping platform initialization")
+
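The constructor is driven entirely by the plain dictionary described in its docstring. A minimal sketch of wiring it up follows; all values are placeholders, and while `username` and `api_token` are the credential keys the initialization code actually checks for, the remaining JIRA keys here are illustrative assumptions:

    # Illustrative config only -- URL, account, and token are placeholders.
    config = {
        "pm_platforms": {
            "jira": {
                "enabled": True,
                "base_url": "https://example.atlassian.net",  # assumed key name
                "username": "bot@example.com",
                "api_token": "<token>",
            }
        },
        "analysis": {
            "pm_integration": {
                "enabled": True,
                "primary_platform": "jira",
                "correlation": {
                    "fuzzy_matching": True,
                    "temporal_window_hours": 72,
                    "confidence_threshold": 0.8,
                },
            }
        },
    }

    orchestrator = PMFrameworkOrchestrator(config)

The correlation values shown are exactly the defaults the constructor falls back to when the `correlation` block is omitted.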
+    def _register_builtin_adapters(self) -> None:
+        """Register built-in platform adapters.
+
+        WHY: Built-in adapters should be automatically available without
+        requiring manual registration. JIRA is registered today; Azure DevOps,
+        Linear, and other adapters will be added here in future releases.
+        """
+        logger.debug("Registering built-in platform adapters...")
+
+        # Register available adapters
+        from .adapters import JIRAAdapter
+
+        self.registry.register_adapter("jira", JIRAAdapter)
+        logger.debug("Registered JIRA adapter")
+
+        # Planned:
+        # self.registry.register_adapter('azure_devops', AzureDevOpsAdapter)
+        # self.registry.register_adapter('linear', LinearAdapter)
+        # self.registry.register_adapter('asana', AsanaAdapter)
+
+        available_platforms = self.registry.get_available_platforms()
+        logger.info(f"Built-in adapters registered: {available_platforms}")
+
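Because registration goes through `PlatformRegistry`, a third-party adapter can be plugged in the same way as the built-in JIRA one. A sketch, where `LinearAdapter` is a hypothetical subclass you would implement against the `BasePlatformAdapter` interface (its required methods are defined in `gitflow_analytics/pm_framework/base.py` and are elided here):

    from gitflow_analytics.pm_framework.base import BasePlatformAdapter

    class LinearAdapter(BasePlatformAdapter):
        """Hypothetical adapter; implement the BasePlatformAdapter interface here."""

    orchestrator.registry.register_adapter("linear", LinearAdapter)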
+    def _initialize_platforms(self) -> None:
+        """Initialize platform adapters based on configuration.
+
+        WHY: Automated initialization reduces setup complexity and ensures
+        all configured platforms are ready for data collection. Failed
+        initializations are logged but don't prevent other platforms from
+        working (graceful degradation).
+        """
+        platforms_config = self.config.get("pm_platforms", {})
+
+        if not platforms_config:
+            logger.warning("No PM platforms configured")
+            return
+
+        initialization_results = []
+
+        for platform_name, platform_config in platforms_config.items():
+            if not platform_config.get("enabled", False):
+                logger.info(f"Platform {platform_name} disabled, skipping initialization")
+                continue
+
+            # Log configuration details for debugging (without credentials)
+            config_keys = list(platform_config.keys())
+            has_credentials = (
+                "username" in platform_config and "api_token" in platform_config
+            ) or ("access_user" in platform_config and "access_token" in platform_config)
+            logger.debug(
+                f"Platform {platform_name} config keys: {config_keys}, has credentials: {has_credentials}"
+            )
+
+            try:
+                logger.info(f"Initializing {platform_name} adapter...")
+                adapter = self.registry.create_adapter(platform_name, platform_config)
+                self.adapters[platform_name] = adapter
+
+                # Test adapter capabilities
+                connection_info = adapter.test_connection()
+                logger.info(
+                    f"✅ {platform_name} initialized: {connection_info.get('status', 'unknown')}"
+                )
+
+                initialization_results.append(
+                    {
+                        "platform": platform_name,
+                        "status": "success",
+                        "connection_info": connection_info,
+                    }
+                )
+
+            except Exception as e:
+                error_msg = f"Failed to initialize {platform_name}: {e}"
+                logger.error(f"❌ {error_msg}")
+                logger.debug(f"Full error details for {platform_name}: {e}", exc_info=True)
+
+                initialization_results.append(
+                    {"platform": platform_name, "status": "error", "error": str(e)}
+                )
+
+        # Log initialization summary
+        successful = sum(1 for r in initialization_results if r["status"] == "success")
+        total = len(initialization_results)
+
+        if successful > 0:
+            logger.info(f"Successfully initialized {successful}/{total} PM platforms")
+        else:
+            logger.warning("No PM platforms successfully initialized")
+
+    def is_enabled(self) -> bool:
+        """Check if PM integration is enabled and has active adapters.
+
+        Returns:
+            True if PM integration is enabled and at least one adapter is active.
+        """
+        return self.pm_integration_enabled and len(self.adapters) > 0
+
+    def get_active_platforms(self) -> list[str]:
+        """Get list of active platform names.
+
+        Returns:
+            List of platform identifiers that are successfully initialized.
+        """
+        return list(self.adapters.keys())
+
+    def get_platform_status(self) -> dict[str, Any]:
+        """Get status information for all platforms.
+
+        WHY: Provides diagnostic information for monitoring and troubleshooting
+        platform connections and configuration issues.
+
+        Returns:
+            Dictionary containing status for each platform and overall summary.
+        """
+        status = {
+            "pm_integration_enabled": self.pm_integration_enabled,
+            "primary_platform": self.primary_platform,
+            "active_platforms": len(self.adapters),
+            "platforms": {},
+        }
+
+        for platform_name, adapter in self.adapters.items():
+            try:
+                connection_info = adapter.test_connection()
+                capabilities = adapter.capabilities
+
+                status["platforms"][platform_name] = {
+                    "status": connection_info.get("status", "unknown"),
+                    "platform_type": adapter.platform_name,
+                    "capabilities": {
+                        "supports_sprints": capabilities.supports_sprints,
+                        "supports_story_points": capabilities.supports_story_points,
+                        "supports_time_tracking": capabilities.supports_time_tracking,
+                        "rate_limit_per_hour": capabilities.rate_limit_requests_per_hour,
+                    },
+                    "connection_info": connection_info,
+                }
+            except Exception as e:
+                status["platforms"][platform_name] = {"status": "error", "error": str(e)}
+
+        return status
+
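Reading the method body, the returned structure for a single healthy JIRA connection would look roughly like this; all values are illustrative, and the exact `status` string is whatever the adapter's `test_connection()` reports:

    {
        "pm_integration_enabled": True,
        "primary_platform": "jira",
        "active_platforms": 1,
        "platforms": {
            "jira": {
                "status": "connected",
                "platform_type": "jira",
                "capabilities": {
                    "supports_sprints": True,
                    "supports_story_points": True,
                    "supports_time_tracking": False,
                    "rate_limit_per_hour": 1000,
                },
                "connection_info": {...},
            }
        },
    }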
+    def get_all_projects(self) -> dict[str, list[UnifiedProject]]:
+        """Get projects from all configured platforms.
+
+        WHY: Projects are the primary organizational unit in PM platforms.
+        This method discovers all accessible projects across platforms for
+        subsequent issue retrieval and project-level analytics.
+
+        Returns:
+            Dictionary mapping platform names to lists of UnifiedProject objects.
+        """
+        all_projects = {}
+
+        for platform_name, adapter in self.adapters.items():
+            try:
+                logger.info(f"Fetching projects from {platform_name}...")
+                projects = adapter.get_projects()
+                all_projects[platform_name] = projects
+                logger.info(f"📁 Found {len(projects)} projects in {platform_name}")
+
+            except Exception as e:
+                logger.error(f"⚠️ Failed to get projects from {platform_name}: {e}")
+                all_projects[platform_name] = []
+
+        total_projects = sum(len(projects) for projects in all_projects.values())
+        logger.info(f"Total projects discovered: {total_projects}")
+
+        return all_projects
+
+    def get_all_issues(
+        self,
+        since: Optional[datetime] = None,
+        project_filter: Optional[dict[str, list[str]]] = None,
+    ) -> dict[str, list[UnifiedIssue]]:
+        """Get issues from all configured platforms.
+
+        WHY: Issues are the core work items that need to be correlated with
+        Git commits. This method collects issues from all platforms with
+        optional filtering to optimize performance and focus on relevant data.
+
+        Args:
+            since: Optional datetime to filter issues updated after this date.
+            project_filter: Optional dict mapping platform names to lists of
+                project keys to filter by. Format:
+                {'jira': ['PROJ1', 'PROJ2'], 'azure': ['Project1']}
+
+        Returns:
+            Dictionary mapping platform names to lists of UnifiedIssue objects.
+        """
+        all_issues = {}
+
+        for platform_name, adapter in self.adapters.items():
+            try:
+                logger.info(f"Fetching issues from {platform_name}...")
+                platform_issues = []
+
+                # Get projects for this platform
+                projects = adapter.get_projects()
+
+                # Apply project filter if specified
+                if project_filter and platform_name in project_filter:
+                    project_keys = project_filter[platform_name]
+                    projects = [p for p in projects if p.key in project_keys]
+                    logger.info(f"Filtered to {len(projects)} projects for {platform_name}")
+
+                # Get issues for each project
+                for project in projects:
+                    try:
+                        logger.debug(f"Fetching issues from {platform_name}/{project.key}")
+                        issues = adapter.get_issues(project.key, since)
+                        platform_issues.extend(issues)
+                        logger.info(
+                            f"🎫 Found {len(issues)} issues in {platform_name}/{project.key}"
+                        )
+
+                    except Exception as e:
+                        logger.error(
+                            f"⚠️ Failed to get issues from {platform_name}/{project.key}: {e}"
+                        )
+
+                all_issues[platform_name] = platform_issues
+                logger.info(f"Total issues from {platform_name}: {len(platform_issues)}")
+
+            except Exception as e:
+                logger.error(f"⚠️ Failed to get issues from {platform_name}: {e}")
+                all_issues[platform_name] = []
+
+        total_issues = sum(len(issues) for issues in all_issues.values())
+        logger.info(f"Total issues collected: {total_issues}")
+
+        return all_issues
+
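A usage sketch with made-up project keys, restricting collection to issues updated in the last 90 days in two JIRA projects (whether `since` must be timezone-aware depends on the adapter):

    from datetime import datetime, timedelta, timezone

    since = datetime.now(timezone.utc) - timedelta(days=90)
    issues = orchestrator.get_all_issues(
        since=since,
        project_filter={"jira": ["PROJ1", "PROJ2"]},
    )
    for platform, platform_issues in issues.items():
        print(f"{platform}: {len(platform_issues)} issues")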
+    def get_issues_by_keys(self, platform: str, issue_keys: list[str]) -> dict[str, UnifiedIssue]:
+        """Get specific issues by their keys from a platform.
+
+        WHY: The training pipeline needs to fetch specific issues referenced in
+        commits to determine their types for classification labeling.
+
+        Args:
+            platform: Platform name (e.g., 'jira')
+            issue_keys: List of issue keys to fetch
+
+        Returns:
+            Dictionary mapping issue keys to UnifiedIssue objects.
+        """
+        if platform not in self.adapters:
+            # Don't log errors for non-configured platforms - this is expected
+            logger.debug(f"Platform {platform} not configured, skipping")
+            return {}
+
+        adapter = self.adapters[platform]
+        issues_dict = {}
+
+        # For JIRA, we can fetch issues directly by key
+        if platform == "jira" and hasattr(adapter, "get_issue_by_key"):
+            for key in issue_keys:
+                try:
+                    issue = adapter.get_issue_by_key(key)
+                    if issue:
+                        issues_dict[key] = issue
+                except Exception as e:
+                    logger.warning(f"Failed to fetch {key} from {platform}: {e}")
+        else:
+            # Other platforms would need a search API or equivalent batch method
+            logger.warning(f"Batch fetch by keys not implemented for {platform}")
+
+        return issues_dict
+
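A usage sketch with invented keys; each returned value is a `UnifiedIssue`, so its type and story points are directly readable:

    issues = orchestrator.get_issues_by_keys("jira", ["PROJ1-101", "PROJ1-205"])
    for key, issue in issues.items():
        print(key, issue.issue_type.value, issue.story_points)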
+    def correlate_issues_with_commits(
+        self, issues: dict[str, list[UnifiedIssue]], commits: list[dict[str, Any]]
+    ) -> list[dict[str, Any]]:
+        """Correlate PM platform issues with Git commits.
+
+        WHY: The core value of PM platform integration is correlating work
+        items with actual code changes. This enables tracking story point
+        accuracy, development velocity, and work item completion metrics.
+
+        DESIGN DECISION: Use multiple correlation strategies (ticket references,
+        fuzzy matching, temporal correlation) to maximize correlation accuracy
+        while maintaining confidence scoring for quality assessment.
+
+        Args:
+            issues: Dictionary mapping platform names to lists of issues.
+            commits: List of Git commit dictionaries with metadata.
+
+        Returns:
+            List of correlation dictionaries containing matched issues and commits.
+        """
+        if not issues or not commits:
+            logger.warning("No issues or commits provided for correlation")
+            return []
+
+        logger.info(
+            f"Correlating {sum(len(i) for i in issues.values())} issues with {len(commits)} commits"
+        )
+
+        correlations = []
+
+        # Build a lookup of all issues by key for efficient searching
+        issue_lookup = {}
+        for platform_issues in issues.values():
+            for issue in platform_issues:
+                issue_lookup[issue.key] = issue
+
+        logger.debug(f"Built issue lookup with {len(issue_lookup)} issues")
+
+        # Strategy 1: Direct ticket reference correlation
+        direct_correlations = self._correlate_by_ticket_references(issue_lookup, commits)
+        correlations.extend(direct_correlations)
+
+        # Strategy 2: Fuzzy matching correlation (if enabled)
+        if self.fuzzy_matching_enabled:
+            fuzzy_correlations = self._correlate_by_fuzzy_matching(
+                issue_lookup, commits, direct_correlations
+            )
+            correlations.extend(fuzzy_correlations)
+
+        # Strategy 3: Temporal correlation for bug fixes (future enhancement)
+        # temporal_correlations = self._correlate_by_temporal_proximity(issue_lookup, commits)
+        # correlations.extend(temporal_correlations)
+
+        # Remove duplicates while preserving highest confidence matches
+        unique_correlations = self._deduplicate_correlations(correlations)
+
+        logger.info(f"Found {len(unique_correlations)} issue-commit correlations")
+
+        return unique_correlations
+
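Putting collection and correlation together, an end-to-end pass might look like the following; `commits` is assumed to be the commit-dictionary list GitFlow Analytics produces elsewhere, with at least `hash`, `message`, and `ticket_references` populated, and `since` reuses the cutoff from the earlier sketch:

    issues = orchestrator.get_all_issues(since=since)
    correlations = orchestrator.correlate_issues_with_commits(issues, commits)
    for c in correlations[:5]:
        print(
            f"{c['commit_hash'][:8]} -> {c['issue_key']} "
            f"({c['correlation_method']}, confidence {c['confidence']:.2f})"
        )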
+    def _correlate_by_ticket_references(
+        self, issue_lookup: dict[str, UnifiedIssue], commits: list[dict[str, Any]]
+    ) -> list[dict[str, Any]]:
+        """Correlate issues with commits based on explicit ticket references.
+
+        WHY: Explicit ticket references in commit messages are the most reliable
+        correlation method. This strategy matches issues using existing ticket
+        extraction from GitFlow Analytics.
+
+        Args:
+            issue_lookup: Dictionary mapping issue keys to UnifiedIssue objects.
+            commits: List of commit dictionaries.
+
+        Returns:
+            List of correlation dictionaries for ticket reference matches.
+        """
+        correlations = []
+
+        for commit in commits:
+            # Check existing ticket references (from GitFlow Analytics ticket extractor)
+            ticket_refs = commit.get("ticket_references", [])
+
+            for ref in ticket_refs:
+                # Handle both dict and string formats from the ticket extractor
+                if isinstance(ref, dict):
+                    ticket_key = ref.get("id", "")
+                    full_id = ref.get("full_id", ticket_key)
+                else:
+                    ticket_key = str(ref)
+                    full_id = ticket_key
+
+                # Try to find the issue in our collected data
+                issue = issue_lookup.get(ticket_key)
+                if not issue and full_id:
+                    issue = issue_lookup.get(full_id)
+
+                if issue:
+                    correlation = {
+                        "commit_hash": commit["hash"],
+                        "commit_message": commit.get("message", "").split("\n")[0][:100],
+                        "commit_author": commit.get("author", ""),
+                        "commit_date": commit.get("date"),
+                        "issue_key": issue.key,
+                        "issue_title": issue.title,
+                        "issue_type": issue.issue_type.value,
+                        "issue_status": issue.status.value,
+                        "issue_platform": issue.platform,
+                        "story_points": issue.story_points,
+                        "correlation_method": "ticket_reference",
+                        "confidence": 1.0,  # Highest confidence for explicit references
+                        "matched_text": full_id,
+                    }
+                    correlations.append(correlation)
+
+                    logger.debug(f"Direct correlation: {commit['hash'][:8]} → {issue.key}")
+
+        logger.info(f"Found {len(correlations)} direct ticket reference correlations")
+        return correlations
+
+    def _correlate_by_fuzzy_matching(
+        self,
+        issue_lookup: dict[str, UnifiedIssue],
+        commits: list[dict[str, Any]],
+        existing_correlations: list[dict[str, Any]],
+    ) -> list[dict[str, Any]]:
+        """Correlate issues with commits using fuzzy text matching.
+
+        WHY: Not all commits have explicit ticket references, but they may
+        mention issue titles or keywords. Fuzzy matching can find additional
+        correlations while maintaining confidence scoring to filter
+        low-quality matches.
+
+        Args:
+            issue_lookup: Dictionary mapping issue keys to UnifiedIssue objects.
+            commits: List of commit dictionaries.
+            existing_correlations: Already found correlations to avoid duplicates.
+
+        Returns:
+            List of correlation dictionaries for fuzzy matches.
+        """
+        # TODO: Implement fuzzy matching correlation
+        # This would use techniques like:
+        # 1. TF-IDF similarity between commit messages and issue titles
+        # 2. Keyword extraction and matching
+        # 3. Semantic similarity using embeddings (optional)
+
+        logger.debug("Fuzzy matching correlation not yet implemented")
+        return []
+
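The stub only names the intended techniques; a minimal sketch of the first one, TF-IDF similarity between commit messages and issue titles using scikit-learn (not a dependency of this module), could look like this. Pairs scoring above `confidence_threshold` would then be emitted with a `correlation_method` such as "fuzzy_match" and the similarity as the confidence:

    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity

    def fuzzy_match_scores(issue_titles: list[str], commit_messages: list[str]):
        """Return a (commits x issues) cosine-similarity matrix in [0, 1]."""
        vectorizer = TfidfVectorizer(stop_words="english")
        # Fit one shared vocabulary over both corpora, then split the rows back out
        matrix = vectorizer.fit_transform(issue_titles + commit_messages)
        issue_vecs = matrix[: len(issue_titles)]
        commit_vecs = matrix[len(issue_titles) :]
        return cosine_similarity(commit_vecs, issue_vecs)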
+    def _deduplicate_correlations(self, correlations: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        """Remove duplicate correlations while preserving highest confidence matches.
+
+        WHY: Multiple correlation strategies may find the same issue-commit
+        pairs. We need to deduplicate while preserving the highest confidence
+        match for each unique pair.
+
+        Args:
+            correlations: List of correlation dictionaries potentially containing duplicates.
+
+        Returns:
+            List of unique correlations with highest confidence matches preserved.
+        """
+        # Group correlations by (commit_hash, issue_key) pair
+        correlation_groups: dict[tuple[str, str], dict[str, Any]] = {}
+
+        for correlation in correlations:
+            key = (correlation["commit_hash"], correlation["issue_key"])
+            confidence = correlation.get("confidence", 0.0)
+
+            if key not in correlation_groups or confidence > correlation_groups[key]["confidence"]:
+                correlation_groups[key] = correlation
+
+        unique_correlations = list(correlation_groups.values())
+
+        if len(unique_correlations) < len(correlations):
+            removed = len(correlations) - len(unique_correlations)
+            logger.debug(f"Removed {removed} duplicate correlations")
+
+        return unique_correlations
+
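A quick worked example of the rule: the same (commit, issue) pair found by two strategies keeps only the higher-confidence record. The method is private; it is called directly here only to illustrate the behavior:

    correlations = [
        {"commit_hash": "abc123", "issue_key": "PROJ-1",
         "confidence": 1.0, "correlation_method": "ticket_reference"},
        {"commit_hash": "abc123", "issue_key": "PROJ-1",
         "confidence": 0.6, "correlation_method": "fuzzy_match"},
    ]
    unique = orchestrator._deduplicate_correlations(correlations)
    assert len(unique) == 1 and unique[0]["confidence"] == 1.0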
+    def calculate_enhanced_metrics(
+        self,
+        commits: list[dict[str, Any]],
+        prs: list[dict[str, Any]],
+        pm_issues: dict[str, list[UnifiedIssue]],
+        correlations: list[dict[str, Any]],
+    ) -> dict[str, Any]:
+        """Calculate metrics enhanced with PM platform data.
+
+        WHY: PM platform integration enables new metrics that combine Git data
+        with work item information. These metrics provide insights into story
+        point accuracy, cross-platform coverage, and development efficiency.
+
+        Args:
+            commits: List of Git commit dictionaries.
+            prs: List of pull request dictionaries.
+            pm_issues: Dictionary mapping platforms to lists of issues.
+            correlations: List of issue-commit correlations.
+
+        Returns:
+            Dictionary containing enhanced metrics with PM platform data.
+        """
+        # Initialize metrics dictionary
+        metrics: dict[str, Any] = {}
+
+        # Cross-platform issue metrics
+        total_issues = sum(len(issues) for issues in pm_issues.values())
+        metrics["total_pm_issues"] = total_issues
+
+        # Story point analysis
+        pm_story_points = 0
+        issues_with_story_points = 0
+
+        for platform_issues in pm_issues.values():
+            for issue in platform_issues:
+                if issue.story_points:
+                    pm_story_points += issue.story_points
+                    issues_with_story_points += 1
+
+        git_story_points = sum(commit.get("story_points", 0) or 0 for commit in commits)
+
+        metrics["story_point_analysis"] = {
+            "pm_total_story_points": pm_story_points,
+            "git_total_story_points": git_story_points,
+            "issues_with_story_points": issues_with_story_points,
+            "story_point_coverage_pct": (
+                (issues_with_story_points / total_issues * 100) if total_issues > 0 else 0
+            ),
+            "correlation_accuracy": (
+                min(git_story_points / pm_story_points, 1.0) if pm_story_points > 0 else 0
+            ),
+        }
+
+        # Issue type distribution
+        issue_types: dict[str, int] = {}
+        for platform_issues in pm_issues.values():
+            for issue in platform_issues:
+                issue_type = issue.issue_type.value
+                issue_types[issue_type] = issue_types.get(issue_type, 0) + 1
+
+        metrics["issue_type_distribution"] = issue_types
+
+        # Platform coverage analysis
+        platform_coverage: dict[str, dict[str, Any]] = {}
+        for platform, issues in pm_issues.items():
+            linked_issues = [
+                c["issue_key"] for c in correlations if c.get("issue_platform") == platform
+            ]
+            unique_linked = set(linked_issues)
+
+            coverage_pct = len(unique_linked) / len(issues) * 100 if issues else 0
+
+            platform_coverage[platform] = {
+                "total_issues": len(issues),
+                "linked_issues": len(unique_linked),
+                "coverage_percentage": coverage_pct,
+                "correlation_rate": (
+                    len(linked_issues) / len(correlations) * 100 if correlations else 0
+                ),
+            }
+
+        metrics["platform_coverage"] = platform_coverage
+
+        # Correlation quality metrics
+        if correlations:
+            confidence_scores = [c.get("confidence", 0) for c in correlations]
+            correlation_methods: dict[str, int] = {}
+
+            for correlation in correlations:
+                method = correlation.get("correlation_method", "unknown")
+                correlation_methods[method] = correlation_methods.get(method, 0) + 1
+
+            metrics["correlation_quality"] = {
+                "total_correlations": len(correlations),
+                "average_confidence": sum(confidence_scores) / len(confidence_scores),
+                "high_confidence_correlations": sum(
+                    1 for score in confidence_scores if score >= self.confidence_threshold
+                ),
+                "correlation_methods": correlation_methods,
+            }
+        else:
+            metrics["correlation_quality"] = {
+                "total_correlations": 0,
+                "average_confidence": 0,
+                "high_confidence_correlations": 0,
+                "correlation_methods": {},
+            }
+
+        return metrics
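
Tracing the method, the returned dictionary has five top-level keys; an illustrative (entirely made-up) result for a single-platform setup:

    {
        "total_pm_issues": 240,
        "story_point_analysis": {
            "pm_total_story_points": 610,
            "git_total_story_points": 480,
            "issues_with_story_points": 180,
            "story_point_coverage_pct": 75.0,      # 180 / 240
            "correlation_accuracy": 0.79,          # min(480 / 610, 1.0)
        },
        "issue_type_distribution": {"story": 120, "bug": 80, "task": 40},
        "platform_coverage": {
            "jira": {
                "total_issues": 240,
                "linked_issues": 150,
                "coverage_percentage": 62.5,       # 150 / 240
                "correlation_rate": 100.0,
            }
        },
        "correlation_quality": {
            "total_correlations": 155,
            "average_confidence": 0.97,
            "high_confidence_correlations": 150,
            "correlation_methods": {"ticket_reference": 155},
        },
    }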