gitflow_analytics-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,157 @@
+ """Ticket reference extraction for multiple platforms."""
+ import re
+ from typing import List, Dict, Any, Set
+ from collections import defaultdict
+
+
+ class TicketExtractor:
+     """Extract ticket references from various issue tracking systems."""
+
+     def __init__(self):
+         """Initialize with patterns for different platforms."""
+         self.patterns = {
+             'jira': [
+                 r'([A-Z]{2,10}-\d+)',  # Standard JIRA format: PROJ-123
+             ],
+             'github': [
+                 r'#(\d+)',  # GitHub issues: #123
+                 r'GH-(\d+)',  # Alternative format: GH-123
+                 r'(?:fix|fixes|fixed|close|closes|closed|resolve|resolves|resolved)\s+#(\d+)',
+             ],
+             'clickup': [
+                 r'CU-([a-z0-9]+)',  # ClickUp: CU-abc123
+                 r'#([a-z0-9]{6,})',  # ClickUp short format
+             ],
+             'linear': [
+                 r'([A-Z]{2,5}-\d+)',  # Linear: ENG-123, similar to JIRA
+                 r'LIN-(\d+)',  # Alternative: LIN-123
+             ]
+         }
+
+         # Compile patterns
+         self.compiled_patterns = {}
+         for platform, patterns in self.patterns.items():
+             self.compiled_patterns[platform] = [
+                 re.compile(pattern, re.IGNORECASE if platform != 'jira' else 0)
+                 for pattern in patterns
+             ]
+
+     def extract_from_text(self, text: str) -> List[Dict[str, str]]:
+         """Extract all ticket references from text."""
+         if not text:
+             return []
+
+         tickets = []
+         seen = set()  # Avoid duplicates
+
+         for platform, patterns in self.compiled_patterns.items():
+             for pattern in patterns:
+                 matches = pattern.findall(text)
+                 for match in matches:
+                     ticket_id = match if isinstance(match, str) else match[0]
+
+                     # Normalize ticket ID
+                     if platform == 'jira' or platform == 'linear':
+                         ticket_id = ticket_id.upper()
+
+                     # Create unique key
+                     key = f"{platform}:{ticket_id}"
+                     if key not in seen:
+                         seen.add(key)
+                         tickets.append({
+                             'platform': platform,
+                             'id': ticket_id,
+                             'full_id': self._format_ticket_id(platform, ticket_id)
+                         })
+
+         return tickets
+
+     def extract_by_platform(self, text: str) -> Dict[str, List[str]]:
+         """Extract tickets grouped by platform."""
+         tickets = self.extract_from_text(text)
+
+         by_platform = defaultdict(list)
+         for ticket in tickets:
+             by_platform[ticket['platform']].append(ticket['id'])
+
+         return dict(by_platform)
+
+     def analyze_ticket_coverage(self, commits: List[Dict[str, Any]],
+                                 prs: List[Dict[str, Any]]) -> Dict[str, Any]:
+         """Analyze ticket reference coverage across commits and PRs."""
+         results = {
+             'total_commits': len(commits),
+             'total_prs': len(prs),
+             'commits_with_tickets': 0,
+             'prs_with_tickets': 0,
+             'ticket_platforms': defaultdict(int),
+             'untracked_commits': [],
+             'ticket_summary': defaultdict(set)
+         }
+
+         # Analyze commits
+         for commit in commits:
+             ticket_refs = commit.get('ticket_references', [])
+             if ticket_refs:
+                 results['commits_with_tickets'] += 1
+                 for ticket in ticket_refs:
+                     if isinstance(ticket, dict):
+                         platform = ticket.get('platform', 'unknown')
+                         ticket_id = ticket.get('id', '')
+                     else:
+                         # Legacy format - assume JIRA
+                         platform = 'jira'
+                         ticket_id = ticket
+
+                     results['ticket_platforms'][platform] += 1
+                     results['ticket_summary'][platform].add(ticket_id)
+             else:
+                 # Track significant untracked commits
+                 if (not commit.get('is_merge') and
+                         commit.get('files_changed', 0) > 3):
+                     results['untracked_commits'].append({
+                         'hash': commit['hash'][:7],
+                         'message': commit['message'].split('\n')[0][:60],
+                         'files_changed': commit.get('files_changed', 0)
+                     })
+
+         # Analyze PRs
+         for pr in prs:
+             # Extract tickets from PR title and description
+             pr_text = f"{pr.get('title', '')} {pr.get('description', '')}"
+             tickets = self.extract_from_text(pr_text)
+
+             if tickets:
+                 results['prs_with_tickets'] += 1
+                 for ticket in tickets:
+                     platform = ticket['platform']
+                     results['ticket_platforms'][platform] += 1
+                     results['ticket_summary'][platform].add(ticket['id'])
+
+         # Calculate coverage percentages
+         results['commit_coverage_pct'] = (
+             results['commits_with_tickets'] / results['total_commits'] * 100
+             if results['total_commits'] > 0 else 0
+         )
+
+         results['pr_coverage_pct'] = (
+             results['prs_with_tickets'] / results['total_prs'] * 100
+             if results['total_prs'] > 0 else 0
+         )
+
+         # Convert sets to counts for summary
+         results['ticket_summary'] = {
+             platform: len(tickets)
+             for platform, tickets in results['ticket_summary'].items()
+         }
+
+         return results
+
+     def _format_ticket_id(self, platform: str, ticket_id: str) -> str:
+         """Format ticket ID for display."""
+         if platform == 'github':
+             return f"#{ticket_id}"
+         elif platform == 'clickup':
+             return f"CU-{ticket_id}" if not ticket_id.startswith('CU-') else ticket_id
+         else:
+             return ticket_id
File without changes
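
For orientation, a minimal usage sketch of the TicketExtractor above. The absolute import path is an assumption inferred from the relative import `from ..extractors.tickets import TicketExtractor` that appears later in this diff:

from gitflow_analytics.extractors.tickets import TicketExtractor  # path assumed

extractor = TicketExtractor()
text = "PROJ-123: fix login flow (closes #45, CU-abc123)"

# De-duplicated references across platforms; full_id is the display form.
for ref in extractor.extract_from_text(text):
    print(ref['platform'], ref['full_id'])

# Grouped by platform, e.g. {'jira': ['PROJ-123'], 'github': ['45'], 'clickup': ['abc123'], 'linear': ['PROJ-123']}.
# Note the JIRA and Linear patterns overlap, so JIRA-style keys are reported under both platforms.
print(extractor.extract_by_platform(text))
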
@@ -0,0 +1,160 @@
+ """GitHub API integration for PR and issue enrichment."""
+ from datetime import datetime, timedelta
+ from typing import List, Dict, Any, Optional
+ import time
+ from github import Github
+ from github.GithubException import RateLimitExceededException, UnknownObjectException
+
+ from ..core.cache import GitAnalysisCache
+
+
+ class GitHubIntegration:
+     """Integrate with GitHub API for PR and issue data."""
+
+     def __init__(self, token: str, cache: GitAnalysisCache,
+                  rate_limit_retries: int = 3, backoff_factor: int = 2):
+         """Initialize GitHub integration."""
+         self.github = Github(token)
+         self.cache = cache
+         self.rate_limit_retries = rate_limit_retries
+         self.backoff_factor = backoff_factor
+
+     def enrich_repository_with_prs(self, repo_name: str, commits: List[Dict[str, Any]],
+                                    since: datetime) -> List[Dict[str, Any]]:
+         """Enrich repository commits with PR data."""
+         try:
+             repo = self.github.get_repo(repo_name)
+         except UnknownObjectException:
+             print(f" ⚠️ GitHub repo not found: {repo_name}")
+             return []
+
+         # Get PRs for the time period
+         prs = self._get_pull_requests(repo, since)
+
+         # Build commit to PR mapping
+         commit_to_pr = {}
+         for pr in prs:
+             pr_data = self._extract_pr_data(pr)
+
+             # Cache PR data
+             self.cache.cache_pr(repo_name, pr_data)
+
+             # Map commits to this PR
+             for commit in pr.get_commits():
+                 commit_to_pr[commit.sha] = pr_data
+
+         # Enrich commits with PR data
+         enriched_prs = []
+         for commit in commits:
+             if commit['hash'] in commit_to_pr:
+                 pr_data = commit_to_pr[commit['hash']]
+
+                 # Use PR story points if commit doesn't have them
+                 if not commit.get('story_points') and pr_data.get('story_points'):
+                     commit['story_points'] = pr_data['story_points']
+
+                 # Add PR reference
+                 commit['pr_number'] = pr_data['number']
+                 commit['pr_title'] = pr_data['title']
+
+                 # Add to PR list if not already there
+                 if pr_data not in enriched_prs:
+                     enriched_prs.append(pr_data)
+
+         return enriched_prs
+
+     def _get_pull_requests(self, repo, since: datetime) -> List[Any]:
+         """Get pull requests with rate limit handling."""
+
+         for attempt in range(self.rate_limit_retries):
+             prs = []  # Start fresh on each attempt so a retried fetch does not append duplicate PRs
+             try:
+                 # Get all PRs updated since the date
+                 for pr in repo.get_pulls(state='all', sort='updated', direction='desc'):
+                     if pr.updated_at < since:
+                         break
+
+                     # Only include PRs that were merged in our time period
+                     if pr.merged and pr.merged_at >= since:
+                         prs.append(pr)
+
+                 return prs
+
+             except RateLimitExceededException:
+                 if attempt < self.rate_limit_retries - 1:
+                     wait_time = self.backoff_factor ** attempt
+                     print(f" ⏳ GitHub rate limit hit, waiting {wait_time}s...")
+                     time.sleep(wait_time)
+                 else:
+                     print(" ❌ GitHub rate limit exceeded, skipping PR enrichment")
+                     return []
+
+         return []
+
+     def _extract_pr_data(self, pr) -> Dict[str, Any]:
+         """Extract relevant data from a GitHub PR object."""
+         from ..extractors.story_points import StoryPointExtractor
+         from ..extractors.tickets import TicketExtractor
+
+         sp_extractor = StoryPointExtractor()
+         ticket_extractor = TicketExtractor()
+
+         # Extract story points from PR title and body
+         pr_text = f"{pr.title} {pr.body or ''}"
+         story_points = sp_extractor.extract_from_text(pr_text)
+
+         # Extract ticket references
+         tickets = ticket_extractor.extract_from_text(pr_text)
+
+         # Get commit SHAs
+         commit_hashes = [c.sha for c in pr.get_commits()]
+
+         return {
+             'number': pr.number,
+             'title': pr.title,
+             'description': pr.body,
+             'author': pr.user.login,
+             'created_at': pr.created_at,
+             'merged_at': pr.merged_at,
+             'story_points': story_points,
+             'labels': [label.name for label in pr.labels],
+             'commit_hashes': commit_hashes,
+             'ticket_references': tickets,
+             'review_comments': pr.review_comments,
+             'changed_files': pr.changed_files,
+             'additions': pr.additions,
+             'deletions': pr.deletions
+         }
+
+     def calculate_pr_metrics(self, prs: List[Dict[str, Any]]) -> Dict[str, Any]:
+         """Calculate PR-level metrics."""
+         if not prs:
+             return {
+                 'avg_pr_size': 0,
+                 'avg_pr_lifetime_hours': 0,
+                 'avg_files_per_pr': 0,
+                 'total_review_comments': 0
+             }
+
+         total_size = sum(pr['additions'] + pr['deletions'] for pr in prs)
+         total_files = sum(pr.get('changed_files', 0) for pr in prs)
+         total_comments = sum(pr.get('review_comments', 0) for pr in prs)
+
+         # Calculate average PR lifetime
+         lifetimes = []
+         for pr in prs:
+             if pr.get('merged_at') and pr.get('created_at'):
+                 lifetime = (pr['merged_at'] - pr['created_at']).total_seconds() / 3600
+                 lifetimes.append(lifetime)
+
+         avg_lifetime = sum(lifetimes) / len(lifetimes) if lifetimes else 0
+
+         return {
+             'total_prs': len(prs),
+             'avg_pr_size': total_size / len(prs),
+             'avg_pr_lifetime_hours': avg_lifetime,
+             'avg_files_per_pr': total_files / len(prs),
+             'total_review_comments': total_comments,
+             'prs_with_story_points': sum(1 for pr in prs if pr.get('story_points')),
+             'story_point_coverage': sum(1 for pr in prs if pr.get('story_points')) / len(prs) * 100
+         }
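
A hedged sketch of how GitHubIntegration might be exercised on its own. The absolute import paths and the GitAnalysisCache constructor argument are assumptions; only relative imports appear in this diff, and the cache module itself is not shown:

from datetime import datetime, timedelta

from gitflow_analytics.core.cache import GitAnalysisCache  # path assumed
from gitflow_analytics.integrations.github_integration import GitHubIntegration  # path assumed

cache = GitAnalysisCache(".gitflow-cache")  # constructor signature assumed
gh = GitHubIntegration(token="<github-token>", cache=cache)

# PyGithub may return timezone-aware datetimes; adjust `since` to match your PyGithub version.
since = datetime.now() - timedelta(weeks=12)
commits = [{"hash": "abc1234", "story_points": None}]  # normally produced by the commit analyzer

prs = gh.enrich_repository_with_prs("owner/repo", commits, since)
print(gh.calculate_pr_metrics(prs))
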
@@ -0,0 +1,119 @@
+ """Integration orchestrator for multiple platforms."""
+ from typing import Dict, Any, List, Optional
+ from datetime import datetime
+ import json
+
+ from ..core.cache import GitAnalysisCache
+ from .github_integration import GitHubIntegration
+
+
+ class IntegrationOrchestrator:
+     """Orchestrate integrations with multiple platforms."""
+
+     def __init__(self, config: Any, cache: GitAnalysisCache):
+         """Initialize integration orchestrator."""
+         self.config = config
+         self.cache = cache
+         self.integrations = {}
+
+         # Initialize available integrations
+         if config.github and config.github.token:
+             self.integrations['github'] = GitHubIntegration(
+                 config.github.token,
+                 cache,
+                 config.github.max_retries,
+                 config.github.backoff_factor
+             )
+
+     def enrich_repository_data(self, repo_config: Any, commits: List[Dict[str, Any]],
+                                since: datetime) -> Dict[str, Any]:
+         """Enrich repository data from all available integrations."""
+         enrichment = {
+             'prs': [],
+             'issues': [],
+             'pr_metrics': {}
+         }
+
+         # GitHub enrichment
+         if 'github' in self.integrations and repo_config.github_repo:
+             github = self.integrations['github']
+
+             try:
+                 # Get PR data
+                 prs = github.enrich_repository_with_prs(
+                     repo_config.github_repo, commits, since
+                 )
+                 enrichment['prs'] = prs
+
+                 # Calculate PR metrics
+                 if prs:
+                     enrichment['pr_metrics'] = github.calculate_pr_metrics(prs)
+
+             except Exception as e:
+                 print(f" ⚠️ GitHub enrichment failed: {e}")
+
+         # Future: Add other platform integrations here
+         # - ClickUp
+         # - JIRA
+         # - Linear
+
+         return enrichment
+
+     def get_platform_issues(self, project_key: str, since: datetime) -> List[Dict[str, Any]]:
+         """Get issues from all configured platforms."""
+         all_issues = []
+
+         # Check cache first
+         cached_issues = []
+         for platform in ['github', 'jira', 'clickup', 'linear']:
+             cached = self.cache.get_cached_issues(platform, project_key)
+             cached_issues.extend(cached)
+
+         if cached_issues:
+             return cached_issues
+
+         # Future: Fetch from APIs if not cached
+         # This is where we'd add actual API calls to each platform
+
+         return all_issues
+
+     def export_to_json(self,
+                        commits: List[Dict[str, Any]],
+                        prs: List[Dict[str, Any]],
+                        developer_stats: List[Dict[str, Any]],
+                        project_metrics: Dict[str, Any],
+                        dora_metrics: Dict[str, Any],
+                        output_path: str) -> str:
+         """Export all data to JSON format for API consumption."""
+
+         # Prepare data for JSON serialization
+         def serialize_dates(obj):
+             """Convert datetime objects to ISO format strings."""
+             if isinstance(obj, datetime):
+                 return obj.isoformat()
+             elif isinstance(obj, dict):
+                 return {k: serialize_dates(v) for k, v in obj.items()}
+             elif isinstance(obj, list):
+                 return [serialize_dates(item) for item in obj]
+             return obj
+
+         export_data = {
+             'metadata': {
+                 'generated_at': datetime.now().isoformat(),
+                 'version': '1.0',
+                 'total_commits': len(commits),
+                 'total_prs': len(prs),
+                 'total_developers': len(developer_stats)
+             },
+             'commits': serialize_dates(commits),
+             'pull_requests': serialize_dates(prs),
+             'developers': serialize_dates(developer_stats),
+             'project_metrics': serialize_dates(project_metrics),
+             'dora_metrics': serialize_dates(dora_metrics)
+         }
+
+         # Write JSON file
+         with open(output_path, 'w') as f:
+             json.dump(export_data, f, indent=2)
+
+         return output_path
File without changes
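
Finally, a rough sketch of how the orchestrator appears to be wired together. The module path and config attributes are assumptions taken from what the code above reads (config.github.token, config.github.max_retries, config.github.backoff_factor, repo_config.github_repo); the real configuration loader is not part of this diff:

from datetime import datetime, timedelta
from types import SimpleNamespace

from gitflow_analytics.core.cache import GitAnalysisCache  # path assumed
from gitflow_analytics.integrations.orchestrator import IntegrationOrchestrator  # module name assumed

config = SimpleNamespace(github=SimpleNamespace(token="<github-token>", max_retries=3, backoff_factor=2))
repo_config = SimpleNamespace(github_repo="owner/repo")

cache = GitAnalysisCache(".gitflow-cache")  # constructor signature assumed
orchestrator = IntegrationOrchestrator(config, cache)

since = datetime.now() - timedelta(weeks=12)
enrichment = orchestrator.enrich_repository_data(repo_config, commits=[], since=since)

# Datetime values are converted to ISO-8601 strings by serialize_dates on export.
orchestrator.export_to_json(
    commits=[], prs=enrichment['prs'], developer_stats=[],
    project_metrics={}, dora_metrics={}, output_path="gitflow_export.json",
)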