gitflow_analytics-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,157 @@
+ """Ticket reference extraction for multiple platforms."""
+ import re
+ from typing import List, Dict, Any, Set
+ from collections import defaultdict
+
+
+ class TicketExtractor:
+     """Extract ticket references from various issue tracking systems."""
+
+     def __init__(self):
+         """Initialize with patterns for different platforms."""
+         self.patterns = {
+             'jira': [
+                 r'([A-Z]{2,10}-\d+)',  # Standard JIRA format: PROJ-123
+             ],
+             'github': [
+                 r'#(\d+)',  # GitHub issues: #123
+                 r'GH-(\d+)',  # Alternative format: GH-123
+                 r'(?:fix|fixes|fixed|close|closes|closed|resolve|resolves|resolved)\s+#(\d+)',
+             ],
+             'clickup': [
+                 r'CU-([a-z0-9]+)',  # ClickUp: CU-abc123
+                 r'#([a-z0-9]{6,})',  # ClickUp short format
+             ],
+             'linear': [
+                 r'([A-Z]{2,5}-\d+)',  # Linear: ENG-123, similar to JIRA
+                 r'LIN-(\d+)',  # Alternative: LIN-123
+             ]
+         }
+
+         # Compile patterns
+         self.compiled_patterns = {}
+         for platform, patterns in self.patterns.items():
+             self.compiled_patterns[platform] = [
+                 re.compile(pattern, re.IGNORECASE if platform != 'jira' else 0)
+                 for pattern in patterns
+             ]
+
+     def extract_from_text(self, text: str) -> List[Dict[str, str]]:
+         """Extract all ticket references from text."""
+         if not text:
+             return []
+
+         tickets = []
+         seen = set()  # Avoid duplicates
+
+         for platform, patterns in self.compiled_patterns.items():
+             for pattern in patterns:
+                 matches = pattern.findall(text)
+                 for match in matches:
+                     ticket_id = match if isinstance(match, str) else match[0]
+
+                     # Normalize ticket ID
+                     if platform == 'jira' or platform == 'linear':
+                         ticket_id = ticket_id.upper()
+
+                     # Create unique key
+                     key = f"{platform}:{ticket_id}"
+                     if key not in seen:
+                         seen.add(key)
+                         tickets.append({
+                             'platform': platform,
+                             'id': ticket_id,
+                             'full_id': self._format_ticket_id(platform, ticket_id)
+                         })
+
+         return tickets
+
+     def extract_by_platform(self, text: str) -> Dict[str, List[str]]:
+         """Extract tickets grouped by platform."""
+         tickets = self.extract_from_text(text)
+
+         by_platform = defaultdict(list)
+         for ticket in tickets:
+             by_platform[ticket['platform']].append(ticket['id'])
+
+         return dict(by_platform)
+
+     def analyze_ticket_coverage(self, commits: List[Dict[str, Any]],
+                                 prs: List[Dict[str, Any]]) -> Dict[str, Any]:
+         """Analyze ticket reference coverage across commits and PRs."""
+         results = {
+             'total_commits': len(commits),
+             'total_prs': len(prs),
+             'commits_with_tickets': 0,
+             'prs_with_tickets': 0,
+             'ticket_platforms': defaultdict(int),
+             'untracked_commits': [],
+             'ticket_summary': defaultdict(set)
+         }
+
+         # Analyze commits
+         for commit in commits:
+             ticket_refs = commit.get('ticket_references', [])
+             if ticket_refs:
+                 results['commits_with_tickets'] += 1
+                 for ticket in ticket_refs:
+                     if isinstance(ticket, dict):
+                         platform = ticket.get('platform', 'unknown')
+                         ticket_id = ticket.get('id', '')
+                     else:
+                         # Legacy format - assume JIRA
+                         platform = 'jira'
+                         ticket_id = ticket
+
+                     results['ticket_platforms'][platform] += 1
+                     results['ticket_summary'][platform].add(ticket_id)
+             else:
+                 # Track significant untracked commits
+                 if (not commit.get('is_merge') and
+                         commit.get('files_changed', 0) > 3):
+                     results['untracked_commits'].append({
+                         'hash': commit['hash'][:7],
+                         'message': commit['message'].split('\n')[0][:60],
+                         'files_changed': commit.get('files_changed', 0)
+                     })
+
+         # Analyze PRs
+         for pr in prs:
+             # Extract tickets from PR title and description
+             pr_text = f"{pr.get('title', '')} {pr.get('description', '')}"
+             tickets = self.extract_from_text(pr_text)
+
+             if tickets:
+                 results['prs_with_tickets'] += 1
+                 for ticket in tickets:
+                     platform = ticket['platform']
+                     results['ticket_platforms'][platform] += 1
+                     results['ticket_summary'][platform].add(ticket['id'])
+
+         # Calculate coverage percentages
+         results['commit_coverage_pct'] = (
+             results['commits_with_tickets'] / results['total_commits'] * 100
+             if results['total_commits'] > 0 else 0
+         )
+
+         results['pr_coverage_pct'] = (
+             results['prs_with_tickets'] / results['total_prs'] * 100
+             if results['total_prs'] > 0 else 0
+         )
+
+         # Convert sets to counts for summary
+         results['ticket_summary'] = {
+             platform: len(tickets)
+             for platform, tickets in results['ticket_summary'].items()
+         }
+
+         return results
+
+     def _format_ticket_id(self, platform: str, ticket_id: str) -> str:
+         """Format ticket ID for display."""
+         if platform == 'github':
+             return f"#{ticket_id}"
+         elif platform == 'clickup':
+             return f"CU-{ticket_id}" if not ticket_id.startswith('CU-') else ticket_id
+         else:
+             return ticket_id
File without changes
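
For orientation, a minimal usage sketch of the TicketExtractor above. The absolute import path is an assumption inferred from the relative import `from ..extractors.tickets import TicketExtractor` that appears later in this diff:

from gitflow_analytics.extractors.tickets import TicketExtractor  # path assumed

extractor = TicketExtractor()
text = "PROJ-123: fix login flow (closes #45, CU-abc123)"

# De-duplicated references across platforms; full_id is the display form.
for ref in extractor.extract_from_text(text):
    print(ref['platform'], ref['full_id'])

# Grouped by platform, e.g. {'jira': ['PROJ-123'], 'github': ['45'], 'clickup': ['abc123'], 'linear': ['PROJ-123']}.
# Note the JIRA and Linear patterns overlap, so JIRA-style keys are reported under both platforms.
print(extractor.extract_by_platform(text))
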
@@ -0,0 +1,160 @@
+ """GitHub API integration for PR and issue enrichment."""
+ from datetime import datetime, timedelta
+ from typing import List, Dict, Any, Optional
+ import time
+ from github import Github
+ from github.GithubException import RateLimitExceededException, UnknownObjectException
+
+ from ..core.cache import GitAnalysisCache
+
+
+ class GitHubIntegration:
+     """Integrate with GitHub API for PR and issue data."""
+
+     def __init__(self, token: str, cache: GitAnalysisCache,
+                  rate_limit_retries: int = 3, backoff_factor: int = 2):
+         """Initialize GitHub integration."""
+         self.github = Github(token)
+         self.cache = cache
+         self.rate_limit_retries = rate_limit_retries
+         self.backoff_factor = backoff_factor
+
+     def enrich_repository_with_prs(self, repo_name: str, commits: List[Dict[str, Any]],
+                                    since: datetime) -> List[Dict[str, Any]]:
+         """Enrich repository commits with PR data."""
+         try:
+             repo = self.github.get_repo(repo_name)
+         except UnknownObjectException:
+             print(f" ⚠️ GitHub repo not found: {repo_name}")
+             return []
+
+         # Get PRs for the time period
+         prs = self._get_pull_requests(repo, since)
+
+         # Build commit to PR mapping
+         commit_to_pr = {}
+         for pr in prs:
+             pr_data = self._extract_pr_data(pr)
+
+             # Cache PR data
+             self.cache.cache_pr(repo_name, pr_data)
+
+             # Map commits to this PR
+             for commit in pr.get_commits():
+                 commit_to_pr[commit.sha] = pr_data
+
+         # Enrich commits with PR data
+         enriched_prs = []
+         for commit in commits:
+             if commit['hash'] in commit_to_pr:
+                 pr_data = commit_to_pr[commit['hash']]
+
+                 # Use PR story points if commit doesn't have them
+                 if not commit.get('story_points') and pr_data.get('story_points'):
+                     commit['story_points'] = pr_data['story_points']
+
+                 # Add PR reference
+                 commit['pr_number'] = pr_data['number']
+                 commit['pr_title'] = pr_data['title']
+
+                 # Add to PR list if not already there
+                 if pr_data not in enriched_prs:
+                     enriched_prs.append(pr_data)
+
+         return enriched_prs
+
+     def _get_pull_requests(self, repo, since: datetime) -> List[Any]:
+         """Get pull requests with rate limit handling."""
+
+         for attempt in range(self.rate_limit_retries):
+             prs = []  # Start fresh on each attempt so a retried fetch does not append duplicate PRs
+             try:
+                 # Get all PRs updated since the date
+                 for pr in repo.get_pulls(state='all', sort='updated', direction='desc'):
+                     if pr.updated_at < since:
+                         break
+
+                     # Only include PRs that were merged in our time period
+                     if pr.merged and pr.merged_at >= since:
+                         prs.append(pr)
+
+                 return prs
+
+             except RateLimitExceededException:
+                 if attempt < self.rate_limit_retries - 1:
+                     wait_time = self.backoff_factor ** attempt
+                     print(f" ⏳ GitHub rate limit hit, waiting {wait_time}s...")
+                     time.sleep(wait_time)
+                 else:
+                     print(" ❌ GitHub rate limit exceeded, skipping PR enrichment")
+                     return []
+
+         return []
+
+     def _extract_pr_data(self, pr) -> Dict[str, Any]:
+         """Extract relevant data from a GitHub PR object."""
+         from ..extractors.story_points import StoryPointExtractor
+         from ..extractors.tickets import TicketExtractor
+
+         sp_extractor = StoryPointExtractor()
+         ticket_extractor = TicketExtractor()
+
+         # Extract story points from PR title and body
+         pr_text = f"{pr.title} {pr.body or ''}"
+         story_points = sp_extractor.extract_from_text(pr_text)
+
+         # Extract ticket references
+         tickets = ticket_extractor.extract_from_text(pr_text)
+
+         # Get commit SHAs
+         commit_hashes = [c.sha for c in pr.get_commits()]
+
+         return {
+             'number': pr.number,
+             'title': pr.title,
+             'description': pr.body,
+             'author': pr.user.login,
+             'created_at': pr.created_at,
+             'merged_at': pr.merged_at,
+             'story_points': story_points,
+             'labels': [label.name for label in pr.labels],
+             'commit_hashes': commit_hashes,
+             'ticket_references': tickets,
+             'review_comments': pr.review_comments,
+             'changed_files': pr.changed_files,
+             'additions': pr.additions,
+             'deletions': pr.deletions
+         }
+
+     def calculate_pr_metrics(self, prs: List[Dict[str, Any]]) -> Dict[str, Any]:
+         """Calculate PR-level metrics."""
+         if not prs:
+             return {
+                 'avg_pr_size': 0,
+                 'avg_pr_lifetime_hours': 0,
+                 'avg_files_per_pr': 0,
+                 'total_review_comments': 0
+             }
+
+         total_size = sum(pr['additions'] + pr['deletions'] for pr in prs)
+         total_files = sum(pr.get('changed_files', 0) for pr in prs)
+         total_comments = sum(pr.get('review_comments', 0) for pr in prs)
+
+         # Calculate average PR lifetime
+         lifetimes = []
+         for pr in prs:
+             if pr.get('merged_at') and pr.get('created_at'):
+                 lifetime = (pr['merged_at'] - pr['created_at']).total_seconds() / 3600
+                 lifetimes.append(lifetime)
+
+         avg_lifetime = sum(lifetimes) / len(lifetimes) if lifetimes else 0
+
+         return {
+             'total_prs': len(prs),
+             'avg_pr_size': total_size / len(prs),
+             'avg_pr_lifetime_hours': avg_lifetime,
+             'avg_files_per_pr': total_files / len(prs),
+             'total_review_comments': total_comments,
+             'prs_with_story_points': sum(1 for pr in prs if pr.get('story_points')),
+             'story_point_coverage': sum(1 for pr in prs if pr.get('story_points')) / len(prs) * 100
+         }
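
A hedged sketch of how GitHubIntegration might be exercised on its own. The absolute import paths and the GitAnalysisCache constructor argument are assumptions; only relative imports appear in this diff, and the cache module itself is not shown:

from datetime import datetime, timedelta

from gitflow_analytics.core.cache import GitAnalysisCache  # path assumed
from gitflow_analytics.integrations.github_integration import GitHubIntegration  # path assumed

cache = GitAnalysisCache(".gitflow-cache")  # constructor signature assumed
gh = GitHubIntegration(token="<github-token>", cache=cache)

# PyGithub may return timezone-aware datetimes; adjust `since` to match your PyGithub version.
since = datetime.now() - timedelta(weeks=12)
commits = [{"hash": "abc1234", "story_points": None}]  # normally produced by the commit analyzer

prs = gh.enrich_repository_with_prs("owner/repo", commits, since)
print(gh.calculate_pr_metrics(prs))
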
@@ -0,0 +1,119 @@
+ """Integration orchestrator for multiple platforms."""
+ from typing import Dict, Any, List, Optional
+ from datetime import datetime
+ import json
+
+ from ..core.cache import GitAnalysisCache
+ from .github_integration import GitHubIntegration
+
+
+ class IntegrationOrchestrator:
+     """Orchestrate integrations with multiple platforms."""
+
+     def __init__(self, config: Any, cache: GitAnalysisCache):
+         """Initialize integration orchestrator."""
+         self.config = config
+         self.cache = cache
+         self.integrations = {}
+
+         # Initialize available integrations
+         if config.github and config.github.token:
+             self.integrations['github'] = GitHubIntegration(
+                 config.github.token,
+                 cache,
+                 config.github.max_retries,
+                 config.github.backoff_factor
+             )
+
+     def enrich_repository_data(self, repo_config: Any, commits: List[Dict[str, Any]],
+                                since: datetime) -> Dict[str, Any]:
+         """Enrich repository data from all available integrations."""
+         enrichment = {
+             'prs': [],
+             'issues': [],
+             'pr_metrics': {}
+         }
+
+         # GitHub enrichment
+         if 'github' in self.integrations and repo_config.github_repo:
+             github = self.integrations['github']
+
+             try:
+                 # Get PR data
+                 prs = github.enrich_repository_with_prs(
+                     repo_config.github_repo, commits, since
+                 )
+                 enrichment['prs'] = prs
+
+                 # Calculate PR metrics
+                 if prs:
+                     enrichment['pr_metrics'] = github.calculate_pr_metrics(prs)
+
+             except Exception as e:
+                 print(f" ⚠️ GitHub enrichment failed: {e}")
+
+         # Future: Add other platform integrations here
+         # - ClickUp
+         # - JIRA
+         # - Linear
+
+         return enrichment
+
+     def get_platform_issues(self, project_key: str, since: datetime) -> List[Dict[str, Any]]:
+         """Get issues from all configured platforms."""
+         all_issues = []
+
+         # Check cache first
+         cached_issues = []
+         for platform in ['github', 'jira', 'clickup', 'linear']:
+             cached = self.cache.get_cached_issues(platform, project_key)
+             cached_issues.extend(cached)
+
+         if cached_issues:
+             return cached_issues
+
+         # Future: Fetch from APIs if not cached
+         # This is where we'd add actual API calls to each platform
+
+         return all_issues
+
+     def export_to_json(self,
+                        commits: List[Dict[str, Any]],
+                        prs: List[Dict[str, Any]],
+                        developer_stats: List[Dict[str, Any]],
+                        project_metrics: Dict[str, Any],
+                        dora_metrics: Dict[str, Any],
+                        output_path: str) -> str:
+         """Export all data to JSON format for API consumption."""
+
+         # Prepare data for JSON serialization
+         def serialize_dates(obj):
+             """Convert datetime objects to ISO format strings."""
+             if isinstance(obj, datetime):
+                 return obj.isoformat()
+             elif isinstance(obj, dict):
+                 return {k: serialize_dates(v) for k, v in obj.items()}
+             elif isinstance(obj, list):
+                 return [serialize_dates(item) for item in obj]
+             return obj
+
+         export_data = {
+             'metadata': {
+                 'generated_at': datetime.now().isoformat(),
+                 'version': '1.0',
+                 'total_commits': len(commits),
+                 'total_prs': len(prs),
+                 'total_developers': len(developer_stats)
+             },
+             'commits': serialize_dates(commits),
+             'pull_requests': serialize_dates(prs),
+             'developers': serialize_dates(developer_stats),
+             'project_metrics': serialize_dates(project_metrics),
+             'dora_metrics': serialize_dates(dora_metrics)
+         }
+
+         # Write JSON file
+         with open(output_path, 'w') as f:
+             json.dump(export_data, f, indent=2)
+
+         return output_path
File without changes
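
Finally, a rough sketch of how the orchestrator appears to be wired together. The module path and config attributes are assumptions taken from what the code above reads (config.github.token, config.github.max_retries, config.github.backoff_factor, repo_config.github_repo); the real configuration loader is not part of this diff:

from datetime import datetime, timedelta
from types import SimpleNamespace

from gitflow_analytics.core.cache import GitAnalysisCache  # path assumed
from gitflow_analytics.integrations.orchestrator import IntegrationOrchestrator  # module name assumed

config = SimpleNamespace(github=SimpleNamespace(token="<github-token>", max_retries=3, backoff_factor=2))
repo_config = SimpleNamespace(github_repo="owner/repo")

cache = GitAnalysisCache(".gitflow-cache")  # constructor signature assumed
orchestrator = IntegrationOrchestrator(config, cache)

since = datetime.now() - timedelta(weeks=12)
enrichment = orchestrator.enrich_repository_data(repo_config, commits=[], since=since)

# Datetime values are converted to ISO-8601 strings by serialize_dates on export.
orchestrator.export_to_json(
    commits=[], prs=enrichment['prs'], developer_stats=[],
    project_metrics={}, dora_metrics={}, output_path="gitflow_export.json",
)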