gitflow-analytics 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,8 @@ from .extractors.tickets import TicketExtractor
13
13
  from .reports.csv_writer import CSVReportGenerator
14
14
 
15
15
  __all__ = [
16
+ '__version__',
17
+ '__version_info__',
16
18
  'GitAnalyzer',
17
19
  'GitAnalysisCache',
18
20
  'DeveloperIdentityResolver',
@@ -1,4 +1,4 @@
1
1
  """Version information for gitflow-analytics."""
2
2
 
3
- __version__ = "1.0.0"
3
+ __version__ = "1.0.1"
4
4
  __version_info__ = tuple(int(x) for x in __version__.split("."))
gitflow_analytics/cli.py CHANGED
@@ -1,23 +1,23 @@
1
1
  """Command-line interface for GitFlow Analytics."""
2
- import click
3
- import yaml
4
- from pathlib import Path
2
+ import sys
5
3
  from datetime import datetime, timedelta
4
+ from pathlib import Path
6
5
  from typing import Optional
7
- import sys
6
+
7
+ import click
8
+ import git
8
9
  import pandas as pd
9
10
 
10
- from .config import ConfigLoader, Config
11
- from .core.cache import GitAnalysisCache
11
+ from .config import ConfigLoader
12
12
  from .core.analyzer import GitAnalyzer
13
+ from .core.cache import GitAnalysisCache
13
14
  from .core.identity import DeveloperIdentityResolver
14
- from .extractors.story_points import StoryPointExtractor
15
15
  from .extractors.tickets import TicketExtractor
16
- from .reports.csv_writer import CSVReportGenerator
16
+ from .integrations.orchestrator import IntegrationOrchestrator
17
+ from .metrics.dora import DORAMetricsCalculator
17
18
  from .reports.analytics_writer import AnalyticsReportGenerator
19
+ from .reports.csv_writer import CSVReportGenerator
18
20
  from .reports.narrative_writer import NarrativeReportGenerator
19
- from .metrics.dora import DORAMetricsCalculator
20
- from .integrations.orchestrator import IntegrationOrchestrator
21
21
 
22
22
 
23
23
  @click.group()
@@ -101,14 +101,36 @@ def analyze(config: Path, weeks: int, output: Optional[Path], anonymize: bool,
101
101
  manual_mappings=cfg.analysis.manual_identity_mappings
102
102
  )
103
103
 
104
- analyzer = GitAnalyzer(cache, branch_mapping_rules=cfg.analysis.branch_mapping_rules)
104
+ analyzer = GitAnalyzer(
105
+ cache,
106
+ branch_mapping_rules=cfg.analysis.branch_mapping_rules,
107
+ allowed_ticket_platforms=getattr(cfg.analysis, 'ticket_platforms', None),
108
+ exclude_paths=cfg.analysis.exclude_paths
109
+ )
105
110
  orchestrator = IntegrationOrchestrator(cfg, cache)
106
111
 
112
+ # Discover organization repositories if needed
113
+ repositories_to_analyze = cfg.repositories
114
+ if cfg.github.organization and not repositories_to_analyze:
115
+ click.echo(f"🔍 Discovering repositories from organization: {cfg.github.organization}")
116
+ try:
117
+ # Use a 'repos' directory in the config directory for cloned repositories
118
+ config_dir = Path(config).parent if config else Path.cwd()
119
+ repos_dir = config_dir / "repos"
120
+ discovered_repos = cfg.discover_organization_repositories(clone_base_path=repos_dir)
121
+ repositories_to_analyze = discovered_repos
122
+ click.echo(f" ✅ Found {len(discovered_repos)} repositories in organization")
123
+ for repo in discovered_repos:
124
+ click.echo(f" - {repo.name} ({repo.github_repo})")
125
+ except Exception as e:
126
+ click.echo(f" ❌ Failed to discover repositories: {e}")
127
+ return
128
+
107
129
  # Analysis period
108
130
  end_date = datetime.now()
109
131
  start_date = end_date - timedelta(weeks=weeks)
110
132
 
111
- click.echo(f"\n🚀 Analyzing {len(cfg.repositories)} repositories...")
133
+ click.echo(f"\n🚀 Analyzing {len(repositories_to_analyze)} repositories...")
112
134
  click.echo(f" Period: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
113
135
 
114
136
  # Analyze repositories
@@ -116,13 +138,32 @@ def analyze(config: Path, weeks: int, output: Optional[Path], anonymize: bool,
116
138
  all_prs = []
117
139
  all_enrichments = {}
118
140
 
119
- for repo_config in cfg.repositories:
141
+ for repo_config in repositories_to_analyze:
120
142
  click.echo(f"\n📁 Analyzing {repo_config.name}...")
121
143
 
122
- # Check if repo exists
144
+ # Check if repo exists, clone if needed
123
145
  if not repo_config.path.exists():
124
- click.echo(f" ❌ Repository path not found: {repo_config.path}")
125
- continue
146
+ # Try to clone if we have a github_repo configured
147
+ if repo_config.github_repo and cfg.github.organization:
148
+ click.echo(" 📥 Cloning repository from GitHub...")
149
+ try:
150
+ # Ensure parent directory exists
151
+ repo_config.path.parent.mkdir(parents=True, exist_ok=True)
152
+
153
+ # Clone the repository
154
+ clone_url = f"https://github.com/{repo_config.github_repo}.git"
155
+ if cfg.github.token:
156
+ # Use token for authentication
157
+ clone_url = f"https://{cfg.github.token}@github.com/{repo_config.github_repo}.git"
158
+
159
+ git.Repo.clone_from(clone_url, repo_config.path, branch=repo_config.branch)
160
+ click.echo(f" ✅ Successfully cloned {repo_config.github_repo}")
161
+ except Exception as e:
162
+ click.echo(f" ❌ Failed to clone repository: {e}")
163
+ continue
164
+ else:
165
+ click.echo(f" ❌ Repository path not found: {repo_config.path}")
166
+ continue
126
167
 
127
168
  # Analyze repository
128
169
  try:
@@ -174,7 +215,7 @@ def analyze(config: Path, weeks: int, output: Optional[Path], anonymize: bool,
174
215
 
175
216
  # Analyze tickets
176
217
  click.echo("\n🎫 Analyzing ticket references...")
177
- ticket_extractor = TicketExtractor()
218
+ ticket_extractor = TicketExtractor(allowed_platforms=getattr(cfg.analysis, 'ticket_platforms', None))
178
219
  ticket_analysis = ticket_extractor.analyze_ticket_coverage(all_commits, all_prs)
179
220
 
180
221
  for platform, count in ticket_analysis['ticket_summary'].items():
@@ -319,7 +360,7 @@ def analyze(config: Path, weeks: int, output: Optional[Path], anonymize: bool,
319
360
  click.echo(f" - Total story points: {total_story_points}")
320
361
 
321
362
  if dora_metrics:
322
- click.echo(f"\n🎯 DORA Metrics:")
363
+ click.echo("\n🎯 DORA Metrics:")
323
364
  click.echo(f" - Deployment frequency: {dora_metrics['deployment_frequency']['category']}")
324
365
  click.echo(f" - Lead time: {dora_metrics['lead_time_hours']:.1f} hours")
325
366
  click.echo(f" - Change failure rate: {dora_metrics['change_failure_rate']:.1f}%")
@@ -357,7 +398,7 @@ def cache_stats(config: Path):
357
398
  # Calculate cache size
358
399
  import os
359
400
  cache_size = 0
360
- for root, dirs, files in os.walk(cfg.cache.directory):
401
+ for root, _dirs, files in os.walk(cfg.cache.directory):
361
402
  for f in files:
362
403
  cache_size += os.path.getsize(os.path.join(root, f))
363
404
 
@@ -392,6 +433,59 @@ def merge_identity(config: Path, dev1: str, dev2: str):
392
433
  sys.exit(1)
393
434
 
394
435
 
436
+ @cli.command()
437
+ @click.option('--config', '-c',
438
+ type=click.Path(exists=True, path_type=Path),
439
+ required=True,
440
+ help='Path to YAML configuration file')
441
+ def discover_jira_fields(config: Path):
442
+ """Discover available JIRA fields, particularly story point fields."""
443
+ try:
444
+ cfg = ConfigLoader.load(config)
445
+
446
+ # Check if JIRA is configured
447
+ if not cfg.jira or not cfg.jira.base_url:
448
+ click.echo("❌ JIRA is not configured in the configuration file")
449
+ return
450
+
451
+ # Initialize JIRA integration
452
+ from .integrations.jira_integration import JIRAIntegration
453
+
454
+ jira = JIRAIntegration(
455
+ cfg.jira.base_url,
456
+ cfg.jira.access_user,
457
+ cfg.jira.access_token,
458
+ None # No cache needed for field discovery
459
+ )
460
+
461
+ # Validate connection
462
+ click.echo(f"🔗 Connecting to JIRA at {cfg.jira.base_url}...")
463
+ if not jira.validate_connection():
464
+ click.echo("❌ Failed to connect to JIRA. Check your credentials.")
465
+ return
466
+
467
+ click.echo("✅ Connected successfully!\n")
468
+ click.echo("🔍 Discovering fields with potential story point data...")
469
+
470
+ fields = jira.discover_fields()
471
+
472
+ if not fields:
473
+ click.echo("No potential story point fields found.")
474
+ else:
475
+ click.echo(f"\nFound {len(fields)} potential story point fields:")
476
+ click.echo("\nAdd these to your configuration under jira_integration.story_point_fields:")
477
+ click.echo("```yaml")
478
+ click.echo("jira_integration:")
479
+ click.echo(" story_point_fields:")
480
+ for field_id, field_info in fields.items():
481
+ click.echo(f' - "{field_id}" # {field_info["name"]}')
482
+ click.echo("```")
483
+
484
+ except Exception as e:
485
+ click.echo(f"❌ Error: {e}", err=True)
486
+ sys.exit(1)
487
+
488
+
395
489
  @cli.command()
396
490
  @click.option('--config', '-c',
397
491
  type=click.Path(exists=True, path_type=Path),
@@ -1,9 +1,12 @@
1
1
  """Configuration management for GitFlow Analytics."""
2
2
  import os
3
- import yaml
4
- from pathlib import Path
5
- from typing import Dict, Any, Optional, List
6
3
  from dataclasses import dataclass, field
4
+ from pathlib import Path
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ import yaml
8
+ from dotenv import load_dotenv
9
+
7
10
 
8
11
  @dataclass
9
12
  class RepositoryConfig:
@@ -24,6 +27,7 @@ class GitHubConfig:
24
27
  """GitHub API configuration."""
25
28
  token: Optional[str] = None
26
29
  owner: Optional[str] = None
30
+ organization: Optional[str] = None
27
31
  base_url: str = "https://api.github.com"
28
32
  max_retries: int = 3
29
33
  backoff_factor: int = 2
@@ -42,10 +46,12 @@ class AnalysisConfig:
42
46
  story_point_patterns: List[str] = field(default_factory=list)
43
47
  exclude_authors: List[str] = field(default_factory=list)
44
48
  exclude_message_patterns: List[str] = field(default_factory=list)
49
+ exclude_paths: List[str] = field(default_factory=list)
45
50
  similarity_threshold: float = 0.85
46
51
  manual_identity_mappings: List[Dict[str, Any]] = field(default_factory=list)
47
52
  default_ticket_platform: Optional[str] = None
48
53
  branch_mapping_rules: Dict[str, List[str]] = field(default_factory=dict)
54
+ ticket_platforms: Optional[List[str]] = None
49
55
 
50
56
  @dataclass
51
57
  class OutputConfig:
@@ -65,6 +71,25 @@ class CacheConfig:
65
71
  ttl_hours: int = 168
66
72
  max_size_mb: int = 500
67
73
 
74
+ @dataclass
75
+ class JIRAConfig:
76
+ """JIRA configuration."""
77
+ access_user: str
78
+ access_token: str
79
+ base_url: Optional[str] = None
80
+
81
+ @dataclass
82
+ class JIRAIntegrationConfig:
83
+ """JIRA integration specific configuration."""
84
+ enabled: bool = True
85
+ fetch_story_points: bool = True
86
+ project_keys: List[str] = field(default_factory=list)
87
+ story_point_fields: List[str] = field(default_factory=lambda: [
88
+ "customfield_10016",
89
+ "customfield_10021",
90
+ "Story Points"
91
+ ])
92
+
68
93
  @dataclass
69
94
  class Config:
70
95
  """Main configuration container."""
@@ -73,14 +98,69 @@ class Config:
73
98
  analysis: AnalysisConfig
74
99
  output: OutputConfig
75
100
  cache: CacheConfig
101
+ jira: Optional[JIRAConfig] = None
102
+ jira_integration: Optional[JIRAIntegrationConfig] = None
103
+
104
+ def discover_organization_repositories(self, clone_base_path: Optional[Path] = None) -> List[RepositoryConfig]:
105
+ """Discover repositories from GitHub organization.
106
+
107
+ Args:
108
+ clone_base_path: Base directory where repos should be cloned/found.
109
+ If None, uses output directory.
110
+
111
+ Returns:
112
+ List of discovered repository configurations.
113
+ """
114
+ if not self.github.organization or not self.github.token:
115
+ return []
116
+
117
+ from github import Github
118
+
119
+ github_client = Github(self.github.token, base_url=self.github.base_url)
120
+
121
+ try:
122
+ org = github_client.get_organization(self.github.organization)
123
+ discovered_repos = []
124
+
125
+ base_path = clone_base_path or self.output.directory
126
+ if base_path is None:
127
+ raise ValueError("No base path available for repository cloning")
128
+
129
+ for repo in org.get_repos():
130
+ # Skip archived repositories
131
+ if repo.archived:
132
+ continue
133
+
134
+ # Create repository configuration
135
+ repo_path = base_path / repo.name
136
+ repo_config = RepositoryConfig(
137
+ name=repo.name,
138
+ path=repo_path,
139
+ github_repo=repo.full_name,
140
+ project_key=repo.name.upper().replace('-', '_'),
141
+ branch=repo.default_branch
142
+ )
143
+ discovered_repos.append(repo_config)
144
+
145
+ return discovered_repos
146
+
147
+ except Exception as e:
148
+ raise ValueError(f"Failed to discover repositories from organization {self.github.organization}: {e}") from e
76
149
 
77
150
  class ConfigLoader:
78
151
  """Load and validate configuration from YAML files."""
79
152
 
80
- @staticmethod
81
- def load(config_path: Path) -> Config:
153
+ @classmethod
154
+ def load(cls, config_path: Path) -> Config:
82
155
  """Load configuration from YAML file."""
83
- with open(config_path, 'r') as f:
156
+ # Load .env file from the same directory as the config file if it exists
157
+ config_dir = config_path.parent
158
+ env_file = config_dir / '.env'
159
+ if env_file.exists():
160
+ load_dotenv(env_file, override=True)
161
+ print(f"📋 Loaded environment variables from {env_file}")
162
+
163
+ with open(config_path) as f:
84
164
  data = yaml.safe_load(f)
85
165
 
86
166
  # Validate version
@@ -90,9 +170,16 @@ class ConfigLoader:
90
170
 
91
171
  # Process GitHub config
92
172
  github_data = data.get('github', {})
173
+
174
+ # Resolve GitHub token
175
+ github_token = cls._resolve_env_var(github_data.get('token'))
176
+ if github_data.get('token') and not github_token:
177
+ raise ValueError("GitHub is configured but GITHUB_TOKEN environment variable is not set")
178
+
93
179
  github_config = GitHubConfig(
94
- token=ConfigLoader._resolve_env_var(github_data.get('token')),
95
- owner=ConfigLoader._resolve_env_var(github_data.get('owner')),
180
+ token=github_token,
181
+ owner=cls._resolve_env_var(github_data.get('owner')),
182
+ organization=cls._resolve_env_var(github_data.get('organization')),
96
183
  base_url=github_data.get('base_url', 'https://api.github.com'),
97
184
  max_retries=github_data.get('rate_limit', {}).get('max_retries', 3),
98
185
  backoff_factor=github_data.get('rate_limit', {}).get('backoff_factor', 2)
@@ -100,26 +187,70 @@ class ConfigLoader:
100
187
 
101
188
  # Process repositories
102
189
  repositories = []
103
- for repo_data in data.get('repositories', []):
104
- # Handle github_repo with owner fallback
105
- github_repo = repo_data.get('github_repo')
106
- if github_repo and github_config.owner and '/' not in github_repo:
107
- github_repo = f"{github_config.owner}/{github_repo}"
108
-
109
- repo_config = RepositoryConfig(
110
- name=repo_data['name'],
111
- path=repo_data['path'],
112
- github_repo=github_repo,
113
- project_key=repo_data.get('project_key'),
114
- branch=repo_data.get('branch')
115
- )
116
- repositories.append(repo_config)
117
190
 
118
- if not repositories:
119
- raise ValueError("No repositories defined in configuration")
191
+ # Handle organization-based repository discovery
192
+ if github_config.organization and not data.get('repositories'):
193
+ # Organization specified but no explicit repositories - will be discovered at runtime
194
+ pass
195
+ else:
196
+ # Process explicitly defined repositories
197
+ for repo_data in data.get('repositories', []):
198
+ # Handle github_repo with owner/organization fallback
199
+ github_repo = repo_data.get('github_repo')
200
+ if github_repo and '/' not in github_repo:
201
+ if github_config.organization:
202
+ github_repo = f"{github_config.organization}/{github_repo}"
203
+ elif github_config.owner:
204
+ github_repo = f"{github_config.owner}/{github_repo}"
205
+
206
+ repo_config = RepositoryConfig(
207
+ name=repo_data['name'],
208
+ path=repo_data['path'],
209
+ github_repo=github_repo,
210
+ project_key=repo_data.get('project_key'),
211
+ branch=repo_data.get('branch')
212
+ )
213
+ repositories.append(repo_config)
214
+
215
+ # Allow empty repositories list if organization is specified
216
+ if not repositories and not github_config.organization:
217
+ raise ValueError("No repositories defined and no organization specified for discovery")
120
218
 
121
219
  # Process analysis settings
122
220
  analysis_data = data.get('analysis', {})
221
+
222
+ # Default exclude paths for common boilerplate/generated files
223
+ default_exclude_paths = [
224
+ "**/node_modules/**",
225
+ "**/vendor/**",
226
+ "**/dist/**",
227
+ "**/build/**",
228
+ "**/.next/**",
229
+ "**/__pycache__/**",
230
+ "**/*.min.js",
231
+ "**/*.min.css",
232
+ "**/*.bundle.js",
233
+ "**/*.bundle.css",
234
+ "**/package-lock.json",
235
+ "**/yarn.lock",
236
+ "**/poetry.lock",
237
+ "**/Pipfile.lock",
238
+ "**/composer.lock",
239
+ "**/Gemfile.lock",
240
+ "**/Cargo.lock",
241
+ "**/go.sum",
242
+ "**/*.generated.*",
243
+ "**/generated/**",
244
+ "**/coverage/**",
245
+ "**/.coverage/**",
246
+ "**/htmlcov/**",
247
+ "**/*.map"
248
+ ]
249
+
250
+ # Merge user-provided paths with defaults (user paths take precedence)
251
+ user_exclude_paths = analysis_data.get('exclude', {}).get('paths', [])
252
+ exclude_paths = user_exclude_paths if user_exclude_paths else default_exclude_paths
253
+
123
254
  analysis_config = AnalysisConfig(
124
255
  story_point_patterns=analysis_data.get('story_point_patterns', [
125
256
  r"(?:story\s*points?|sp|pts?)\s*[:=]\s*(\d+)",
@@ -131,17 +262,26 @@ class ConfigLoader:
131
262
  "renovate[bot]"
132
263
  ]),
133
264
  exclude_message_patterns=analysis_data.get('exclude', {}).get('message_patterns', []),
265
+ exclude_paths=exclude_paths,
134
266
  similarity_threshold=analysis_data.get('identity', {}).get('similarity_threshold', 0.85),
135
267
  manual_identity_mappings=analysis_data.get('identity', {}).get('manual_mappings', []),
136
268
  default_ticket_platform=analysis_data.get('default_ticket_platform'),
137
- branch_mapping_rules=analysis_data.get('branch_mapping_rules', {})
269
+ branch_mapping_rules=analysis_data.get('branch_mapping_rules', {}),
270
+ ticket_platforms=analysis_data.get('ticket_platforms')
138
271
  )
139
272
 
140
273
  # Process output settings
141
274
  output_data = data.get('output', {})
142
275
  output_dir = output_data.get('directory')
143
276
  if output_dir:
144
- output_dir = Path(output_dir).expanduser().resolve()
277
+ output_dir = Path(output_dir).expanduser()
278
+ # If relative path, make it relative to config file directory
279
+ if not output_dir.is_absolute():
280
+ output_dir = config_path.parent / output_dir
281
+ output_dir = output_dir.resolve()
282
+ else:
283
+ # Default to config file directory if not specified
284
+ output_dir = config_path.parent
145
285
 
146
286
  output_config = OutputConfig(
147
287
  directory=output_dir,
@@ -155,18 +295,61 @@ class ConfigLoader:
155
295
 
156
296
  # Process cache settings
157
297
  cache_data = data.get('cache', {})
298
+ cache_dir = cache_data.get('directory', '.gitflow-cache')
299
+ cache_path = Path(cache_dir)
300
+ # If relative path, make it relative to config file directory
301
+ if not cache_path.is_absolute():
302
+ cache_path = config_path.parent / cache_path
303
+
158
304
  cache_config = CacheConfig(
159
- directory=Path(cache_data.get('directory', '.gitflow-cache')),
305
+ directory=cache_path.resolve(),
160
306
  ttl_hours=cache_data.get('ttl_hours', 168),
161
307
  max_size_mb=cache_data.get('max_size_mb', 500)
162
308
  )
163
309
 
310
+ # Process JIRA settings
311
+ jira_config = None
312
+ jira_data = data.get('jira', {})
313
+ if jira_data:
314
+ access_user = cls._resolve_env_var(jira_data.get('access_user', ''))
315
+ access_token = cls._resolve_env_var(jira_data.get('access_token', ''))
316
+
317
+ # Validate JIRA credentials if JIRA is configured
318
+ if jira_data.get('access_user') and jira_data.get('access_token'):
319
+ if not access_user:
320
+ raise ValueError("JIRA is configured but JIRA_ACCESS_USER environment variable is not set")
321
+ if not access_token:
322
+ raise ValueError("JIRA is configured but JIRA_ACCESS_TOKEN environment variable is not set")
323
+
324
+ jira_config = JIRAConfig(
325
+ access_user=access_user,
326
+ access_token=access_token,
327
+ base_url=jira_data.get('base_url')
328
+ )
329
+
330
+ # Process JIRA integration settings
331
+ jira_integration_config = None
332
+ jira_integration_data = data.get('jira_integration', {})
333
+ if jira_integration_data:
334
+ jira_integration_config = JIRAIntegrationConfig(
335
+ enabled=jira_integration_data.get('enabled', True),
336
+ fetch_story_points=jira_integration_data.get('fetch_story_points', True),
337
+ project_keys=jira_integration_data.get('project_keys', []),
338
+ story_point_fields=jira_integration_data.get('story_point_fields', [
339
+ "customfield_10016",
340
+ "customfield_10021",
341
+ "Story Points"
342
+ ])
343
+ )
344
+
164
345
  return Config(
165
346
  repositories=repositories,
166
347
  github=github_config,
167
348
  analysis=analysis_config,
168
349
  output=output_config,
169
- cache=cache_config
350
+ cache=cache_config,
351
+ jira=jira_config,
352
+ jira_integration=jira_integration_config
170
353
  )
171
354
 
172
355
  @staticmethod
@@ -1,29 +1,33 @@
1
1
  """Git repository analyzer with batch processing support."""
2
- import re
2
+ import fnmatch
3
3
  from datetime import datetime
4
- from typing import List, Dict, Any, Optional, Tuple, Generator
5
4
  from pathlib import Path
5
+ from typing import Any, Dict, Generator, List, Optional
6
+
6
7
  import git
7
8
  from git import Repo
8
9
  from tqdm import tqdm
9
10
 
10
- from .cache import GitAnalysisCache
11
11
  from ..extractors.story_points import StoryPointExtractor
12
12
  from ..extractors.tickets import TicketExtractor
13
13
  from .branch_mapper import BranchToProjectMapper
14
+ from .cache import GitAnalysisCache
14
15
 
15
16
 
16
17
  class GitAnalyzer:
17
18
  """Analyze Git repositories with caching and batch processing."""
18
19
 
19
20
  def __init__(self, cache: GitAnalysisCache, batch_size: int = 1000,
20
- branch_mapping_rules: Optional[Dict[str, List[str]]] = None):
21
+ branch_mapping_rules: Optional[Dict[str, List[str]]] = None,
22
+ allowed_ticket_platforms: Optional[List[str]] = None,
23
+ exclude_paths: Optional[List[str]] = None):
21
24
  """Initialize analyzer with cache."""
22
25
  self.cache = cache
23
26
  self.batch_size = batch_size
24
27
  self.story_point_extractor = StoryPointExtractor()
25
- self.ticket_extractor = TicketExtractor()
28
+ self.ticket_extractor = TicketExtractor(allowed_platforms=allowed_ticket_platforms)
26
29
  self.branch_mapper = BranchToProjectMapper(branch_mapping_rules)
30
+ self.exclude_paths = exclude_paths or []
27
31
 
28
32
  def analyze_repository(self, repo_path: Path, since: datetime,
29
33
  branch: Optional[str] = None) -> List[Dict[str, Any]]:
@@ -31,7 +35,7 @@ class GitAnalyzer:
31
35
  try:
32
36
  repo = Repo(repo_path)
33
37
  except Exception as e:
34
- raise ValueError(f"Failed to open repository at {repo_path}: {e}")
38
+ raise ValueError(f"Failed to open repository at {repo_path}: {e}") from e
35
39
 
36
40
  # Get commits to analyze
37
41
  commits = self._get_commits(repo, since, branch)
@@ -133,12 +137,18 @@ class GitAnalyzer:
133
137
  commit_data['branch'], repo_path
134
138
  )
135
139
 
136
- # Calculate metrics
140
+ # Calculate metrics - use raw stats for backward compatibility
137
141
  stats = commit.stats.total
138
142
  commit_data['files_changed'] = stats.get('files', 0)
139
143
  commit_data['insertions'] = stats.get('insertions', 0)
140
144
  commit_data['deletions'] = stats.get('deletions', 0)
141
145
 
146
+ # Calculate filtered metrics (excluding boilerplate/generated files)
147
+ filtered_stats = self._calculate_filtered_stats(commit)
148
+ commit_data['filtered_files_changed'] = filtered_stats['files']
149
+ commit_data['filtered_insertions'] = filtered_stats['insertions']
150
+ commit_data['filtered_deletions'] = filtered_stats['deletions']
151
+
142
152
  # Extract story points
143
153
  commit_data['story_points'] = self.story_point_extractor.extract_from_text(
144
154
  commit.message
@@ -192,4 +202,54 @@ class GitAnalyzer:
192
202
  '.cs', '.vb', '.r', '.m', '.mm', '.f90', '.f95', '.lua'
193
203
  }
194
204
 
195
- return any(filepath.endswith(ext) for ext in code_extensions)
205
+ return any(filepath.endswith(ext) for ext in code_extensions)
206
+
207
+ def _should_exclude_file(self, filepath: str) -> bool:
208
+ """Check if file should be excluded from line counting."""
209
+ if not filepath:
210
+ return False
211
+
212
+ # Normalize path separators for consistent matching
213
+ filepath = filepath.replace('\\', '/')
214
+
215
+ # Check against exclude patterns
216
+ return any(fnmatch.fnmatch(filepath, pattern) for pattern in self.exclude_paths)
217
+
218
+ def _calculate_filtered_stats(self, commit: git.Commit) -> Dict[str, int]:
219
+ """Calculate commit statistics excluding boilerplate/generated files."""
220
+ filtered_stats = {
221
+ 'files': 0,
222
+ 'insertions': 0,
223
+ 'deletions': 0
224
+ }
225
+
226
+ # For initial commits or commits without parents
227
+ parent = commit.parents[0] if commit.parents else None
228
+
229
+ try:
230
+ for diff in commit.diff(parent):
231
+ # Get file path
232
+ file_path = diff.b_path if diff.b_path else diff.a_path
233
+ if not file_path:
234
+ continue
235
+
236
+ # Skip excluded files
237
+ if self._should_exclude_file(file_path):
238
+ continue
239
+
240
+ # Count the file
241
+ filtered_stats['files'] += 1
242
+
243
+ # Count insertions and deletions
244
+ if diff.diff:
245
+ diff_text = diff.diff.decode('utf-8', errors='ignore')
246
+ for line in diff_text.split('\n'):
247
+ if line.startswith('+') and not line.startswith('+++'):
248
+ filtered_stats['insertions'] += 1
249
+ elif line.startswith('-') and not line.startswith('---'):
250
+ filtered_stats['deletions'] += 1
251
+ except Exception:
252
+ # If we can't calculate filtered stats, return zeros
253
+ pass
254
+
255
+ return filtered_stats
@@ -1,7 +1,7 @@
1
1
  """Map git branches to projects based on naming conventions."""
2
2
  import re
3
- from typing import Dict, List, Optional, Tuple
4
3
  from pathlib import Path
4
+ from typing import Dict, List, Optional
5
5
 
6
6
 
7
7
  class BranchToProjectMapper: