gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- gitflow_analytics/__init__.py +11 -11
- gitflow_analytics/_version.py +2 -2
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4490 -378
- gitflow_analytics/cli_rich.py +503 -0
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +904 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +441 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -398
- gitflow_analytics/core/analyzer.py +1320 -172
- gitflow_analytics/core/branch_mapper.py +132 -132
- gitflow_analytics/core/cache.py +1554 -175
- gitflow_analytics/core/data_fetcher.py +1193 -0
- gitflow_analytics/core/identity.py +571 -185
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/base.py +13 -11
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +77 -59
- gitflow_analytics/extractors/tickets.py +841 -89
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +258 -87
- gitflow_analytics/integrations/jira_integration.py +572 -123
- gitflow_analytics/integrations/orchestrator.py +206 -82
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +542 -179
- gitflow_analytics/models/database.py +986 -59
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +29 -0
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
- gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
- gitflow_analytics/qualitative/core/__init__.py +13 -0
- gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
- gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
- gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
- gitflow_analytics/qualitative/core/processor.py +673 -0
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +25 -0
- gitflow_analytics/qualitative/models/schemas.py +306 -0
- gitflow_analytics/qualitative/utils/__init__.py +13 -0
- gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
- gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
- gitflow_analytics/qualitative/utils/metrics.py +361 -0
- gitflow_analytics/qualitative/utils/text_processing.py +285 -0
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +550 -18
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1700 -216
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2289 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +5 -0
- gitflow_analytics/tui/app.py +724 -0
- gitflow_analytics/tui/screens/__init__.py +8 -0
- gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
- gitflow_analytics/tui/screens/configuration_screen.py +523 -0
- gitflow_analytics/tui/screens/loading_screen.py +348 -0
- gitflow_analytics/tui/screens/main_screen.py +321 -0
- gitflow_analytics/tui/screens/results_screen.py +722 -0
- gitflow_analytics/tui/widgets/__init__.py +7 -0
- gitflow_analytics/tui/widgets/data_table.py +255 -0
- gitflow_analytics/tui/widgets/export_modal.py +301 -0
- gitflow_analytics/tui/widgets/progress_widget.py +187 -0
- gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
- gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
gitflow_analytics/integrations/orchestrator.py
@@ -1,119 +1,243 @@
 """Integration orchestrator for multiple platforms."""
+
 import json
 from datetime import datetime
-from typing import Any,
+from typing import Any, Union
 
 from ..core.cache import GitAnalysisCache
+from ..pm_framework.orchestrator import PMFrameworkOrchestrator
 from .github_integration import GitHubIntegration
 from .jira_integration import JIRAIntegration
 
 
 class IntegrationOrchestrator:
     """Orchestrate integrations with multiple platforms."""
-
+
     def __init__(self, config: Any, cache: GitAnalysisCache):
         """Initialize integration orchestrator."""
+        print(" 🔍 IntegrationOrchestrator.__init__ called")
         self.config = config
         self.cache = cache
-        self.integrations = {}
-
+        self.integrations: dict[str, Union[GitHubIntegration, JIRAIntegration]] = {}
+
         # Initialize available integrations
         if config.github and config.github.token:
-            self.integrations[
+            self.integrations["github"] = GitHubIntegration(
                 config.github.token,
                 cache,
                 config.github.max_retries,
                 config.github.backoff_factor,
-                allowed_ticket_platforms=getattr(config.analysis,
+                allowed_ticket_platforms=getattr(config.analysis, "ticket_platforms", None),
             )
-
+
         # Initialize JIRA integration if configured
         if config.jira and config.jira.access_user and config.jira.access_token:
             # Get JIRA specific settings if available
-            jira_settings = getattr(config,
-            if hasattr(jira_settings,
-            base_url = getattr(config.jira,
+            jira_settings = getattr(config, "jira_integration", {})
+            if hasattr(jira_settings, "enabled") and jira_settings.enabled:
+                base_url = getattr(config.jira, "base_url", None)
                 if base_url:
-
+                    # Extract network and proxy settings from jira_settings
+                    self.integrations["jira"] = JIRAIntegration(
                         base_url,
                         config.jira.access_user,
                         config.jira.access_token,
                         cache,
-                        story_point_fields=getattr(jira_settings,
+                        story_point_fields=getattr(jira_settings, "story_point_fields", None),
+                        dns_timeout=getattr(jira_settings, "dns_timeout", 10),
+                        connection_timeout=getattr(jira_settings, "connection_timeout", 30),
+                        max_retries=getattr(jira_settings, "max_retries", 3),
+                        backoff_factor=getattr(jira_settings, "backoff_factor", 1.0),
+                        enable_proxy=getattr(jira_settings, "enable_proxy", False),
+                        proxy_url=getattr(jira_settings, "proxy_url", None),
                     )
-
-
-
-
-
-
-
-
-
-
-        # GitHub enrichment
-        if 'github' in self.integrations and repo_config.github_repo:
-            github = self.integrations['github']
-
+
+        # Initialize PM framework orchestrator
+        self.pm_orchestrator = None
+        if (
+            hasattr(config, "pm_integration")
+            and config.pm_integration
+            and config.pm_integration.enabled
+        ):
+            print(" 🔍 PM Integration detected - building configuration...")
             try:
-                #
-
-
+                # Create PM platform configuration for the orchestrator
+                pm_config = {
+                    "pm_platforms": {},
+                    "analysis": {
+                        "pm_integration": {
+                            "enabled": config.pm_integration.enabled,
+                            "primary_platform": config.pm_integration.primary_platform,
+                            "correlation": config.pm_integration.correlation,
+                        }
+                    },
+                }
+
+                # Convert PM platform configs to expected format
+                platforms_dict = config.pm_integration.platforms
+                if hasattr(platforms_dict, "__dict__"):
+                    # It's an AttrDict, convert to regular dict
+                    platforms_dict = dict(platforms_dict)
+
+                for platform_name, platform_config in platforms_dict.items():
+                    if hasattr(platform_config, "enabled") and platform_config.enabled:
+                        # Convert AttrDict to regular dict
+                        platform_config_dict = (
+                            dict(platform_config.config)
+                            if hasattr(platform_config.config, "__dict__")
+                            else platform_config.config
+                        )
+                        platform_settings = {
+                            "enabled": True,
+                            **platform_config_dict,
+                        }
+
+                        # Special handling for JIRA - use credentials from top-level JIRA config
+                        if platform_name == "jira" and hasattr(config, "jira"):
+                            platform_settings["username"] = config.jira.access_user
+                            platform_settings["api_token"] = config.jira.access_token
+                            # Also ensure base_url matches if not set
+                            if (
+                                not platform_settings.get("base_url")
+                                or platform_settings["base_url"] == "will_be_set_at_runtime"
+                            ):
+                                platform_settings["base_url"] = config.jira.base_url
+                            # Add cache directory for ticket caching (config file directory)
+                            if hasattr(config, "cache") and hasattr(config.cache, "directory"):
+                                platform_settings["cache_dir"] = config.cache.directory
+                            # Debug output to check credentials
+                            print(
+                                f" 🔍 JIRA config: username={platform_settings['username']}, has_token={bool(platform_settings['api_token'])}, base_url={platform_settings['base_url']}, cache_dir={platform_settings.get('cache_dir', 'not_set')}"
+                            )
+
+                        pm_config["pm_platforms"][platform_name] = platform_settings
+
+                # Debug output - show final PM config
+                print(
+                    f" 🔍 Final PM config platforms: {list(pm_config.get('pm_platforms', {}).keys())}"
+                )
+                for plat_name, plat_config in pm_config.get("pm_platforms", {}).items():
+                    print(
+                        f" 🔍 {plat_name}: enabled={plat_config.get('enabled')}, has_username={bool(plat_config.get('username'))}, has_token={bool(plat_config.get('api_token'))}"
+                    )
+
+                self.pm_orchestrator = PMFrameworkOrchestrator(pm_config)
+                print(
+                    f"📋 PM Framework initialized with {len(self.pm_orchestrator.get_active_platforms())} platforms"
                 )
-
-
-                # Calculate PR metrics
-                if prs:
-                    enrichment['pr_metrics'] = github.calculate_pr_metrics(prs)
-
+
             except Exception as e:
-                print(f"
-
+                print(f"⚠️ Failed to initialize PM framework: {e}")
+                self.pm_orchestrator = None
+
+    def enrich_repository_data(
+        self, repo_config: Any, commits: list[dict[str, Any]], since: datetime
+    ) -> dict[str, Any]:
+        """Enrich repository data from all available integrations."""
+        enrichment: dict[str, Any] = {"prs": [], "issues": [], "pr_metrics": {}, "pm_data": {}}
+
+        # GitHub enrichment
+        if "github" in self.integrations and repo_config.github_repo:
+            github_integration = self.integrations["github"]
+            if isinstance(github_integration, GitHubIntegration):
+                try:
+                    # Get PR data
+                    prs = github_integration.enrich_repository_with_prs(
+                        repo_config.github_repo, commits, since
+                    )
+                    enrichment["prs"] = prs
+
+                    # Calculate PR metrics
+                    if prs:
+                        enrichment["pr_metrics"] = github_integration.calculate_pr_metrics(prs)
+
+                except Exception as e:
+                    import traceback
+
+                    print(f" ⚠️ GitHub enrichment failed: {e}")
+                    print(f" Debug traceback: {traceback.format_exc()}")
+
         # JIRA enrichment for story points
-        if
-
+        if "jira" in self.integrations:
+            jira_integration = self.integrations["jira"]
+            if isinstance(jira_integration, JIRAIntegration):
+                try:
+                    # Enrich commits with JIRA story points
+                    jira_integration.enrich_commits_with_jira_data(commits)
+
+                    # Enrich PRs with JIRA story points
+                    if enrichment["prs"]:
+                        jira_integration.enrich_prs_with_jira_data(enrichment["prs"])
+
+                except Exception as e:
+                    print(f" ⚠️ JIRA enrichment failed: {e}")
+
+        # PM Framework enrichment
+        if self.pm_orchestrator and self.pm_orchestrator.is_enabled():
             try:
-
-
-
-
-
-
-
+                print(" 📋 Collecting PM platform data...")
+
+                # Get all issues from PM platforms
+                pm_issues = self.pm_orchestrator.get_all_issues(since=since)
+                enrichment["pm_data"]["issues"] = pm_issues
+
+                # Correlate issues with commits
+                correlations = self.pm_orchestrator.correlate_issues_with_commits(
+                    pm_issues, commits
+                )
+                enrichment["pm_data"]["correlations"] = correlations
+
+                # Calculate enhanced metrics
+                enhanced_metrics = self.pm_orchestrator.calculate_enhanced_metrics(
+                    commits, enrichment["prs"], pm_issues, correlations
+                )
+                enrichment["pm_data"]["metrics"] = enhanced_metrics
+
+                # Only show correlations message if there are any correlations found
+                if correlations:
+                    print(
+                        f" ✅ PM correlations found: {len(correlations)} commits linked to issues"
+                    )
+                else:
+                    print(" 📋 PM data processed (no correlations found)")
+
             except Exception as e:
-                print(f" ⚠️
-
+                print(f" ⚠️ PM framework enrichment failed: {e}")
+                enrichment["pm_data"] = {"error": str(e)}
+
         return enrichment
-
-    def get_platform_issues(self, project_key: str, since: datetime) ->
+
+    def get_platform_issues(self, project_key: str, since: datetime) -> list[dict[str, Any]]:
         """Get issues from all configured platforms."""
-        all_issues = []
-
+        all_issues: list[dict[str, Any]] = []
+
         # Check cache first
         cached_issues = []
-        for platform in [
+        for platform in ["github", "jira", "clickup", "linear"]:
             cached = self.cache.get_cached_issues(platform, project_key)
             cached_issues.extend(cached)
-
+
         if cached_issues:
             return cached_issues
-
+
         # Future: Fetch from APIs if not cached
         # This is where we'd add actual API calls to each platform
-
+
         return all_issues
-
-    def export_to_json(
-
-
-
-
-
-
+
+    def export_to_json(
+        self,
+        commits: list[dict[str, Any]],
+        prs: list[dict[str, Any]],
+        developer_stats: list[dict[str, Any]],
+        project_metrics: dict[str, Any],
+        dora_metrics: dict[str, Any],
+        output_path: str,
+    ) -> str:
         """Export all data to JSON format for API consumption."""
-
+
         # Prepare data for JSON serialization
-        def serialize_dates(obj):
+        def serialize_dates(obj: Any) -> Any:
             """Convert datetime objects to ISO format strings."""
             if isinstance(obj, datetime):
                 return obj.isoformat()
@@ -122,24 +246,24 @@ class IntegrationOrchestrator:
             elif isinstance(obj, list):
                 return [serialize_dates(item) for item in obj]
             return obj
-
+
         export_data = {
-
-
-
-
-
-
+            "metadata": {
+                "generated_at": datetime.now().isoformat(),
+                "version": "1.0",
+                "total_commits": len(commits),
+                "total_prs": len(prs),
+                "total_developers": len(developer_stats),
             },
-
-
-
-
-
+            "commits": serialize_dates(commits),
+            "pull_requests": serialize_dates(prs),
+            "developers": serialize_dates(developer_stats),
+            "project_metrics": serialize_dates(project_metrics),
+            "dora_metrics": serialize_dates(dora_metrics),
         }
-
+
         # Write JSON file
-        with open(output_path,
+        with open(output_path, "w") as f:
            json.dump(export_data, f, indent=2)
-
-        return output_path
+
+        return output_path
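
For context on the new export_to_json path above: it recursively converts datetime values before dumping and wraps everything in a metadata block. The standalone sketch below mirrors that helper outside the class to show the resulting JSON shape; the sample commit record and its field names are invented for illustration and are not taken from the package.

    import json
    from datetime import datetime
    from typing import Any

    def serialize_dates(obj: Any) -> Any:
        """Recursively convert datetime values to ISO-format strings."""
        if isinstance(obj, datetime):
            return obj.isoformat()
        if isinstance(obj, dict):
            return {key: serialize_dates(value) for key, value in obj.items()}
        if isinstance(obj, list):
            return [serialize_dates(item) for item in obj]
        return obj

    # Hypothetical commit record; these field names are illustrative only.
    commits = [{"hash": "abc123", "timestamp": datetime(2024, 1, 15, 9, 30), "files": ["cli.py"]}]

    export_data = {
        "metadata": {
            "generated_at": datetime.now().isoformat(),
            "version": "1.0",
            "total_commits": len(commits),
        },
        "commits": serialize_dates(commits),
    }
    print(json.dumps(export_data, indent=2))

The real method serializes pull requests, developer stats, project metrics, and DORA metrics the same way before writing the file.
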
gitflow_analytics/metrics/activity_scoring.py (new file)
@@ -0,0 +1,322 @@
+"""Developer activity scoring module using balanced metrics.
+
+Based on research and best practices for measuring developer productivity in 2024,
+this module implements a balanced scoring approach that considers:
+- Commits (baseline activity)
+- Pull Requests (collaboration and review)
+- Lines of Code (impact, with diminishing returns)
+- Code churn (deletions valued for refactoring)
+"""
+
+import math
+from typing import Any
+
+
+class ActivityScorer:
+    """Calculate balanced developer activity scores based on multiple metrics."""
+
+    # Weights based on research indicating balanced approach
+    WEIGHTS = {
+        "commits": 0.25,  # Each commit represents baseline effort
+        "prs": 0.30,  # PRs indicate collaboration and review effort
+        "code_impact": 0.30,  # Lines changed with diminishing returns
+        "complexity": 0.15,  # File changes and complexity
+    }
+
+    # Scaling factors based on research
+    COMMIT_BASE_SCORE = 10  # Each commit worth base 10 points
+    PR_BASE_SCORE = 50  # Each PR worth base 50 points (5x commit)
+    OPTIMAL_PR_SIZE = 200  # Research shows PRs under 200 lines are optimal
+
+    def calculate_activity_score(self, metrics: dict[str, Any]) -> dict[str, Any]:
+        """Calculate balanced activity score for a developer.
+
+        Args:
+            metrics: Dictionary containing:
+                - commits: Number of commits
+                - prs_involved: Number of PRs
+                - lines_added: Lines added
+                - lines_removed: Lines removed
+                - files_changed: Number of files changed
+                - complexity_delta: Complexity change
+
+        Returns:
+            Dictionary with:
+                - raw_score: Unscaled activity score
+                - normalized_score: Score normalized to 0-100
+                - components: Breakdown of score components
+        """
+        # Extract metrics with defaults
+        commits = metrics.get("commits", 0)
+        prs = metrics.get("prs_involved", 0)
+        lines_added = metrics.get("lines_added", 0)
+        lines_removed = metrics.get("lines_removed", 0)
+        files_changed = metrics.get(
+            "files_changed_count",
+            (
+                metrics.get("files_changed", 0)
+                if isinstance(metrics.get("files_changed"), int)
+                else len(metrics.get("files_changed", []))
+            ),
+        )
+        complexity = metrics.get("complexity_delta", 0)
+
+        # Calculate component scores
+        commit_score = self._calculate_commit_score(commits)
+        pr_score = self._calculate_pr_score(prs, lines_added + lines_removed)
+        code_score = self._calculate_code_impact_score(lines_added, lines_removed)
+        complexity_score = self._calculate_complexity_score(files_changed, complexity)
+
+        # Weighted total
+        components = {
+            "commit_score": commit_score,
+            "pr_score": pr_score,
+            "code_impact_score": code_score,
+            "complexity_score": complexity_score,
+        }
+
+        raw_score = (
+            commit_score * self.WEIGHTS["commits"]
+            + pr_score * self.WEIGHTS["prs"]
+            + code_score * self.WEIGHTS["code_impact"]
+            + complexity_score * self.WEIGHTS["complexity"]
+        )
+
+        return {
+            "raw_score": raw_score,
+            "normalized_score": self._normalize_score(raw_score),
+            "components": components,
+            "activity_level": self._get_activity_level(raw_score),
+        }
+
+    def _calculate_commit_score(self, commits: int) -> float:
+        """Calculate score from commit count with diminishing returns."""
+        if commits == 0:
+            return 0
+
+        # Use logarithmic scaling for diminishing returns
+        # First 10 commits worth full value, then diminishing
+        if commits <= 10:
+            return commits * self.COMMIT_BASE_SCORE
+        else:
+            base = 10 * self.COMMIT_BASE_SCORE
+            extra = math.log10(commits - 9) * self.COMMIT_BASE_SCORE * 5
+            return base + extra
+
+    def _calculate_pr_score(self, prs: int, total_lines: int) -> float:
+        """Calculate PR score considering optimal PR sizes."""
+        if prs == 0:
+            return 0
+
+        base_score = prs * self.PR_BASE_SCORE
+
+        # Bonus for maintaining optimal PR size
+        avg_pr_size = total_lines / prs if prs > 0 else 0
+        if avg_pr_size <= self.OPTIMAL_PR_SIZE:
+            size_bonus = 1.2  # 20% bonus for optimal size
+        else:
+            # Penalty for oversized PRs
+            size_bonus = max(0.7, 1 - (avg_pr_size - self.OPTIMAL_PR_SIZE) / 1000)
+
+        return base_score * size_bonus
+
+    def _calculate_code_impact_score(self, lines_added: int, lines_removed: int) -> float:
+        """Calculate code impact score with balanced add/remove consideration and enhanced diminishing returns.
+
+        WHY: Massive single commits can unfairly inflate scores. This implementation
+        uses stronger diminishing returns to prevent score inflation from extremely
+        large commits while still rewarding meaningful contributions.
+        """
+        # Research shows deletions are valuable (refactoring, cleanup)
+        # Weight deletions at 70% of additions
+        effective_lines = lines_added + (lines_removed * 0.7)
+
+        if effective_lines == 0:
+            return 0
+
+        # Logarithmic scaling to prevent gaming with massive changes
+        # First 500 lines worth full value
+        if effective_lines <= 500:
+            return effective_lines * 0.2
+        else:
+            base = 500 * 0.2
+            # Enhanced diminishing returns for massive commits
+            if effective_lines <= 2000:
+                extra = math.log10(effective_lines - 499) * 15  # Reduced multiplier
+            else:
+                # Very large commits get even more aggressive diminishing returns
+                medium_extra = math.log10(2000 - 499) * 15
+                large_extra = math.log10(effective_lines - 1999) * 8  # Much smaller multiplier
+                extra = medium_extra + large_extra
+            return base + extra
+
+    def _calculate_complexity_score(self, files_changed: int, complexity_delta: float) -> float:
+        """Calculate score based on breadth and complexity of changes."""
+        if files_changed == 0:
+            return 0
+
+        # Base score from files touched (breadth of impact)
+        file_score = min(files_changed * 5, 50)  # Cap at 50 points
+
+        # Complexity factor (can be negative for simplification)
+        # Reward simplification (negative complexity delta)
+        if complexity_delta < 0:
+            complexity_bonus = abs(complexity_delta) * 0.5  # Reward simplification
+        else:
+            complexity_bonus = -min(
+                complexity_delta * 0.2, 10
+            )  # Small penalty for added complexity
+
+        return max(0, file_score + complexity_bonus)
+
+    def _normalize_score(self, raw_score: float) -> float:
+        """Normalize score to 0-100 range."""
+        # Based on research, a highly productive week might have:
+        # - 15 commits (150 points after scaling)
+        # - 3 PRs of optimal size (180 points)
+        # - 1000 effective lines (120 points)
+        # - 20 files changed (50 points)
+        # Total: ~500 points = 100 normalized
+
+        normalized = (raw_score / 500) * 100
+        return min(100, normalized)  # Cap at 100
+
+    def _get_activity_level(self, raw_score: float) -> str:
+        """Categorize activity level based on score."""
+        normalized = self._normalize_score(raw_score)
+
+        if normalized >= 80:
+            return "exceptional"
+        elif normalized >= 60:
+            return "high"
+        elif normalized >= 40:
+            return "moderate"
+        elif normalized >= 20:
+            return "low"
+        else:
+            return "minimal"
+
+    def calculate_team_relative_score(
+        self, individual_score: float, team_scores: list[float]
+    ) -> dict[str, Any]:
+        """Calculate relative performance within team context.
+
+        Args:
+            individual_score: Individual's raw activity score
+            team_scores: List of all team members' raw scores
+
+        Returns:
+            Dictionary with percentile and relative metrics
+        """
+        if not team_scores:
+            return {"percentile": 50, "relative_score": 1.0, "team_position": "average"}
+
+        # Calculate percentile
+        scores_below = sum(1 for score in team_scores if score < individual_score)
+        percentile = (scores_below / len(team_scores)) * 100
+
+        # Calculate relative to team average
+        team_avg = sum(team_scores) / len(team_scores)
+        relative_score = individual_score / team_avg if team_avg > 0 else 1.0
+
+        # Determine position
+        if percentile >= 90:
+            position = "top_performer"
+        elif percentile >= 75:
+            position = "above_average"
+        elif percentile >= 25:
+            position = "average"
+        else:
+            position = "below_average"
+
+        return {
+            "percentile": round(percentile, 1),
+            "relative_score": round(relative_score, 2),
+            "team_position": position,
+            "team_average": round(team_avg, 1),
+        }
+
+    def normalize_scores_on_curve(
+        self, developer_scores: dict[str, float], curve_mean: float = 50.0, curve_std: float = 15.0
+    ) -> dict[str, dict[str, Any]]:
+        """Normalize activity scores on a bell curve with quintile grouping.
+
+        Args:
+            developer_scores: Dictionary mapping developer IDs to raw scores
+            curve_mean: Target mean for the normalized distribution (default: 50)
+            curve_std: Target standard deviation for the distribution (default: 15)
+
+        Returns:
+            Dictionary with normalized scores and quintile groupings
+        """
+        if not developer_scores:
+            return {}
+
+        # Get all scores
+        scores = list(developer_scores.values())
+
+        # Calculate current statistics
+        current_mean = sum(scores) / len(scores)
+        variance = sum((x - current_mean) ** 2 for x in scores) / len(scores)
+        current_std = math.sqrt(variance) if variance > 0 else 1.0
+
+        # Normalize to bell curve
+        normalized_scores = {}
+        for dev_id, raw_score in developer_scores.items():
+            # Z-score normalization
+            z_score = (raw_score - current_mean) / current_std if current_std > 0 else 0
+
+            # Transform to target distribution
+            curved_score = curve_mean + (z_score * curve_std)
+
+            # Ensure scores stay in reasonable range (0-100)
+            curved_score = max(0, min(100, curved_score))
+
+            normalized_scores[dev_id] = curved_score
+
+        # Sort developers by normalized score for quintile assignment
+        sorted_devs = sorted(normalized_scores.items(), key=lambda x: x[1])
+
+        # Assign quintiles
+        results = {}
+        quintile_size = len(sorted_devs) / 5
+
+        for idx, (dev_id, curved_score) in enumerate(sorted_devs):
+            # Determine quintile (1-5)
+            quintile = min(5, int(idx / quintile_size) + 1)
+
+            # Determine activity level based on quintile
+            if quintile == 5:
+                activity_level = "exceptional"
+                level_description = "Top 20%"
+            elif quintile == 4:
+                activity_level = "high"
+                level_description = "60-80th percentile"
+            elif quintile == 3:
+                activity_level = "moderate"
+                level_description = "40-60th percentile"
+            elif quintile == 2:
+                activity_level = "low"
+                level_description = "20-40th percentile"
+            else:  # quintile == 1
+                activity_level = "minimal"
+                level_description = "Bottom 20%"
+
+            # Calculate exact percentile
+            percentile = ((idx + 0.5) / len(sorted_devs)) * 100
+
+            results[dev_id] = {
+                "raw_score": developer_scores[dev_id],
+                "curved_score": round(curved_score, 1),
+                "quintile": quintile,
+                "activity_level": activity_level,
+                "level_description": level_description,
+                "percentile": round(percentile, 0),
+                "z_score": (
+                    round((developer_scores[dev_id] - current_mean) / current_std, 2)
+                    if current_std > 0
+                    else 0
+                ),
+            }
+
+        return results