gitflow-analytics 3.3.0__py3-none-any.whl → 3.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/cli.py +517 -15
  3. gitflow_analytics/cli_wizards/__init__.py +10 -0
  4. gitflow_analytics/cli_wizards/install_wizard.py +1181 -0
  5. gitflow_analytics/cli_wizards/run_launcher.py +433 -0
  6. gitflow_analytics/config/__init__.py +3 -0
  7. gitflow_analytics/config/aliases.py +306 -0
  8. gitflow_analytics/config/loader.py +35 -1
  9. gitflow_analytics/config/schema.py +13 -0
  10. gitflow_analytics/constants.py +75 -0
  11. gitflow_analytics/core/cache.py +7 -3
  12. gitflow_analytics/core/data_fetcher.py +66 -30
  13. gitflow_analytics/core/git_timeout_wrapper.py +6 -4
  14. gitflow_analytics/core/progress.py +2 -4
  15. gitflow_analytics/core/subprocess_git.py +31 -5
  16. gitflow_analytics/identity_llm/analysis_pass.py +13 -3
  17. gitflow_analytics/identity_llm/analyzer.py +14 -2
  18. gitflow_analytics/identity_llm/models.py +7 -1
  19. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +5 -3
  20. gitflow_analytics/security/config.py +6 -6
  21. gitflow_analytics/security/extractors/dependency_checker.py +14 -14
  22. gitflow_analytics/security/extractors/secret_detector.py +8 -14
  23. gitflow_analytics/security/extractors/vulnerability_scanner.py +9 -9
  24. gitflow_analytics/security/llm_analyzer.py +10 -10
  25. gitflow_analytics/security/security_analyzer.py +17 -17
  26. gitflow_analytics/tui/screens/analysis_progress_screen.py +1 -1
  27. gitflow_analytics/ui/progress_display.py +36 -29
  28. gitflow_analytics/verify_activity.py +23 -26
  29. {gitflow_analytics-3.3.0.dist-info → gitflow_analytics-3.5.2.dist-info}/METADATA +1 -1
  30. {gitflow_analytics-3.3.0.dist-info → gitflow_analytics-3.5.2.dist-info}/RECORD +34 -31
  31. gitflow_analytics/security/reports/__init__.py +0 -5
  32. gitflow_analytics/security/reports/security_report.py +0 -358
  33. {gitflow_analytics-3.3.0.dist-info → gitflow_analytics-3.5.2.dist-info}/WHEEL +0 -0
  34. {gitflow_analytics-3.3.0.dist-info → gitflow_analytics-3.5.2.dist-info}/entry_points.txt +0 -0
  35. {gitflow_analytics-3.3.0.dist-info → gitflow_analytics-3.5.2.dist-info}/licenses/LICENSE +0 -0
  36. {gitflow_analytics-3.3.0.dist-info → gitflow_analytics-3.5.2.dist-info}/top_level.txt +0 -0
gitflow_analytics/config/aliases.py
@@ -0,0 +1,306 @@
+ """Developer identity aliases management.
+
+ This module provides functionality for managing developer identity aliases
+ across multiple configuration files. Aliases can be shared to maintain
+ consistent identity resolution across different analysis configurations.
+ """
+
+ import logging
+ from dataclasses import dataclass, field
+ from pathlib import Path
+ from typing import Any, Optional
+
+ import yaml
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class DeveloperAlias:
+     """A developer alias configuration.
+
+     Represents a single developer with their primary email and all known aliases.
+     Supports both manual and LLM-generated alias configurations with confidence scores.
+     """
+
+     primary_email: str
+     aliases: list[str] = field(default_factory=list)
+     name: Optional[str] = None
+     confidence: float = 1.0
+     reasoning: str = ""
+
+     def to_dict(self) -> dict[str, Any]:
+         """Convert to dictionary format for YAML serialization.
+
+         Returns:
+             Dictionary representation with optional fields omitted if not set
+         """
+         result: dict[str, Any] = {
+             "primary_email": self.primary_email,
+             "aliases": self.aliases,
+         }
+
+         if self.name:
+             result["name"] = self.name
+
+         # Only include confidence and reasoning for LLM-generated aliases
+         if self.confidence < 1.0:
+             result["confidence"] = round(self.confidence, 2)
+             if self.reasoning:
+                 result["reasoning"] = self.reasoning
+
+         return result
+
+
+ class AliasesManager:
+     """Manages developer identity aliases.
+
+     Provides functionality to load, save, and manipulate developer identity aliases.
+     Supports both manual aliases (confidence=1.0) and LLM-generated aliases with
+     confidence scores and reasoning.
+
+     Example:
+         >>> manager = AliasesManager(Path("aliases.yaml"))
+         >>> manager.load()
+         >>> manager.add_alias(DeveloperAlias(
+         ...     primary_email="john@company.com",
+         ...     aliases=["jdoe@gmail.com"],
+         ...     name="John Doe"
+         ... ))
+         >>> manager.save()
+     """
+
+     def __init__(self, aliases_path: Optional[Path] = None):
+         """Initialize aliases manager.
+
+         Args:
+             aliases_path: Path to aliases.yaml file. If None, aliases must be
+                 added programmatically or loaded from another source.
+         """
+         self.aliases_path = aliases_path
+         self.aliases: list[DeveloperAlias] = []
+
+         if aliases_path and aliases_path.exists():
+             self.load()
+
+     def load(self) -> None:
+         """Load aliases from file.
+
+         Loads developer aliases from the configured YAML file. If the file
+         doesn't exist or is empty, initializes with an empty alias list.
+
+         Raises:
+             yaml.YAMLError: If the YAML file is malformed
+         """
+         if not self.aliases_path or not self.aliases_path.exists():
+             logger.debug("No aliases file found or path not set")
+             return
+
+         try:
+             with open(self.aliases_path) as f:
+                 data = yaml.safe_load(f) or {}
+
+             self.aliases = []
+             for alias_data in data.get("developer_aliases", []):
+                 # Support both 'primary_email' (new) and 'canonical_email' (old)
+                 primary_email = alias_data.get("primary_email") or alias_data.get("canonical_email")
+
+                 if not primary_email:
+                     logger.warning(f"Skipping alias entry without primary_email: {alias_data}")
+                     continue
+
+                 self.aliases.append(
+                     DeveloperAlias(
+                         primary_email=primary_email,
+                         aliases=alias_data.get("aliases", []),
+                         name=alias_data.get("name"),
+                         confidence=alias_data.get("confidence", 1.0),
+                         reasoning=alias_data.get("reasoning", ""),
+                     )
+                 )
+
+             logger.info(f"Loaded {len(self.aliases)} developer aliases from {self.aliases_path}")
+
+         except yaml.YAMLError as e:
+             logger.error(f"Error parsing aliases file {self.aliases_path}: {e}")
+             raise
+         except Exception as e:
+             logger.error(f"Error loading aliases file {self.aliases_path}: {e}")
+             raise
+
+     def save(self) -> None:
+         """Save aliases to file.
+
+         Writes all developer aliases to the configured YAML file with proper
+         formatting and comments. Creates the parent directory if it doesn't exist.
+
+         Raises:
+             OSError: If file cannot be written
+         """
+         if not self.aliases_path:
+             logger.warning("No aliases path configured, cannot save")
+             return
+
+         # Ensure directory exists
+         self.aliases_path.parent.mkdir(parents=True, exist_ok=True)
+
+         try:
+             # Build data structure with comments
+             data = {
+                 "# Developer Identity Aliases": None,
+                 "# Generated by GitFlow Analytics": None,
+                 "# Share this file across multiple config files": None,
+                 "# Each alias maps multiple email addresses to a single developer": None,
+                 "developer_aliases": [alias.to_dict() for alias in self.aliases],
+             }
+
+             with open(self.aliases_path, "w") as f:
+                 # Custom YAML dump to preserve comments
+                 f.write("# Developer Identity Aliases\n")
+                 f.write("# Generated by GitFlow Analytics\n")
+                 f.write("# Share this file across multiple config files\n")
+                 f.write("# Each alias maps multiple email addresses to a single developer\n\n")
+
+                 # Write the aliases list
+                 yaml.dump(
+                     {"developer_aliases": data["developer_aliases"]},
+                     f,
+                     default_flow_style=False,
+                     sort_keys=False,
+                     allow_unicode=True,
+                 )
+
+             logger.info(f"Saved {len(self.aliases)} developer aliases to {self.aliases_path}")
+
+         except Exception as e:
+             logger.error(f"Error saving aliases file {self.aliases_path}: {e}")
+             raise
+
+     def add_alias(self, alias: DeveloperAlias) -> None:
+         """Add or update a developer alias.
+
+         If an alias with the same primary email already exists, it will be replaced.
+         This ensures there is only one alias configuration per developer.
+
+         Args:
+             alias: The developer alias to add or update
+         """
+         # Remove existing alias for same primary email
+         self.aliases = [a for a in self.aliases if a.primary_email != alias.primary_email]
+         self.aliases.append(alias)
+         logger.debug(f"Added/updated alias for {alias.primary_email}")
+
+     def remove_alias(self, primary_email: str) -> bool:
+         """Remove a developer alias by primary email.
+
+         Args:
+             primary_email: The primary email of the alias to remove
+
+         Returns:
+             True if an alias was removed, False if not found
+         """
+         original_count = len(self.aliases)
+         self.aliases = [a for a in self.aliases if a.primary_email != primary_email]
+         removed = len(self.aliases) < original_count
+         if removed:
+             logger.debug(f"Removed alias for {primary_email}")
+         return removed
+
+     def get_alias(self, primary_email: str) -> Optional[DeveloperAlias]:
+         """Get a developer alias by primary email.
+
+         Args:
+             primary_email: The primary email to look up
+
+         Returns:
+             The developer alias if found, None otherwise
+         """
+         for alias in self.aliases:
+             if alias.primary_email == primary_email:
+                 return alias
+         return None
+
+     def to_manual_mappings(self) -> list[dict[str, Any]]:
+         """Convert aliases to config manual_identity_mappings format.
+
+         Converts the internal alias representation to the format expected
+         by the GitFlow Analytics configuration's manual_identity_mappings field.
+
+         Returns:
+             List of manual identity mapping dictionaries
+         """
+         mappings = []
+         for alias in self.aliases:
+             mapping: dict[str, Any] = {"primary_email": alias.primary_email}
+
+             if alias.name:
+                 mapping["name"] = alias.name
+
+             mapping["aliases"] = alias.aliases
+
+             # Include confidence and reasoning for LLM-generated mappings
+             if alias.confidence < 1.0:
+                 mapping["confidence"] = alias.confidence
+                 if alias.reasoning:
+                     mapping["reasoning"] = alias.reasoning
+
+             mappings.append(mapping)
+
+         return mappings
+
+     def merge_from_mappings(self, mappings: list[dict[str, Any]]) -> None:
+         """Merge aliases from manual identity mappings.
+
+         Takes manual identity mappings from a config file and merges them
+         into the current alias set. Existing aliases are preserved unless
+         they conflict with the new mappings.
+
+         Args:
+             mappings: List of manual identity mapping dictionaries
+         """
+         for mapping in mappings:
+             # Support both field name variants
+             primary_email = mapping.get("primary_email") or mapping.get("canonical_email")
+
+             if not primary_email:
+                 logger.warning(f"Skipping mapping without primary_email: {mapping}")
+                 continue
+
+             alias = DeveloperAlias(
+                 primary_email=primary_email,
+                 aliases=mapping.get("aliases", []),
+                 name=mapping.get("name"),
+                 confidence=mapping.get("confidence", 1.0),
+                 reasoning=mapping.get("reasoning", ""),
+             )
+
+             self.add_alias(alias)
+
+     def get_statistics(self) -> dict[str, Any]:
+         """Get statistics about the aliases.
+
+         Returns:
+             Dictionary with statistics including total aliases, manual vs LLM-generated,
+             average confidence, etc.
+         """
+         if not self.aliases:
+             return {
+                 "total_aliases": 0,
+                 "manual_aliases": 0,
+                 "llm_aliases": 0,
+                 "avg_confidence": 0.0,
+                 "total_email_addresses": 0,
+             }
+
+         manual_count = sum(1 for a in self.aliases if a.confidence == 1.0)
+         llm_count = len(self.aliases) - manual_count
+         avg_confidence = sum(a.confidence for a in self.aliases) / len(self.aliases)
+         total_emails = sum(len(a.aliases) + 1 for a in self.aliases)  # +1 for primary
+
+         return {
+             "total_aliases": len(self.aliases),
+             "manual_aliases": manual_count,
+             "llm_aliases": llm_count,
+             "avg_confidence": round(avg_confidence, 3),
+             "total_email_addresses": total_emails,
+         }
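For orientation, here is a minimal usage sketch of the new aliases module; the file path, name, and email addresses are illustrative, not taken from the package:

from pathlib import Path

from gitflow_analytics.config.aliases import AliasesManager, DeveloperAlias

# Create (or load, if the file already exists) a shared aliases store
manager = AliasesManager(Path("aliases.yaml"))
manager.add_alias(
    DeveloperAlias(
        primary_email="jane@company.com",
        aliases=["jane.doe@users.noreply.github.com"],
        name="Jane Doe",
    )
)
manager.save()  # writes a commented header plus a developer_aliases list

print(manager.get_statistics())
# -> {'total_aliases': 1, 'manual_aliases': 1, 'llm_aliases': 0,
#     'avg_confidence': 1.0, 'total_email_addresses': 2}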
gitflow_analytics/config/loader.py
@@ -1,5 +1,6 @@
  """YAML configuration loading and environment variable expansion."""

+ import logging
  import os
  from pathlib import Path
  from typing import Any, Optional, Union
@@ -33,6 +34,8 @@ from .schema import (
  )
  from .validator import ConfigValidator

+ logger = logging.getLogger(__name__)
+

  class ConfigLoader:
      """Load and validate configuration from YAML files."""
@@ -533,6 +536,36 @@ class ConfigLoader:
              BranchAnalysisConfig(**branch_data) if branch_data else BranchAnalysisConfig()
          )

+         # Process aliases file and manual identity mappings
+         manual_mappings = list(analysis_data.get("identity", {}).get("manual_mappings", []))
+         aliases_file_path = None
+
+         # Load aliases from external file if specified
+         aliases_file = analysis_data.get("identity", {}).get("aliases_file")
+         if aliases_file:
+             aliases_path = Path(aliases_file).expanduser()
+             # Make relative paths relative to config file directory
+             if not aliases_path.is_absolute():
+                 aliases_path = config_path.parent / aliases_path
+
+             aliases_file_path = aliases_path
+
+             # Load and merge aliases if file exists
+             if aliases_path.exists():
+                 try:
+                     from .aliases import AliasesManager
+
+                     aliases_mgr = AliasesManager(aliases_path)
+                     # Merge aliases with existing manual mappings
+                     manual_mappings.extend(aliases_mgr.to_manual_mappings())
+                     logger.info(
+                         f"Loaded {len(aliases_mgr.aliases)} identity aliases from {aliases_path}"
+                     )
+                 except Exception as e:
+                     logger.warning(f"Could not load aliases file {aliases_path}: {e}")
+             else:
+                 logger.warning(f"Aliases file not found: {aliases_path}")
+
          return AnalysisConfig(
              story_point_patterns=analysis_data.get(
                  "story_point_patterns",
@@ -550,7 +583,8 @@
              similarity_threshold=analysis_data.get("identity", {}).get(
                  "similarity_threshold", 0.85
              ),
-             manual_identity_mappings=analysis_data.get("identity", {}).get("manual_mappings", []),
+             manual_identity_mappings=manual_mappings,
+             aliases_file=aliases_file_path,
              default_ticket_platform=analysis_data.get("default_ticket_platform"),
              branch_mapping_rules=analysis_data.get("branch_mapping_rules", {}),
              ticket_platforms=analysis_data.get("ticket_platforms"),
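The loader resolves aliases_file relative to the config file itself. A small sketch of that resolution logic, assuming the identity block is the same one the analysis_data lookups above read from; the YAML keys follow the loader code, while the paths are illustrative:

from pathlib import Path

import yaml

config_path = Path("/projects/acme/config.yaml")  # hypothetical config location
analysis_data = yaml.safe_load(
    "identity:\n"
    "  similarity_threshold: 0.85\n"
    "  aliases_file: ./aliases.yaml\n"
)

aliases_file = analysis_data.get("identity", {}).get("aliases_file")
aliases_path = Path(aliases_file).expanduser()
if not aliases_path.is_absolute():
    aliases_path = config_path.parent / aliases_path  # -> /projects/acme/aliases.yaml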
gitflow_analytics/config/schema.py
@@ -301,6 +301,7 @@ class AnalysisConfig:
      exclude_paths: list[str] = field(default_factory=list)
      similarity_threshold: float = 0.85
      manual_identity_mappings: list[dict[str, Any]] = field(default_factory=list)
+     aliases_file: Optional[Path] = None  # Path to shared aliases.yaml file
      default_ticket_platform: Optional[str] = None
      branch_mapping_rules: dict[str, list[str]] = field(default_factory=dict)
      ticket_platforms: Optional[list[str]] = None
@@ -379,6 +380,17 @@ class PMIntegrationConfig:
      platforms: dict[str, PMPlatformConfig] = field(default_factory=dict)


+ @dataclass
+ class LauncherPreferences:
+     """Interactive launcher preferences."""
+
+     last_selected_repos: list[str] = field(default_factory=list)
+     default_weeks: int = 4
+     auto_clear_cache: bool = False
+     skip_identity_analysis: bool = False
+     last_run: Optional[str] = None
+
+
  @dataclass
  class Config:
      """Main configuration container."""
@@ -393,6 +405,7 @@ class Config:
      pm: Optional[Any] = None  # Modern PM framework config
      pm_integration: Optional[PMIntegrationConfig] = None
      qualitative: Optional["QualitativeConfig"] = None
+     launcher: Optional[LauncherPreferences] = None

      def discover_organization_repositories(
          self, clone_base_path: Optional[Path] = None
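The new LauncherPreferences dataclass presumably stores settings for the interactive run launcher added in this release (gitflow_analytics/cli_wizards/run_launcher.py in the file list). A minimal construction sketch with made-up values:

from gitflow_analytics.config.schema import LauncherPreferences

prefs = LauncherPreferences(
    last_selected_repos=["org/service-api"],  # illustrative repository key
    default_weeks=12,
    auto_clear_cache=True,
)
# Config.launcher is Optional[LauncherPreferences] and defaults to None.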
gitflow_analytics/constants.py
@@ -0,0 +1,75 @@
+ """Application-wide constants and configuration values.
+
+ This module centralizes magic numbers and configuration defaults to improve
+ code maintainability and readability. Constants are organized by functional
+ area for easy navigation and updates.
+ """
+
+
+ class Timeouts:
+     """Timeout values in seconds for various git operations.
+
+     These timeouts protect against hanging operations when repositories
+     require authentication or have network issues.
+     """
+
+     # Git remote operations
+     GIT_FETCH = 30  # Fetch from remote repository
+     GIT_PULL = 30  # Pull latest changes
+
+     # Git local operations
+     GIT_BRANCH_ITERATION = 15  # Iterate commits for a branch/day
+     GIT_DIFF = 10  # Calculate diff statistics
+     GIT_CONFIG = 2  # Read git configuration
+     GIT_REMOTE_LIST = 5  # List remote branches
+
+     # Default timeout for generic git operations
+     DEFAULT_GIT_OPERATION = 30
+
+     # Process-level timeouts
+     SUBPROCESS_DEFAULT = 5  # Default subprocess timeout
+     THREAD_JOIN = 1  # Thread join timeout
+
+
+ class BatchSizes:
+     """Batch processing sizes for efficient data handling.
+
+     These sizes balance memory usage with performance gains from bulk operations.
+     Tunable based on repository size and system capabilities.
+     """
+
+     COMMIT_STORAGE = 1000  # Commits per bulk insert operation
+     TICKET_FETCH = 50  # Tickets fetched per JIRA batch
+     CACHE_WARMUP = 100  # Commits per cache warmup batch
+
+     # Estimation constants
+     COMMITS_PER_WEEK_ESTIMATE = 50  # Estimated commits for progress tracking
+     DEFAULT_PROGRESS_ESTIMATE = 100  # Default when estimation fails
+
+
+ class CacheTTL:
+     """Cache time-to-live values.
+
+     These values control how long cached data remains valid before
+     requiring refresh. Measured in hours unless otherwise specified.
+     """
+
+     ONE_WEEK_HOURS = 168  # Standard cache TTL (7 days * 24 hours)
+     IDENTITY_CACHE_DAYS = 7  # Developer identity analysis cache (in days)
+
+
+ class Thresholds:
+     """Various threshold values for analysis and reporting."""
+
+     # Cache performance
+     CACHE_HIT_RATE_GOOD = 50  # Percentage threshold for good cache performance
+
+     # Percentage calculations
+     PERCENTAGE_MULTIPLIER = 100  # Standard percentage calculation multiplier
+
+
+ class Estimations:
+     """Estimation constants for progress tracking and metrics."""
+
+     COMMITS_PER_WEEK = 50  # Estimated commits per week for progress bars
+     DEFAULT_ESTIMATE = 100  # Default estimate when actual count unavailable
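A brief sketch of how these constants are meant to be consumed; only the import path and constant names come from the diff, the surrounding calls are illustrative:

import subprocess

from gitflow_analytics.constants import BatchSizes, Timeouts

# Guard a remote git operation with the centralized timeout
subprocess.run(
    ["git", "fetch", "--all"],
    check=True,
    timeout=Timeouts.GIT_FETCH,  # 30 seconds
)

# Chunk bulk work to the documented batch size
commits = list(range(2500))  # stand-in for cached commit rows
for start in range(0, len(commits), BatchSizes.COMMIT_STORAGE):
    batch = commits[start : start + BatchSizes.COMMIT_STORAGE]  # 1000-row chunks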
gitflow_analytics/core/cache.py
@@ -12,6 +12,7 @@ from typing import Any, Optional, Union
  import git
  from sqlalchemy import and_

+ from ..constants import BatchSizes, CacheTTL, Thresholds
  from ..models.database import (
      CachedCommit,
      Database,
@@ -27,7 +28,10 @@ class GitAnalysisCache:
      """Cache for Git analysis results."""

      def __init__(
-         self, cache_dir: Union[Path, str], ttl_hours: int = 168, batch_size: int = 1000
+         self,
+         cache_dir: Union[Path, str],
+         ttl_hours: int = CacheTTL.ONE_WEEK_HOURS,
+         batch_size: int = BatchSizes.COMMIT_STORAGE,
      ) -> None:
          """Initialize cache with SQLite backend and configurable batch size.

@@ -37,7 +41,7 @@ class GitAnalysisCache:

          Args:
              cache_dir: Directory for cache database
-             ttl_hours: Time-to-live for cache entries in hours
+             ttl_hours: Time-to-live for cache entries in hours (default: 168 = 1 week)
              batch_size: Default batch size for bulk operations (default: 1000)
          """
          self.cache_dir = Path(cache_dir)  # Ensure it's a Path object
@@ -643,7 +647,7 @@ class GitAnalysisCache:
          # Performance insights
          if stats["hit_rate_percent"] > 80:
              print(" ✅ Excellent cache performance!")
-         elif stats["hit_rate_percent"] > 50:
+         elif stats["hit_rate_percent"] > Thresholds.CACHE_HIT_RATE_GOOD:
              print(" 👍 Good cache performance")
          elif stats["total_requests"] > 0:
              print(" ⚠️ Consider clearing stale cache entries")