PyPI - gitflow-analytics - Versions diffs - 3.3.0__py3-none-any.whl → 3.5.2__py3-none-any.whl - Mend

gitflow-analytics 3.3.0__py3-none-any.whl → 3.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

gitflow_analytics/_version.py +1 -1
gitflow_analytics/cli.py +517 -15
gitflow_analytics/cli_wizards/__init__.py +10 -0
gitflow_analytics/cli_wizards/install_wizard.py +1181 -0
gitflow_analytics/cli_wizards/run_launcher.py +433 -0
gitflow_analytics/config/__init__.py +3 -0
gitflow_analytics/config/aliases.py +306 -0
gitflow_analytics/config/loader.py +35 -1
gitflow_analytics/config/schema.py +13 -0
gitflow_analytics/constants.py +75 -0
gitflow_analytics/core/cache.py +7 -3
gitflow_analytics/core/data_fetcher.py +66 -30
gitflow_analytics/core/git_timeout_wrapper.py +6 -4
gitflow_analytics/core/progress.py +2 -4
gitflow_analytics/core/subprocess_git.py +31 -5
gitflow_analytics/identity_llm/analysis_pass.py +13 -3
gitflow_analytics/identity_llm/analyzer.py +14 -2
gitflow_analytics/identity_llm/models.py +7 -1
gitflow_analytics/qualitative/classifiers/llm/openai_client.py +5 -3
gitflow_analytics/security/config.py +6 -6
gitflow_analytics/security/extractors/dependency_checker.py +14 -14
gitflow_analytics/security/extractors/secret_detector.py +8 -14
gitflow_analytics/security/extractors/vulnerability_scanner.py +9 -9
gitflow_analytics/security/llm_analyzer.py +10 -10
gitflow_analytics/security/security_analyzer.py +17 -17
gitflow_analytics/tui/screens/analysis_progress_screen.py +1 -1
gitflow_analytics/ui/progress_display.py +36 -29
gitflow_analytics/verify_activity.py +23 -26
{gitflow_analytics-3.3.0.dist-info → gitflow_analytics-3.5.2.dist-info}/METADATA +1 -1
{gitflow_analytics-3.3.0.dist-info → gitflow_analytics-3.5.2.dist-info}/RECORD +34 -31
gitflow_analytics/security/reports/__init__.py +0 -5
gitflow_analytics/security/reports/security_report.py +0 -358
{gitflow_analytics-3.3.0.dist-info → gitflow_analytics-3.5.2.dist-info}/WHEEL +0 -0
{gitflow_analytics-3.3.0.dist-info → gitflow_analytics-3.5.2.dist-info}/entry_points.txt +0 -0
{gitflow_analytics-3.3.0.dist-info → gitflow_analytics-3.5.2.dist-info}/licenses/LICENSE +0 -0
{gitflow_analytics-3.3.0.dist-info → gitflow_analytics-3.5.2.dist-info}/top_level.txt +0 -0

gitflow_analytics/_version.py CHANGED Viewed

@@ -1,4 +1,4 @@
 """Version information for gitflow-analytics."""
-__version__ = "3.3.0"
+__version__ = "3.5.2"
 __version_info__ = tuple(int(x) for x in __version__.split("."))

gitflow_analytics/cli.py CHANGED Viewed

@@ -33,6 +33,8 @@ from .reports.weekly_trends_writer import WeeklyTrendsWriter
 from .training.pipeline import CommitClassificationTrainer
 from .ui.progress_display import create_progress_display
+logger = logging.getLogger(__name__)
 class RichHelpFormatter:
     """Rich help formatter for enhanced CLI help display."""
@@ -48,8 +50,19 @@ class RichHelpFormatter:
         return help_text
     @staticmethod
-    def format_option_help(description: str, default: Any = None, choices: list = None) -> str:
-        """Format option help with default and choices."""
+    def format_option_help(
+        description: str, default: Optional[str] = None, choices: Optional[list[str]] = None
+    ) -> str:
+        """Format option help with default and choices.
+        Args:
+            description: Option description text
+            default: Default value to display (optional)
+            choices: List of valid choices (optional)
+        Returns:
+            Formatted help text string
+        """
         help_text = description
         if default is not None:
             help_text += f" [default: {default}]"
@@ -281,12 +294,18 @@ class TUIAsDefaultGroup(click.Group):
         if args and args[0].startswith("-"):
             # Check if TUI dependencies are available
             try:
-                import textual
+                import importlib.util
+                textual_spec = importlib.util.find_spec("textual")
                 # TUI is available - route to TUI
-                new_args = ["tui"] + args
-                return super().parse_args(ctx, new_args)
-            except ImportError:
+                if textual_spec is not None:
+                    new_args = ["tui"] + args
+                    return super().parse_args(ctx, new_args)
+                else:
+                    # TUI not available - fallback to analyze
+                    new_args = ["analyze"] + args
+                    return super().parse_args(ctx, new_args)
+            except (ImportError, ValueError):
                 # TUI not available - fallback to analyze
                 new_args = ["analyze"] + args
                 return super().parse_args(ctx, new_args)
@@ -323,11 +342,28 @@ def cli(ctx: click.Context) -> None:
     \b
     COMMANDS:
       analyze    Analyze repositories and generate reports (default)
+      install    Interactive installation wizard
+      run        Interactive launcher with preferences
+      aliases    Generate developer identity aliases using LLM
       identities Manage developer identity resolution
       train      Train ML models for commit classification
       fetch      Fetch external data (GitHub PRs, PM tickets)
       help       Show detailed help and documentation
+    \b
+    EXAMPLES:
+      # Interactive installation
+      gitflow-analytics install
+      # Interactive launcher
+      gitflow-analytics run -c config.yaml
+      # Generate developer aliases
+      gitflow-analytics aliases -c config.yaml --apply
+      # Run analysis
+      gitflow-analytics -c config.yaml --weeks 4
     \b
     For detailed command help: gitflow-analytics COMMAND --help
     For documentation: https://github.com/yourusername/gitflow-analytics
@@ -1066,10 +1102,7 @@ def analyze(
                 )
                 # Extract commits from the raw data
-                if raw_data and raw_data.get("commits"):
-                    commits = raw_data["commits"]
-                else:
-                    commits = []
+                commits = raw_data["commits"] if raw_data and raw_data.get("commits") else []
                 all_commits.extend(commits)
             if not all_commits:
@@ -4460,6 +4493,114 @@ def merge_identity(config: Path, dev1: str, dev2: str) -> None:
         sys.exit(1)
+@cli.command(name="run")
+@click.option(
+    "--config",
+    "-c",
+    type=click.Path(exists=True, path_type=Path),
+    help="Path to configuration file (optional, will search for default)",
+)
+def run_launcher(config: Optional[Path]) -> None:
+    """Interactive launcher for gitflow-analytics.
+    \b
+    This interactive command guides you through:
+      • Repository selection (multi-select)
+      • Analysis period configuration
+      • Cache management
+      • Identity analysis preferences
+      • Preferences storage
+    \b
+    EXAMPLES:
+      # Launch interactive mode
+      gitflow-analytics run
+      # Launch with specific config
+      gitflow-analytics run -c config.yaml
+    \b
+    PREFERENCES:
+      Your selections are saved to the launcher section
+      in your configuration file for future use.
+    \b
+    WORKFLOW:
+      1. Select repositories to analyze
+      2. Choose analysis period (weeks)
+      3. Configure cache clearing
+      4. Set identity analysis preference
+      5. Run analysis with your selections
+    """
+    try:
+        from .cli_wizards.run_launcher import run_interactive_launcher
+        success = run_interactive_launcher(config_path=config)
+        sys.exit(0 if success else 1)
+    except Exception as e:
+        click.echo(f"❌ Launcher failed: {e}", err=True)
+        logger.error(f"Launcher error: {type(e).__name__}")
+        sys.exit(1)
+@cli.command(name="install")
+@click.option(
+    "--output-dir",
+    type=click.Path(path_type=Path),
+    default=".",
+    help="Directory for config files (default: current directory)",
+)
+@click.option(
+    "--skip-validation",
+    is_flag=True,
+    help="Skip credential validation (for testing)",
+)
+def install_command(output_dir: Path, skip_validation: bool) -> None:
+    """Interactive installation wizard for GitFlow Analytics.
+    \b
+    This wizard will guide you through setting up GitFlow Analytics:
+    • GitHub credentials and repository configuration
+    • Optional JIRA integration
+    • Optional AI-powered insights (OpenRouter/ChatGPT)
+    • Analysis settings and defaults
+    \b
+    EXAMPLES:
+      # Run installation wizard in current directory
+      gitflow-analytics install
+      # Install to specific directory
+      gitflow-analytics install --output-dir ./my-config
+    \b
+    The wizard will:
+    1. Validate all credentials before saving
+    2. Generate config.yaml and .env files
+    3. Set secure permissions on .env (0600)
+    4. Update .gitignore if in a git repository
+    5. Test the configuration
+    6. Optionally run initial analysis
+    \b
+    SECURITY NOTES:
+      • .env file contains sensitive credentials
+      • Never commit .env to version control
+      • File permissions set to owner-only (0600)
+    """
+    try:
+        from .cli_wizards.install_wizard import InstallWizard
+        wizard = InstallWizard(output_dir=Path(output_dir), skip_validation=skip_validation)
+        success = wizard.run()
+        sys.exit(0 if success else 1)
+    except Exception as e:
+        click.echo(f"❌ Installation failed: {e}", err=True)
+        sys.exit(1)
 @cli.command(name="discover-storypoint-fields")
 @click.option(
     "--config",
@@ -4672,16 +4813,41 @@ def identities(config: Path, weeks: int, apply: bool) -> None:
             # Show suggestions
             click.echo(f"\n⚠️  Found {len(identity_result.clusters)} potential identity clusters:")
-            # Display all mappings
+            # Display all mappings with confidence scores
             if suggested_config.get("analysis", {}).get("manual_identity_mappings"):
                 click.echo("\n📋 Suggested identity mappings:")
-                for mapping in suggested_config["analysis"]["manual_identity_mappings"]:
-                    canonical = mapping["canonical_email"]
+                for i, mapping in enumerate(
+                    suggested_config["analysis"]["manual_identity_mappings"], 1
+                ):
+                    canonical = mapping["primary_email"]
                     aliases = mapping.get("aliases", [])
+                    confidence = mapping.get("confidence", 0.0)
+                    reasoning = mapping.get("reasoning", "")
+                    # Color-code based on confidence (90%+ threshold)
+                    if confidence >= 0.95:
+                        confidence_indicator = "🟢"  # Very high confidence
+                    elif confidence >= 0.90:
+                        confidence_indicator = "🟡"  # High confidence (above threshold)
+                    else:
+                        confidence_indicator = "🟠"  # Medium confidence (below threshold)
                     if aliases:
-                        click.echo(f"   {canonical}")
+                        click.echo(
+                            f"\n   {confidence_indicator} Cluster {i} "
+                            f"(Confidence: {confidence:.1%}):"
+                        )
+                        click.echo(f"      Primary: {canonical}")
                         for alias in aliases:
-                            click.echo(f"     → {alias}")
+                            click.echo(f"      Alias:   {alias}")
+                        # Show reasoning if available
+                        if reasoning:
+                            # Truncate reasoning for display
+                            display_reasoning = (
+                                reasoning if len(reasoning) <= 80 else reasoning[:77] + "..."
+                            )
+                            click.echo(f"      Reason:  {display_reasoning}")
             # Check for bot exclusions
             if suggested_config.get("exclude", {}).get("authors"):
@@ -4708,6 +4874,342 @@ def identities(config: Path, weeks: int, apply: bool) -> None:
         sys.exit(1)
+@cli.command(name="aliases")
+@click.option(
+    "--config",
+    "-c",
+    type=click.Path(exists=True, path_type=Path),
+    required=True,
+    help="Path to configuration file",
+)
+@click.option(
+    "--output",
+    "-o",
+    type=click.Path(path_type=Path),
+    help="Output path for aliases.yaml (default: same dir as config)",
+)
+@click.option(
+    "--confidence-threshold",
+    type=float,
+    default=0.9,
+    help="Minimum confidence threshold for LLM matches (default: 0.9)",
+)
+@click.option(
+    "--apply", is_flag=True, help="Automatically update config to use generated aliases file"
+)
+@click.option(
+    "--weeks", type=int, default=12, help="Number of weeks of history to analyze (default: 12)"
+)
+def aliases_command(
+    config: Path,
+    output: Optional[Path],
+    confidence_threshold: float,
+    apply: bool,
+    weeks: int,
+) -> None:
+    """Generate developer identity aliases using LLM analysis.
+    \b
+    This command analyzes commit history and uses LLM to identify
+    developer aliases (same person with different email addresses).
+    Results are saved to aliases.yaml which can be shared across
+    multiple config files.
+    \b
+    EXAMPLES:
+        # Generate aliases and review
+        gitflow-analytics aliases -c config.yaml
+        # Generate and apply automatically
+        gitflow-analytics aliases -c config.yaml --apply
+        # Save to specific location
+        gitflow-analytics aliases -c config.yaml -o ~/shared/aliases.yaml
+        # Use longer history for better accuracy
+        gitflow-analytics aliases -c config.yaml --weeks 24
+    \b
+    CONFIGURATION:
+        Aliases are saved to aliases.yaml and can be referenced in
+        multiple config files for consistent identity resolution.
+    """
+    try:
+        from .config.aliases import AliasesManager, DeveloperAlias
+        from .identity_llm.analyzer import LLMIdentityAnalyzer
+        # Load configuration
+        click.echo(f"\n📋 Loading configuration from {config}...")
+        cfg = ConfigLoader.load(config)
+        # Determine output path
+        if not output:
+            output = config.parent / "aliases.yaml"
+        click.echo(f"🔍 Analyzing developer identities (last {weeks} weeks)")
+        click.echo(f"📊 Confidence threshold: {confidence_threshold:.0%}")
+        click.echo(f"💾 Output: {output}\n")
+        # Set up date range
+        end_date = datetime.now(timezone.utc)
+        start_date = end_date - timedelta(weeks=weeks)
+        # Analyze repositories to collect commits
+        click.echo("📥 Fetching commit history...\n")
+        cache = GitAnalysisCache(cfg.cache.directory)
+        # Prepare ML categorization config for analyzer
+        ml_config = None
+        if hasattr(cfg.analysis, "ml_categorization"):
+            ml_config = {
+                "enabled": cfg.analysis.ml_categorization.enabled,
+                "min_confidence": cfg.analysis.ml_categorization.min_confidence,
+                "semantic_weight": cfg.analysis.ml_categorization.semantic_weight,
+                "file_pattern_weight": cfg.analysis.ml_categorization.file_pattern_weight,
+                "hybrid_threshold": cfg.analysis.ml_categorization.hybrid_threshold,
+                "cache_duration_days": cfg.analysis.ml_categorization.cache_duration_days,
+                "batch_size": cfg.analysis.ml_categorization.batch_size,
+                "enable_caching": cfg.analysis.ml_categorization.enable_caching,
+                "spacy_model": cfg.analysis.ml_categorization.spacy_model,
+            }
+        # LLM classification configuration
+        llm_config = {
+            "enabled": cfg.analysis.llm_classification.enabled,
+            "api_key": cfg.analysis.llm_classification.api_key,
+            "model": cfg.analysis.llm_classification.model,
+            "confidence_threshold": cfg.analysis.llm_classification.confidence_threshold,
+            "max_tokens": cfg.analysis.llm_classification.max_tokens,
+            "temperature": cfg.analysis.llm_classification.temperature,
+            "timeout_seconds": cfg.analysis.llm_classification.timeout_seconds,
+            "cache_duration_days": cfg.analysis.llm_classification.cache_duration_days,
+            "enable_caching": cfg.analysis.llm_classification.enable_caching,
+            "max_daily_requests": cfg.analysis.llm_classification.max_daily_requests,
+            "domain_terms": cfg.analysis.llm_classification.domain_terms,
+        }
+        # Configure branch analysis
+        branch_analysis_config = {
+            "strategy": cfg.analysis.branch_analysis.strategy,
+            "max_branches_per_repo": cfg.analysis.branch_analysis.max_branches_per_repo,
+            "active_days_threshold": cfg.analysis.branch_analysis.active_days_threshold,
+            "include_main_branches": cfg.analysis.branch_analysis.include_main_branches,
+            "always_include_patterns": cfg.analysis.branch_analysis.always_include_patterns,
+            "always_exclude_patterns": cfg.analysis.branch_analysis.always_exclude_patterns,
+            "enable_progress_logging": cfg.analysis.branch_analysis.enable_progress_logging,
+            "branch_commit_limit": cfg.analysis.branch_analysis.branch_commit_limit,
+        }
+        analyzer = GitAnalyzer(
+            cache,
+            branch_mapping_rules=cfg.analysis.branch_mapping_rules,
+            allowed_ticket_platforms=getattr(
+                cfg.analysis, "ticket_platforms", ["jira", "github", "clickup", "linear"]
+            ),
+            exclude_paths=cfg.analysis.exclude_paths,
+            story_point_patterns=cfg.analysis.story_point_patterns,
+            ml_categorization_config=ml_config,
+            llm_config=llm_config,
+            branch_analysis_config=branch_analysis_config,
+        )
+        all_commits = []
+        # Get repositories to analyze
+        repositories = cfg.repositories if cfg.repositories else []
+        if not repositories:
+            click.echo("❌ No repositories configured", err=True)
+            sys.exit(1)
+        # Collect commits from all repositories
+        with click.progressbar(
+            repositories,
+            label="Analyzing repositories",
+            item_show_func=lambda r: r.name if r else "",
+        ) as repos:
+            for repo_config in repos:
+                try:
+                    if not repo_config.path.exists():
+                        continue
+                    # Fetch commits
+                    repo_commits = analyzer.analyze_repository(
+                        repo_config.path, start_date=start_date, branch=repo_config.branch
+                    )
+                    if repo_commits:
+                        all_commits.extend(repo_commits)
+                except Exception as e:
+                    click.echo(f"\n⚠️  Warning: Failed to analyze repository: {e}", err=True)
+                    continue
+        click.echo(f"\n✅ Collected {len(all_commits)} commits\n")
+        if not all_commits:
+            click.echo("❌ No commits found to analyze", err=True)
+            sys.exit(1)
+        # Initialize LLM identity analyzer
+        click.echo("🤖 Running LLM identity analysis...\n")
+        # Get OpenRouter API key from config
+        api_key = None
+        if cfg.chatgpt and cfg.chatgpt.api_key:
+            # Resolve environment variable if needed
+            api_key_value = cfg.chatgpt.api_key
+            if api_key_value.startswith("${") and api_key_value.endswith("}"):
+                var_name = api_key_value[2:-1]
+                api_key = os.getenv(var_name)
+            else:
+                api_key = api_key_value
+        if not api_key:
+            click.echo(
+                "⚠️  No OpenRouter API key configured - using heuristic analysis only", err=True
+            )
+        llm_analyzer = LLMIdentityAnalyzer(
+            api_key=api_key, confidence_threshold=confidence_threshold
+        )
+        # Run analysis
+        result = llm_analyzer.analyze_identities(all_commits)
+        click.echo("✅ Analysis complete:")
+        click.echo(f"   - Found {len(result.clusters)} identity clusters")
+        click.echo(f"   - {len(result.unresolved_identities)} unresolved identities")
+        click.echo(f"   - Method: {result.analysis_metadata.get('analysis_method', 'unknown')}\n")
+        # Create aliases manager and add clusters
+        aliases_mgr = AliasesManager(output)
+        # Load existing aliases if file exists
+        if output.exists():
+            click.echo(f"📂 Loading existing aliases from {output}...")
+            aliases_mgr.load()
+            existing_count = len(aliases_mgr.aliases)
+            click.echo(f"   Found {existing_count} existing aliases\n")
+        # Add new clusters
+        new_count = 0
+        updated_count = 0
+        for cluster in result.clusters:
+            # Check if this is a new or updated alias
+            existing = aliases_mgr.get_alias(cluster.canonical_email)
+            alias = DeveloperAlias(
+                name=cluster.preferred_display_name or cluster.canonical_name,
+                primary_email=cluster.canonical_email,
+                aliases=[a.email for a in cluster.aliases],
+                confidence=cluster.confidence,
+                reasoning=(
+                    cluster.reasoning[:200] if cluster.reasoning else ""
+                ),  # Truncate for readability
+            )
+            if existing:
+                updated_count += 1
+            else:
+                new_count += 1
+            aliases_mgr.add_alias(alias)
+        # Save aliases
+        click.echo("💾 Saving aliases...\n")
+        aliases_mgr.save()
+        click.echo(f"✅ Saved to {output}")
+        click.echo(f"   - New aliases: {new_count}")
+        click.echo(f"   - Updated aliases: {updated_count}")
+        click.echo(f"   - Total aliases: {len(aliases_mgr.aliases)}\n")
+        # Display summary
+        if aliases_mgr.aliases:
+            click.echo("📋 Generated Aliases:\n")
+            for alias in sorted(aliases_mgr.aliases, key=lambda a: a.primary_email):
+                name_display = (
+                    f"{alias.name} <{alias.primary_email}>" if alias.name else alias.primary_email
+                )
+                click.echo(f"  • {name_display}")
+                if alias.aliases:
+                    for alias_email in alias.aliases:
+                        click.echo(f"    → {alias_email}")
+                if alias.confidence < 1.0:
+                    confidence_color = (
+                        "green"
+                        if alias.confidence >= 0.9
+                        else "yellow" if alias.confidence >= 0.8 else "red"
+                    )
+                    click.echo("    Confidence: ", nl=False)
+                    click.secho(f"{alias.confidence:.0%}", fg=confidence_color)
+                click.echo()  # Blank line between aliases
+        # Apply to config if requested
+        if apply:
+            click.echo(f"🔄 Updating {config} to reference aliases file...\n")
+            # Read current config
+            with open(config) as f:
+                config_data = yaml.safe_load(f)
+            # Ensure analysis section exists
+            if "analysis" not in config_data:
+                config_data["analysis"] = {}
+            if "identity" not in config_data["analysis"]:
+                config_data["analysis"]["identity"] = {}
+            # Calculate relative path from config to aliases file
+            try:
+                rel_path = output.relative_to(config.parent)
+                config_data["analysis"]["identity"]["aliases_file"] = str(rel_path)
+            except ValueError:
+                # Not relative, use absolute
+                config_data["analysis"]["identity"]["aliases_file"] = str(output)
+            # Remove manual_mappings if present (now in aliases file)
+            if "manual_identity_mappings" in config_data["analysis"].get("identity", {}):
+                del config_data["analysis"]["identity"]["manual_identity_mappings"]
+                click.echo("   Removed inline manual_identity_mappings (now in aliases file)")
+            # Save updated config
+            with open(config, "w") as f:
+                yaml.dump(config_data, f, default_flow_style=False, sort_keys=False)
+            click.echo(f"✅ Updated {config}")
+            click.echo(
+                f"   Added: analysis.identity.aliases_file = "
+                f"{config_data['analysis']['identity']['aliases_file']}\n"
+            )
+        # Summary and next steps
+        click.echo("✨ Identity alias generation complete!\n")
+        if not apply:
+            click.echo("💡 Next steps:")
+            click.echo(f"   1. Review the aliases in {output}")
+            click.echo("   2. Update your config.yaml to reference the aliases file:")
+            click.echo("      analysis:")
+            click.echo("        identity:")
+            click.echo(f"          aliases_file: {output.name}")
+            click.echo("   3. Or run with --apply flag to update automatically\n")
+    except Exception as e:
+        click.echo(f"\n❌ Error generating aliases: {e}", err=True)
+        import traceback
+        if os.getenv("GITFLOW_DEBUG"):
+            traceback.print_exc()
+        sys.exit(1)
 @cli.command()
 @click.option(
     "--config",

gitflow_analytics/cli_wizards/__init__.py ADDED Viewed

@@ -0,0 +1,10 @@
+"""CLI subpackage for GitFlow Analytics.
+This package contains CLI-related modules including the installation wizard
+and interactive launcher.
+"""
+from .install_wizard import InstallWizard
+from .run_launcher import InteractiveLauncher, run_interactive_launcher
+__all__ = ["InstallWizard", "InteractiveLauncher", "run_interactive_launcher"]

gitflow-analytics 3.3.0__py3-none-any.whl → 3.5.2__py3-none-any.whl

gitflow-analytics 3.3.0__py3-none-any.whl → 3.5.2__py3-none-any.whl