PyPI - gitflow-analytics - Versions diffs - 1.0.3__py3-none-any.whl → 1.3.11__py3-none-any.whl - Mend

gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (116) hide show

gitflow_analytics/_version.py +1 -1
gitflow_analytics/classification/__init__.py +31 -0
gitflow_analytics/classification/batch_classifier.py +752 -0
gitflow_analytics/classification/classifier.py +464 -0
gitflow_analytics/classification/feature_extractor.py +725 -0
gitflow_analytics/classification/linguist_analyzer.py +574 -0
gitflow_analytics/classification/model.py +455 -0
gitflow_analytics/cli.py +4158 -350
gitflow_analytics/cli_rich.py +198 -48
gitflow_analytics/config/__init__.py +43 -0
gitflow_analytics/config/errors.py +261 -0
gitflow_analytics/config/loader.py +905 -0
gitflow_analytics/config/profiles.py +264 -0
gitflow_analytics/config/repository.py +124 -0
gitflow_analytics/config/schema.py +444 -0
gitflow_analytics/config/validator.py +154 -0
gitflow_analytics/config.py +44 -508
gitflow_analytics/core/analyzer.py +1209 -98
gitflow_analytics/core/cache.py +1337 -29
gitflow_analytics/core/data_fetcher.py +1285 -0
gitflow_analytics/core/identity.py +363 -14
gitflow_analytics/core/metrics_storage.py +526 -0
gitflow_analytics/core/progress.py +372 -0
gitflow_analytics/core/schema_version.py +269 -0
gitflow_analytics/extractors/ml_tickets.py +1100 -0
gitflow_analytics/extractors/story_points.py +8 -1
gitflow_analytics/extractors/tickets.py +749 -11
gitflow_analytics/identity_llm/__init__.py +6 -0
gitflow_analytics/identity_llm/analysis_pass.py +231 -0
gitflow_analytics/identity_llm/analyzer.py +464 -0
gitflow_analytics/identity_llm/models.py +76 -0
gitflow_analytics/integrations/github_integration.py +175 -11
gitflow_analytics/integrations/jira_integration.py +461 -24
gitflow_analytics/integrations/orchestrator.py +124 -1
gitflow_analytics/metrics/activity_scoring.py +322 -0
gitflow_analytics/metrics/branch_health.py +470 -0
gitflow_analytics/metrics/dora.py +379 -20
gitflow_analytics/models/database.py +843 -53
gitflow_analytics/pm_framework/__init__.py +115 -0
gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
gitflow_analytics/pm_framework/base.py +406 -0
gitflow_analytics/pm_framework/models.py +211 -0
gitflow_analytics/pm_framework/orchestrator.py +652 -0
gitflow_analytics/pm_framework/registry.py +333 -0
gitflow_analytics/qualitative/__init__.py +9 -10
gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
gitflow_analytics/qualitative/core/__init__.py +4 -4
gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
gitflow_analytics/qualitative/core/processor.py +381 -248
gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
gitflow_analytics/qualitative/models/__init__.py +7 -7
gitflow_analytics/qualitative/models/schemas.py +155 -121
gitflow_analytics/qualitative/utils/__init__.py +4 -4
gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
gitflow_analytics/qualitative/utils/metrics.py +172 -158
gitflow_analytics/qualitative/utils/text_processing.py +146 -104
gitflow_analytics/reports/__init__.py +100 -0
gitflow_analytics/reports/analytics_writer.py +539 -14
gitflow_analytics/reports/base.py +648 -0
gitflow_analytics/reports/branch_health_writer.py +322 -0
gitflow_analytics/reports/classification_writer.py +924 -0
gitflow_analytics/reports/cli_integration.py +427 -0
gitflow_analytics/reports/csv_writer.py +1676 -212
gitflow_analytics/reports/data_models.py +504 -0
gitflow_analytics/reports/database_report_generator.py +427 -0
gitflow_analytics/reports/example_usage.py +344 -0
gitflow_analytics/reports/factory.py +499 -0
gitflow_analytics/reports/formatters.py +698 -0
gitflow_analytics/reports/html_generator.py +1116 -0
gitflow_analytics/reports/interfaces.py +489 -0
gitflow_analytics/reports/json_exporter.py +2770 -0
gitflow_analytics/reports/narrative_writer.py +2287 -158
gitflow_analytics/reports/story_point_correlation.py +1144 -0
gitflow_analytics/reports/weekly_trends_writer.py +389 -0
gitflow_analytics/training/__init__.py +5 -0
gitflow_analytics/training/model_loader.py +377 -0
gitflow_analytics/training/pipeline.py +550 -0
gitflow_analytics/tui/__init__.py +1 -1
gitflow_analytics/tui/app.py +129 -126
gitflow_analytics/tui/screens/__init__.py +3 -3
gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
gitflow_analytics/tui/screens/configuration_screen.py +154 -178
gitflow_analytics/tui/screens/loading_screen.py +100 -110
gitflow_analytics/tui/screens/main_screen.py +89 -72
gitflow_analytics/tui/screens/results_screen.py +305 -281
gitflow_analytics/tui/widgets/__init__.py +2 -2
gitflow_analytics/tui/widgets/data_table.py +67 -69
gitflow_analytics/tui/widgets/export_modal.py +76 -76
gitflow_analytics/tui/widgets/progress_widget.py +41 -46
gitflow_analytics-1.3.11.dist-info/METADATA +1015 -0
gitflow_analytics-1.3.11.dist-info/RECORD +122 -0
gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
{gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/WHEEL +0 -0
{gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/entry_points.txt +0 -0
{gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/licenses/LICENSE +0 -0
{gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/top_level.txt +0 -0

gitflow_analytics/reports/branch_health_writer.py ADDED Viewed

@@ -0,0 +1,322 @@
+"""Branch health report generation for GitFlow Analytics."""
+import csv
+import logging
+from io import StringIO
+from pathlib import Path
+from typing import Any
+import pandas as pd
+logger = logging.getLogger(__name__)
+class BranchHealthReportGenerator:
+    """Generate branch health reports in CSV and markdown formats."""
+    def __init__(self):
+        """Initialize branch health report generator."""
+        pass
+    def generate_csv_report(
+        self, branch_health_metrics: dict[str, dict[str, Any]], output_path: Path
+    ) -> Path:
+        """Generate CSV report for branch health metrics.
+        Args:
+            branch_health_metrics: Dictionary mapping repo names to their branch health metrics
+            output_path: Path where the CSV should be written
+        Returns:
+            Path to the generated CSV file
+        """
+        rows = []
+        for repo_name, metrics in branch_health_metrics.items():
+            # Add summary row for the repository
+            summary = metrics.get("summary", {})
+            health = metrics.get("health_indicators", {})
+            summary_row = {
+                "repository": repo_name,
+                "branch_name": "[SUMMARY]",
+                "total_branches": summary.get("total_branches", 0),
+                "active_branches": summary.get("active_branches", 0),
+                "stale_branches": summary.get("stale_branches", 0),
+                "long_lived_branches": summary.get("long_lived_branches", 0),
+                "overall_health": health.get("overall_health", "unknown"),
+                "stale_percentage": health.get("stale_branch_percentage", 0),
+                "branch_creation_rate_weekly": summary.get("branch_creation_rate_per_week", 0),
+                "average_branch_age_days": summary.get("average_branch_age_days", 0),
+                "average_commits_per_branch": summary.get("average_commits_per_branch", 0),
+            }
+            rows.append(summary_row)
+            # Add individual branch rows
+            branches = metrics.get("branches", {})
+            for branch_name, branch_data in branches.items():
+                branch_row = {
+                    "repository": repo_name,
+                    "branch_name": branch_name,
+                    "age_days": branch_data.get("age_days", 0),
+                    "is_stale": branch_data.get("is_stale", False),
+                    "is_merged": branch_data.get("is_merged", False),
+                    "total_commits": branch_data.get("total_commits", 0),
+                    "unique_authors": branch_data.get("unique_authors", 0),
+                    "ahead_of_main": branch_data.get("ahead_of_main", 0),
+                    "behind_main": branch_data.get("behind_main", 0),
+                    "divergence_score": branch_data.get("divergence_score", 0),
+                    "health_score": branch_data.get("health_score", 0),
+                    "latest_activity": branch_data.get("latest_activity", ""),
+                    "daily_commit_average": branch_data.get("commit_frequency", {}).get(
+                        "daily_average", 0
+                    ),
+                }
+                rows.append(branch_row)
+        # Write CSV
+        if rows:
+            df = pd.DataFrame(rows)
+            df.to_csv(output_path, index=False)
+        else:
+            # Write empty CSV with headers
+            with open(output_path, "w", newline="") as f:
+                writer = csv.DictWriter(
+                    f,
+                    fieldnames=[
+                        "repository",
+                        "branch_name",
+                        "total_branches",
+                        "active_branches",
+                        "stale_branches",
+                        "long_lived_branches",
+                        "overall_health",
+                        "stale_percentage",
+                        "branch_creation_rate_weekly",
+                        "average_branch_age_days",
+                        "average_commits_per_branch",
+                        "age_days",
+                        "is_stale",
+                        "is_merged",
+                        "total_commits",
+                        "unique_authors",
+                        "ahead_of_main",
+                        "behind_main",
+                        "divergence_score",
+                        "health_score",
+                        "latest_activity",
+                        "daily_commit_average",
+                    ],
+                )
+                writer.writeheader()
+        return output_path
+    def generate_markdown_section(self, branch_health_metrics: dict[str, dict[str, Any]]) -> str:
+        """Generate markdown section for branch health to include in narrative reports.
+        Args:
+            branch_health_metrics: Dictionary mapping repo names to their branch health metrics
+        Returns:
+            Markdown formatted string with branch health insights
+        """
+        if not branch_health_metrics:
+            return ""
+        report = StringIO()
+        report.write("\n## Branch Health Analysis\n\n")
+        # Overall summary across all repositories
+        total_repos = len(branch_health_metrics)
+        total_branches_all = sum(
+            m.get("summary", {}).get("total_branches", 0) for m in branch_health_metrics.values()
+        )
+        total_stale_all = sum(
+            m.get("summary", {}).get("stale_branches", 0) for m in branch_health_metrics.values()
+        )
+        report.write("### Overview\n\n")
+        report.write(
+            f"Analyzed **{total_repos} repositories** with a total of **{total_branches_all} branches**.\n\n"
+        )
+        if total_stale_all > 0:
+            stale_pct = (
+                (total_stale_all / total_branches_all * 100) if total_branches_all > 0 else 0
+            )
+            report.write(
+                f"⚠️ Found **{total_stale_all} stale branches** ({stale_pct:.1f}% of total)\n\n"
+            )
+        # Repository breakdown
+        report.write("### Repository Branch Health\n\n")
+        for repo_name, metrics in branch_health_metrics.items():
+            summary = metrics.get("summary", {})
+            health = metrics.get("health_indicators", {})
+            # Repository header
+            health_emoji = self._get_health_emoji(health.get("overall_health", "unknown"))
+            report.write(f"#### {repo_name} {health_emoji}\n\n")
+            # Key metrics
+            report.write(f"- **Total Branches**: {summary.get('total_branches', 0)}\n")
+            report.write(f"- **Active**: {summary.get('active_branches', 0)}\n")
+            report.write(f"- **Stale**: {summary.get('stale_branches', 0)}\n")
+            report.write(f"- **Long-lived**: {summary.get('long_lived_branches', 0)}\n")
+            report.write(
+                f"- **Average Age**: {summary.get('average_branch_age_days', 0):.1f} days\n"
+            )
+            report.write(
+                f"- **Creation Rate**: {summary.get('branch_creation_rate_per_week', 0):.1f} branches/week\n"
+            )
+            report.write(
+                f"- **Health Status**: {health.get('overall_health', 'unknown').title()}\n\n"
+            )
+            # Top unhealthy branches
+            branches = metrics.get("branches", {})
+            unhealthy_branches = [
+                (name, data)
+                for name, data in branches.items()
+                if data.get("health_score", 100) < 60 and not data.get("is_merged", False)
+            ]
+            if unhealthy_branches:
+                report.write("**Branches Needing Attention**:\n")
+                # Sort by health score (lowest first)
+                unhealthy_branches.sort(key=lambda x: x[1].get("health_score", 100))
+                for branch_name, branch_data in unhealthy_branches[:5]:  # Top 5
+                    age = branch_data.get("age_days", 0)
+                    behind = branch_data.get("behind_main", 0)
+                    score = branch_data.get("health_score", 0)
+                    issues = []
+                    if age > 30:
+                        issues.append(f"{age} days old")
+                    if behind > 50:
+                        issues.append(f"{behind} commits behind")
+                    report.write(f"- `{branch_name}` (score: {score:.0f}) - {', '.join(issues)}\n")
+                if len(unhealthy_branches) > 5:
+                    report.write(f"- ...and {len(unhealthy_branches) - 5} more\n")
+                report.write("\n")
+        # Recommendations section
+        report.write("### Recommendations\n\n")
+        all_recommendations = []
+        for metrics in branch_health_metrics.values():
+            all_recommendations.extend(metrics.get("recommendations", []))
+        # Deduplicate and prioritize recommendations
+        unique_recommendations = []
+        seen = set()
+        for rec in all_recommendations:
+            # Create a simplified key for deduplication
+            key = rec.split()[0]  # Use emoji as key
+            if key not in seen:
+                seen.add(key)
+                unique_recommendations.append(rec)
+        if unique_recommendations:
+            for rec in unique_recommendations[:5]:  # Top 5 recommendations
+                report.write(f"- {rec}\n")
+        else:
+            report.write("- ✅ All repositories show healthy branch management practices\n")
+        report.write("\n")
+        # Best practices reminder
+        report.write("### Best Practices (2025 Standards)\n\n")
+        report.write("- 🎯 **Elite teams** maintain <3% rework rate and <26 hour cycle times\n")
+        report.write(
+            "- 📏 **Small PRs** (<200 lines) correlate with better quality and faster reviews\n"
+        )
+        report.write(
+            "- 🔄 **Frequent integration** reduces merge conflicts and improves deployment readiness\n"
+        )
+        report.write(
+            "- 🧹 **Regular cleanup** of merged and stale branches keeps repositories manageable\n"
+        )
+        return report.getvalue()
+    def _get_health_emoji(self, health_status: str) -> str:
+        """Get emoji for health status."""
+        emoji_map = {
+            "excellent": "🟢",
+            "good": "🟢",
+            "fair": "🟡",
+            "poor": "🔴",
+            "unknown": "⚪",
+        }
+        return emoji_map.get(health_status.lower(), "⚪")
+    def generate_detailed_branch_report(
+        self, branch_health_metrics: dict[str, dict[str, Any]], output_path: Path
+    ) -> Path:
+        """Generate detailed branch-by-branch CSV report.
+        Args:
+            branch_health_metrics: Dictionary mapping repo names to their branch health metrics
+            output_path: Path where the CSV should be written
+        Returns:
+            Path to the generated CSV file
+        """
+        rows = []
+        for repo_name, metrics in branch_health_metrics.items():
+            branches = metrics.get("branches", {})
+            main_branch = metrics.get("main_branch", "main")
+            for branch_name, branch_data in branches.items():
+                # Skip main branch in detailed report
+                if branch_name == main_branch:
+                    continue
+                freq = branch_data.get("commit_frequency", {})
+                row = {
+                    "repository": repo_name,
+                    "branch": branch_name,
+                    "age_days": branch_data.get("age_days", 0),
+                    "health_score": round(branch_data.get("health_score", 0), 1),
+                    "status": self._get_branch_status(branch_data),
+                    "total_commits": branch_data.get("total_commits", 0),
+                    "unique_authors": branch_data.get("unique_authors", 0),
+                    "commits_ahead": branch_data.get("ahead_of_main", 0),
+                    "commits_behind": branch_data.get("behind_main", 0),
+                    "divergence_total": branch_data.get("divergence_score", 0),
+                    "daily_commit_avg": round(freq.get("daily_average", 0), 2),
+                    "weekly_commit_avg": round(freq.get("weekly_average", 0), 2),
+                    "latest_activity": branch_data.get("latest_activity", ""),
+                    "is_merged": branch_data.get("is_merged", False),
+                    "is_stale": branch_data.get("is_stale", False),
+                }
+                rows.append(row)
+        # Sort by repository and health score
+        rows.sort(key=lambda x: (x["repository"], x["health_score"]))
+        # Write CSV
+        if rows:
+            df = pd.DataFrame(rows)
+            df.to_csv(output_path, index=False)
+        return output_path
+    def _get_branch_status(self, branch_data: dict[str, Any]) -> str:
+        """Determine branch status based on metrics."""
+        if branch_data.get("is_merged", False):
+            return "merged"
+        elif branch_data.get("is_stale", False):
+            return "stale"
+        elif branch_data.get("age_days", 0) > 14:
+            return "long-lived"
+        else:
+            return "active"

gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.11__py3-none-any.whl

gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.11__py3-none-any.whl