gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4108 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +904 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +441 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1193 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,389 @@
1
+ """Weekly classification trends CSV report generation."""
2
+
3
+ import logging
4
+ from collections import defaultdict
5
+ from datetime import datetime, timedelta
6
+ from pathlib import Path
7
+ from typing import Any, Dict, List, Optional
8
+
9
+ import pandas as pd
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class WeeklyTrendsWriter:
    """Generate weekly classification trends CSV reports.

    WHY: Week-over-week classification trends provide insights into changing
    development patterns, helping identify evolving team practices, seasonal
    patterns, and the impact of process changes on development work types.

    DESIGN DECISION: Generate separate developer and project trend reports
    to allow analysis at different granularities. Include percentage changes
    to highlight velocity and pattern shifts. Both reports share one
    parameterized implementation (`_generate_weekly_trends`) so the two
    outputs cannot drift apart.
    """

    def __init__(self) -> None:
        """Initialize weekly trends writer."""
        # Closed set of categories emitted as <category>_count /
        # <category>_pct_change columns; 'other' is the fallback bucket.
        self.classification_categories = [
            'feature', 'bug_fix', 'refactor', 'documentation',
            'maintenance', 'test', 'style', 'build', 'other'
        ]

    def generate_weekly_trends_reports(
        self,
        commits: List[Dict[str, Any]],
        output_dir: Path,
        weeks: int = 12,
        date_suffix: str = ""
    ) -> Dict[str, Path]:
        """Generate both developer and project weekly trends reports.

        WHY: Providing both perspectives allows analysis of individual developer
        patterns as well as project-level trend analysis. This enables both
        personal development tracking and project health monitoring.

        Args:
            commits: List of commit data with classifications and timestamps
            output_dir: Directory to write CSV reports to
            weeks: Number of weeks to analyze (commits older than this are skipped)
            date_suffix: Date suffix for output filenames

        Returns:
            Dictionary mapping report type ('developer_trends' /
            'project_trends') to output file paths
        """
        output_paths: Dict[str, Path] = {}

        # Developer-level report
        developer_trends_path = output_dir / f"developer_weekly_trends{date_suffix}.csv"
        self._generate_developer_weekly_trends(commits, developer_trends_path, weeks)
        output_paths['developer_trends'] = developer_trends_path

        # Project-level report
        project_trends_path = output_dir / f"project_weekly_trends{date_suffix}.csv"
        self._generate_project_weekly_trends(commits, project_trends_path, weeks)
        output_paths['project_trends'] = project_trends_path

        logger.info(f"Generated weekly trends reports: {len(output_paths)} files")
        return output_paths

    def _generate_developer_weekly_trends(
        self,
        commits: List[Dict[str, Any]],
        output_path: Path,
        weeks: int
    ) -> None:
        """Generate developer weekly classification trends CSV.

        WHY: Developer-level trends help identify individual development patterns,
        skill progression, and changing work focus over time. This enables
        targeted coaching and recognition of evolving expertise.

        Args:
            commits: List of commit data with developer and classification info
            output_path: Path to write the CSV file
            weeks: Number of weeks for trend analysis
        """
        # Identity resolution priority: canonical_id (post identity-merge),
        # then author_email, then author_name, finally 'Unknown'.
        self._generate_weekly_trends(
            commits,
            output_path,
            weeks,
            entity_column='developer',
            entity_label='developer',
            entity_extractor=lambda c: (
                c.get('canonical_id')
                or c.get('author_email')
                or c.get('author_name', 'Unknown')
            ),
        )

    def _generate_project_weekly_trends(
        self,
        commits: List[Dict[str, Any]],
        output_path: Path,
        weeks: int
    ) -> None:
        """Generate project weekly classification trends CSV.

        WHY: Project-level trends reveal changing development patterns within
        specific codebases, helping identify technical debt accumulation,
        feature development cycles, and maintenance patterns.

        Args:
            commits: List of commit data with project and classification info
            output_path: Path to write the CSV file
            weeks: Number of weeks for trend analysis
        """
        self._generate_weekly_trends(
            commits,
            output_path,
            weeks,
            entity_column='project',
            entity_label='project',
            entity_extractor=lambda c: c.get('project_key', 'UNKNOWN'),
        )

    def _generate_weekly_trends(
        self,
        commits: List[Dict[str, Any]],
        output_path: Path,
        weeks: int,
        entity_column: str,
        entity_label: str,
        entity_extractor: Any,
    ) -> None:
        """Shared implementation for developer/project weekly trend CSVs.

        WHY: The developer and project reports differ only in how a commit is
        mapped to an entity and in column/log labels; one implementation keeps
        their week bucketing and percentage-change math identical.

        Args:
            commits: Commit dictionaries (must carry 'timestamp' to be counted)
            output_path: Path to write the CSV file
            weeks: Analysis window; commits in week >= weeks are excluded
            entity_column: CSV column name for the grouping key
            entity_label: Human-readable label used in log messages
            entity_extractor: Callable mapping a commit dict to its entity key
        """
        if not commits:
            logger.warning(f"No commits provided for {entity_label} weekly trends analysis")
            self._write_empty_trends_csv(output_path, entity_column, entity_label)
            return

        # Most-recent-first ordering so the newest commit defines week 0.
        sorted_commits = sorted(
            [c for c in commits if c.get('timestamp')],
            key=lambda x: x['timestamp'],
            reverse=True,
        )

        if not sorted_commits:
            logger.warning(f"No commits with timestamps for {entity_label} weekly trends analysis")
            self._write_empty_trends_csv(output_path, entity_column, entity_label)
            return

        # Normalize datetimes to dates so the day arithmetic below is uniform.
        latest_date = sorted_commits[0]['timestamp']
        if hasattr(latest_date, 'date'):
            latest_date = latest_date.date()

        # entity -> week_num -> classification -> count
        # (week 0 = current/most-recent week, 1 = previous week, ...)
        entity_weeks: Dict[Any, Dict[int, Dict[str, int]]] = defaultdict(
            lambda: defaultdict(lambda: defaultdict(int))
        )

        for commit in sorted_commits:
            timestamp = commit.get('timestamp')
            if not timestamp:
                continue

            commit_date = timestamp.date() if hasattr(timestamp, 'date') else timestamp

            days_diff = (latest_date - commit_date).days
            week_num = days_diff // 7

            # Drop commits outside the requested analysis window.
            if week_num >= weeks:
                continue

            entity = entity_extractor(commit)
            classification = self._get_commit_classification(commit)
            entity_weeks[entity][week_num][classification] += 1

        rows = []
        for entity, weeks_data in entity_weeks.items():
            # Ascending week numbers: index i+1 is the chronologically
            # previous *recorded* week. NOTE(review): weeks with zero commits
            # are absent, so across a gap the "previous week" is the nearest
            # older active week rather than the literal prior calendar week.
            sorted_weeks = sorted(weeks_data.keys())

            for i, week_num in enumerate(sorted_weeks):
                week_data = weeks_data[week_num]
                week_start = latest_date - timedelta(days=week_num * 7)

                row: Dict[str, Any] = {
                    'week_start': week_start.strftime('%Y-%m-%d'),
                    entity_column: entity,
                    'week_number': week_num,
                }
                row['total_commits'] = sum(week_data.values())

                for category in self.classification_categories:
                    count = week_data.get(category, 0)
                    row[f'{category}_count'] = count

                    if i < len(sorted_weeks) - 1:  # a previous week exists
                        prev_count = weeks_data[sorted_weeks[i + 1]].get(category, 0)
                        if prev_count > 0:
                            pct_change = ((count - prev_count) / prev_count) * 100
                        elif count > 0:
                            pct_change = 100.0  # activity appeared from zero
                        else:
                            pct_change = 0.0
                    else:
                        pct_change = 0.0  # oldest week: no baseline

                    row[f'{category}_pct_change'] = round(pct_change, 1)

                rows.append(row)

        df = pd.DataFrame(rows)
        if not df.empty:
            df = df.sort_values([entity_column, 'week_number'])

        df.to_csv(output_path, index=False)
        logger.info(f"Generated {entity_label} weekly trends CSV: {output_path} ({len(df)} rows)")

    def _get_commit_classification(self, commit: Dict[str, Any]) -> str:
        """Extract commit classification from commit data.

        WHY: Commits may have classification data in different fields depending
        on the extraction method used (ML vs rule-based vs cached). This method
        provides a consistent way to extract the classification.

        DESIGN DECISION: Priority order for classification sources:
        1. predicted_class (from ML classification)
        2. category (from rule-based classification)
        3. classification (ticket extractor categorization)
        4. 'other' (fallback for unclassified commits)

        Args:
            commit: Commit data dictionary

        Returns:
            Classification category string
        """
        if commit.get('predicted_class'):
            return commit['predicted_class']

        if commit.get('category'):
            return commit['category']

        if 'classification' in commit:
            return commit['classification']

        return 'other'

    def _write_empty_trends_csv(
        self, output_path: Path, entity_column: str, entity_label: str
    ) -> None:
        """Write an empty trends CSV with the standard header row.

        Args:
            output_path: Path to write the empty CSV file
            entity_column: Name of the grouping column (e.g. 'developer')
            entity_label: Human-readable label used in the log message
        """
        columns = ['week_start', entity_column, 'week_number', 'total_commits']
        for category in self.classification_categories:
            columns.extend([f'{category}_count', f'{category}_pct_change'])

        pd.DataFrame(columns=columns).to_csv(output_path, index=False)
        logger.info(f"Generated empty {entity_label} weekly trends CSV: {output_path}")

    def _write_empty_developer_trends_csv(self, output_path: Path) -> None:
        """Write an empty developer trends CSV with proper headers.

        Args:
            output_path: Path to write the empty CSV file
        """
        self._write_empty_trends_csv(output_path, 'developer', 'developer')

    def _write_empty_project_trends_csv(self, output_path: Path) -> None:
        """Write an empty project trends CSV with proper headers.

        Args:
            output_path: Path to write the empty CSV file
        """
        self._write_empty_trends_csv(output_path, 'project', 'project')
@@ -0,0 +1,5 @@
1
+ """Training module for commit classification."""
2
+
3
+ from .pipeline import CommitClassificationTrainer
4
+
5
+ __all__ = ["CommitClassificationTrainer"]