gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4108 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +904 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +441 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1193 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
@@ -1,26 +1,232 @@
  """Advanced analytics report generation with percentage and qualitative metrics."""
  import csv
+ import logging
+ from collections import defaultdict
  from datetime import datetime, timedelta, timezone
  from pathlib import Path
- from typing import List, Dict, Any, Tuple
- from collections import defaultdict
- import pandas as pd
+ from typing import Any, Dict, List, Tuple
+
  import numpy as np
+ import pandas as pd
+
+ # Get logger for this module
+ logger = logging.getLogger(__name__)


  class AnalyticsReportGenerator:
      """Generate advanced analytics reports with percentage breakdowns and qualitative insights."""

-     def __init__(self, anonymize: bool = False):
+     def __init__(self, anonymize: bool = False, exclude_authors: list[str] = None, identity_resolver=None):
          """Initialize analytics report generator."""
          self.anonymize = anonymize
          self._anonymization_map = {}
          self._anonymous_counter = 0
+         self.exclude_authors = exclude_authors or []
+         self.identity_resolver = identity_resolver
+
+     def _filter_excluded_authors(self, data_list: list[dict[str, Any]]) -> list[dict[str, Any]]:
+         """
+         Filter out excluded authors from any data list using canonical_id.
+
+         WHY: Bot exclusion happens in Phase 2 (reporting) instead of Phase 1 (data collection)
+         to ensure manual identity mappings work correctly. This allows the system to see
+         consolidated bot identities via canonical_id instead of just original author_email/author_name.
+
+         Args:
+             data_list: List of data dictionaries containing canonical_id field
+
+         Returns:
+             Filtered list with excluded authors removed
+         """
+         if not self.exclude_authors:
+             return data_list
+
+         logger.debug(f"DEBUG EXCLUSION: Starting filter with {len(self.exclude_authors)} excluded authors: {self.exclude_authors}")
+         logger.debug(f"DEBUG EXCLUSION: Filtering {len(data_list)} items from data list")
+
+         excluded_lower = [author.lower() for author in self.exclude_authors]
+         logger.debug(f"DEBUG EXCLUSION: Excluded authors (lowercase): {excluded_lower}")
+
+         filtered_data = []
+         excluded_count = 0
+
+         # Sample first 5 items to see data structure
+         for i, item in enumerate(data_list[:5]):
+             logger.debug(f"DEBUG EXCLUSION: Sample item {i}: canonical_id='{item.get('canonical_id', '')}', "
+                          f"author_email='{item.get('author_email', '')}', author_name='{item.get('author_name', '')}', "
+                          f"author='{item.get('author', '')}', primary_name='{item.get('primary_name', '')}', "
+                          f"name='{item.get('name', '')}', developer='{item.get('developer', '')}', "
+                          f"display_name='{item.get('display_name', '')}'")
+
+         for item in data_list:
+             canonical_id = item.get("canonical_id", "")
+             # Also check original author fields as fallback for data without canonical_id
+             author_email = item.get("author_email", "")
+             author_name = item.get("author_name", "")
+
+             # Check all possible author fields to ensure we catch every variation
+             author = item.get("author", "")
+             primary_name = item.get("primary_name", "")
+             name = item.get("name", "")
+             developer = item.get("developer", "") # Common in analytics data
+             display_name = item.get("display_name", "") # Common in some data structures
+
+             # Check canonical_id FIRST - this is the primary exclusion check
+             should_exclude = False
+             if canonical_id and canonical_id.lower() in excluded_lower:
+                 should_exclude = True
+             # CRITICAL: Also check primary_email for manual mappings (e.g. bots mapped to bot@excluded.local)
+             elif item.get("primary_email", "") and item.get("primary_email", "").lower() in excluded_lower:
+                 should_exclude = True
+             # Fall back to checking other fields only if canonical_id and primary_email don't match
+             elif not should_exclude:
+                 should_exclude = (
+                     (author_email and author_email.lower() in excluded_lower) or
+                     (author_name and author_name.lower() in excluded_lower) or
+                     (author and author.lower() in excluded_lower) or
+                     (primary_name and primary_name.lower() in excluded_lower) or
+                     (name and name.lower() in excluded_lower) or
+                     (developer and developer.lower() in excluded_lower) or
+                     (display_name and display_name.lower() in excluded_lower)
+                 )
+
+             if should_exclude:
+                 excluded_count += 1
+                 logger.debug(f"DEBUG EXCLUSION: EXCLUDING item - canonical_id='{canonical_id}', "
+                              f"primary_email='{item.get('primary_email', '')}', "
+                              f"author_email='{author_email}', author_name='{author_name}', author='{author}', "
+                              f"primary_name='{primary_name}', name='{name}', developer='{developer}', "
+                              f"display_name='{display_name}'")
+             else:
+                 filtered_data.append(item)
+
+         logger.debug(f"DEBUG EXCLUSION: Excluded {excluded_count} items, kept {len(filtered_data)} items")
+         return filtered_data
+
+     def _get_canonical_display_name(self, canonical_id: str, fallback_name: str) -> str:
+         """
+         Get the canonical display name for a developer.
+
+         WHY: Manual identity mappings may have updated display names that aren't
+         reflected in the developer_stats data passed to report generators. This
+         method ensures we get the most current display name from the identity resolver.
+
+         Args:
+             canonical_id: The canonical ID to get the display name for
+             fallback_name: The fallback name to use if identity resolver is not available
+
+         Returns:
+             The canonical display name or fallback name
+         """
+         if self.identity_resolver and canonical_id:
+             try:
+                 canonical_name = self.identity_resolver.get_canonical_name(canonical_id)
+                 if canonical_name and canonical_name != "Unknown":
+                     return canonical_name
+             except Exception as e:
+                 logger.debug(f"Error getting canonical name for {canonical_id}: {e}")
+
+         return fallback_name
+
+     def _get_files_changed_count(self, commit: Dict[str, Any]) -> int:
+         """Safely extract files_changed count from commit data.
+
+         WHY: The files_changed field can be either an int (count) or list (file names).
+         This helper ensures we always get an integer count for calculations.
+
+         Args:
+             commit: Commit dictionary with files_changed field
+
+         Returns:
+             Integer count of files changed
+         """
+         files_changed = commit.get('files_changed', 0)
+
+         if isinstance(files_changed, int):
+             return files_changed
+         elif isinstance(files_changed, list):
+             return len(files_changed)
+         else:
+             # Fallback for unexpected types
+             logger.warning(f"Unexpected files_changed type: {type(files_changed)}, defaulting to 0")
+             return 0
+
+     def _log_datetime_comparison(self, dt1: datetime, dt2: datetime, operation: str, location: str) -> None:
+         """Log datetime comparison details for debugging timezone issues."""
+         logger.debug(f"Comparing dates in {location} ({operation}):")
+         logger.debug(f" dt1: {dt1} (tzinfo: {dt1.tzinfo}, aware: {dt1.tzinfo is not None})")
+         logger.debug(f" dt2: {dt2} (tzinfo: {dt2.tzinfo}, aware: {dt2.tzinfo is not None})")
+
+     def _safe_datetime_compare(self, dt1: datetime, dt2: datetime, operation: str, location: str) -> bool:
+         """Safely compare datetimes with logging and error handling."""
+         try:
+             self._log_datetime_comparison(dt1, dt2, operation, location)
+
+             if operation == 'lt':
+                 result = dt1 < dt2
+             elif operation == 'gt':
+                 result = dt1 > dt2
+             elif operation == 'le':
+                 result = dt1 <= dt2
+             elif operation == 'ge':
+                 result = dt1 >= dt2
+             elif operation == 'eq':
+                 result = dt1 == dt2
+             else:
+                 raise ValueError(f"Unknown operation: {operation}")
+
+             logger.debug(f" Result: {result}")
+             return result
+
+         except TypeError as e:
+             logger.error(f"Timezone comparison error in {location}:")
+             logger.error(f" dt1: {dt1} (type: {type(dt1)}, tzinfo: {getattr(dt1, 'tzinfo', 'N/A')})")
+             logger.error(f" dt2: {dt2} (type: {type(dt2)}, tzinfo: {getattr(dt2, 'tzinfo', 'N/A')})")
+             logger.error(f" Operation: {operation}")
+             logger.error(f" Error: {e}")
+
+             # Import traceback for detailed error info
+             import traceback
+             logger.error(f" Full traceback:\n{traceback.format_exc()}")
+
+             # Try to fix by making both timezone-aware in UTC
+             try:
+                 if dt1.tzinfo is None:
+                     dt1 = dt1.replace(tzinfo=timezone.utc)
+                     logger.debug(f" Fixed dt1 to UTC: {dt1}")
+                 if dt2.tzinfo is None:
+                     dt2 = dt2.replace(tzinfo=timezone.utc)
+                     logger.debug(f" Fixed dt2 to UTC: {dt2}")
+
+                 # Retry comparison
+                 if operation == 'lt':
+                     result = dt1 < dt2
+                 elif operation == 'gt':
+                     result = dt1 > dt2
+                 elif operation == 'le':
+                     result = dt1 <= dt2
+                 elif operation == 'ge':
+                     result = dt1 >= dt2
+                 elif operation == 'eq':
+                     result = dt1 == dt2
+                 else:
+                     raise ValueError(f"Unknown operation: {operation}")
+
+                 logger.info(f" Fixed comparison result: {result}")
+                 return result
+
+             except Exception as fix_error:
+                 logger.error(f" Failed to fix timezone issue: {fix_error}")
+                 raise

      def generate_activity_distribution_report(self, commits: List[Dict[str, Any]],
                                                developer_stats: List[Dict[str, Any]],
                                                output_path: Path) -> Path:
          """Generate activity distribution report with percentage breakdowns."""
+         # Apply exclusion filtering in Phase 2
+         commits = self._filter_excluded_authors(commits)
+         developer_stats = self._filter_excluded_authors(developer_stats)
+
          # Build lookup maps
          dev_lookup = {dev['canonical_id']: dev for dev in developer_stats}
 
@@ -31,7 +237,7 @@ class AnalyticsReportGenerator:
              c.get('filtered_deletions', c.get('deletions', 0))
              for c in commits
          )
-         total_files = sum(c['files_changed'] for c in commits)
+         total_files = sum(self._get_files_changed_count(c) for c in commits)

          # Group by developer and project
          dev_project_activity = defaultdict(lambda: defaultdict(lambda: {
@@ -47,7 +253,16 @@ class AnalyticsReportGenerator:
                  commit.get('filtered_insertions', commit.get('insertions', 0)) +
                  commit.get('filtered_deletions', commit.get('deletions', 0))
              )
-             dev_project_activity[dev_id][project]['files'] += commit.get('filtered_files_changed', commit.get('files_changed', 0))
+             # Handle files_changed safely - could be int or list
+             files_changed = commit.get('filtered_files_changed')
+             if files_changed is None:
+                 files_changed = self._get_files_changed_count(commit)
+             elif isinstance(files_changed, list):
+                 files_changed = len(files_changed)
+             elif not isinstance(files_changed, int):
+                 files_changed = 0
+
+             dev_project_activity[dev_id][project]['files'] += files_changed
              dev_project_activity[dev_id][project]['story_points'] += commit.get('story_points', 0) or 0

          # Build report data
@@ -55,7 +270,12 @@ class AnalyticsReportGenerator:
 
          for dev_id, projects in dev_project_activity.items():
              developer = dev_lookup.get(dev_id, {})
-             dev_name = self._anonymize_value(developer.get('primary_name', 'Unknown'), 'name')
+             dev_name = self._anonymize_value(
+                 self._get_canonical_display_name(
+                     dev_id,
+                     developer.get('primary_name', 'Unknown')
+                 ), 'name'
+             )

              # Calculate developer totals
              dev_total_commits = sum(p['commits'] for p in projects.values())
@@ -98,6 +318,9 @@ class AnalyticsReportGenerator:
                                          ticket_analysis: Dict[str, Any],
                                          output_path: Path) -> Path:
          """Generate qualitative insights and patterns report."""
+         # Apply exclusion filtering in Phase 2
+         commits = self._filter_excluded_authors(commits)
+         developer_stats = self._filter_excluded_authors(developer_stats)
          insights = []

          # Analyze commit patterns
@@ -127,10 +350,18 @@ class AnalyticsReportGenerator:
                                        output_path: Path,
                                        weeks: int = 12) -> Path:
          """Generate developer focus analysis showing concentration patterns and activity across all projects."""
+         # Apply exclusion filtering in Phase 2
+         commits = self._filter_excluded_authors(commits)
+         developer_stats = self._filter_excluded_authors(developer_stats)
+
          # Calculate week boundaries (timezone-aware to match commit timestamps)
          end_date = datetime.now(timezone.utc)
          start_date = end_date - timedelta(weeks=weeks)

+         logger.debug(f"Developer focus report date range:")
+         logger.debug(f" start_date: {start_date} (tzinfo: {start_date.tzinfo})")
+         logger.debug(f" end_date: {end_date} (tzinfo: {end_date.tzinfo})")
+
          # Build developer lookup
          dev_lookup = {dev['canonical_id']: dev for dev in developer_stats}
 
@@ -149,7 +380,12 @@ class AnalyticsReportGenerator:
 
          for dev in developer_stats:
              dev_id = dev['canonical_id']
-             dev_name = self._anonymize_value(dev['primary_name'], 'name')
+             dev_name = self._anonymize_value(
+                 self._get_canonical_display_name(
+                     dev_id,
+                     dev['primary_name']
+                 ), 'name'
+             )

              # Get developer's commits
              dev_commits = [c for c in commits if c.get('canonical_id') == dev_id]
@@ -164,6 +400,10 @@ class AnalyticsReportGenerator:
              commit_hours = []

              for commit in dev_commits:
+                 # Log commit processing
+                 logger.debug(f"Processing commit for developer {dev_name}: {commit.get('hash', 'unknown')[:8]}")
+                 logger.debug(f" timestamp: {commit['timestamp']} (tzinfo: {getattr(commit['timestamp'], 'tzinfo', 'N/A')})")
+
                  # Project distribution
                  project_key = commit.get('project_key', 'UNKNOWN')
                  projects[project_key] += 1
@@ -182,8 +422,10 @@ class AnalyticsReportGenerator:
                  # Commit size
                  commit_sizes.append(lines_changed)

-                 # Time of day
-                 if hasattr(commit['timestamp'], 'hour'):
+                 # Time of day (use local hour if available, fallback to UTC)
+                 if 'local_hour' in commit:
+                     commit_hours.append(commit['local_hour'])
+                 elif hasattr(commit['timestamp'], 'hour'):
                      commit_hours.append(commit['timestamp'].hour)

              # Calculate metrics
@@ -276,12 +518,284 @@ class AnalyticsReportGenerator:
 
          return output_path

+     def generate_weekly_trends_report(self, commits: List[Dict[str, Any]],
+                                       developer_stats: List[Dict[str, Any]],
+                                       output_path: Path,
+                                       weeks: int = 12) -> Path:
+         """Generate weekly trends analysis showing changes in activity patterns."""
+         # Apply exclusion filtering in Phase 2
+         commits = self._filter_excluded_authors(commits)
+         developer_stats = self._filter_excluded_authors(developer_stats)
+
+         # Calculate week boundaries
+         end_date = datetime.now(timezone.utc)
+         start_date = end_date - timedelta(weeks=weeks)
+
+         # Build developer lookup
+         dev_lookup = {dev['canonical_id']: dev for dev in developer_stats}
+
+         # Initialize data structures
+         weekly_data = defaultdict(lambda: {
+             'commits': 0,
+             'developers': set(),
+             'projects': defaultdict(int),
+             'lines_changed': 0,
+             'story_points': 0
+         })
+
+         developer_weekly = defaultdict(lambda: defaultdict(lambda: {
+             'commits': 0, 'lines': 0, 'story_points': 0
+         }))
+         project_weekly = defaultdict(lambda: defaultdict(lambda: {
+             'commits': 0, 'lines': 0, 'developers': set(), 'story_points': 0
+         }))
+
+         # Process commits
+         for commit in commits:
+             week_start = self._get_week_start(commit['timestamp'])
+             week_key = week_start.strftime('%Y-%m-%d')
+
+             # Overall weekly metrics
+             weekly_data[week_key]['commits'] += 1
+             weekly_data[week_key]['developers'].add(commit.get('canonical_id'))
+             weekly_data[week_key]['projects'][commit.get('project_key', 'UNKNOWN')] += 1
+             lines = (
+                 commit.get('filtered_insertions', commit.get('insertions', 0)) +
+                 commit.get('filtered_deletions', commit.get('deletions', 0))
+             )
+             weekly_data[week_key]['lines_changed'] += lines
+             weekly_data[week_key]['story_points'] += commit.get('story_points', 0) or 0
+
+             # Developer-specific weekly data
+             dev_id = commit.get('canonical_id')
+             developer_weekly[dev_id][week_key]['commits'] += 1
+             developer_weekly[dev_id][week_key]['lines'] += lines
+             developer_weekly[dev_id][week_key]['story_points'] += commit.get('story_points', 0) or 0
+
+             # Project-specific weekly data
+             project = commit.get('project_key', 'UNKNOWN')
+             project_weekly[project][week_key]['commits'] += 1
+             project_weekly[project][week_key]['lines'] += lines
+             project_weekly[project][week_key]['developers'].add(dev_id)
+             project_weekly[project][week_key]['story_points'] += commit.get('story_points', 0) or 0
+
+         # Convert to rows for CSV
+         rows = []
+         sorted_weeks = sorted(weekly_data.keys())
+
+         # Track developer and project trends
+         dev_activity_changes = defaultdict(list) # dev_id -> list of weekly changes
+         project_activity_changes = defaultdict(list) # project -> list of weekly changes
+
+         for i, week in enumerate(sorted_weeks):
+             data = weekly_data[week]
+
+             # Calculate week-over-week changes
+             prev_week = sorted_weeks[i-1] if i > 0 else None
+
+             commits_change = 0
+             developers_change = 0
+             if prev_week:
+                 prev_data = weekly_data[prev_week]
+                 commits_change = data['commits'] - prev_data['commits']
+                 developers_change = len(data['developers']) - len(prev_data['developers'])
+
+             # Top project and developer this week
+             top_project = max(data['projects'].items(), key=lambda x: x[1])[0] if data['projects'] else 'NONE'
+
+             # Find top developer this week
+             top_dev_id = None
+             top_dev_commits = 0
+             for dev_id in data['developers']:
+                 dev_commits = developer_weekly[dev_id][week]['commits']
+                 if dev_commits > top_dev_commits:
+                     top_dev_commits = dev_commits
+                     top_dev_id = dev_id
+
+             top_dev_name = self._anonymize_value(
+                 self._get_canonical_display_name(
+                     top_dev_id,
+                     dev_lookup.get(top_dev_id, {}).get('primary_name', 'Unknown')
+                 ), 'name'
+             ) if top_dev_id else 'None'
+
+             # Calculate developer trends for active developers this week
+             dev_trend_summary = []
+             for dev_id in data['developers']:
+                 dev_data = developer_weekly[dev_id][week]
+                 prev_dev_data = developer_weekly[dev_id].get(prev_week, {'commits': 0}) if prev_week else {'commits': 0}
+                 change = dev_data['commits'] - prev_dev_data['commits']
+                 if change != 0:
+                     dev_name = self._anonymize_value(
+                         self._get_canonical_display_name(
+                             dev_id,
+                             dev_lookup.get(dev_id, {}).get('primary_name', 'Unknown')
+                         ), 'name'
+                     )
+                     dev_activity_changes[dev_name].append(change)
+                     if abs(change) >= 3: # Significant changes only
+                         dev_trend_summary.append(f"{dev_name}({'+' if change > 0 else ''}{change})")
+
+             # Calculate project trends
+             project_trend_summary = []
+             for project, count in data['projects'].items():
+                 prev_count = weekly_data[prev_week]['projects'].get(project, 0) if prev_week else 0
+                 change = count - prev_count
+                 if change != 0:
+                     project_activity_changes[project].append(change)
+                     if abs(change) >= 3: # Significant changes only
+                         project_trend_summary.append(f"{project}({'+' if change > 0 else ''}{change})")
+
+             row = {
+                 'week_start': week,
+                 'commits': data['commits'],
+                 'active_developers': len(data['developers']),
+                 'active_projects': len(data['projects']),
+                 'lines_changed': data['lines_changed'],
+                 'story_points': data['story_points'],
+                 'commits_change': commits_change,
+                 'developers_change': developers_change,
+                 'top_project': top_project,
+                 'top_developer': top_dev_name,
+                 'avg_commits_per_dev': round(data['commits'] / max(len(data['developers']), 1), 1),
+                 'avg_lines_per_commit': round(data['lines_changed'] / max(data['commits'], 1), 1),
+                 'developer_trends': '; '.join(dev_trend_summary[:5]) if dev_trend_summary else 'stable',
+                 'project_trends': '; '.join(project_trend_summary[:5]) if project_trend_summary else 'stable'
+             }
+             rows.append(row)
+
+         # Write main CSV
+         df = pd.DataFrame(rows)
+         df.to_csv(output_path, index=False)
+
+         # Also generate detailed developer trends CSV with weekly columns
+         dev_trends_path = output_path.parent / f'developer_trends_{output_path.stem.split("_")[-1]}.csv'
+         dev_trend_rows = []
+
+         # Build developer activity by week
+         for dev_id, weekly_commits in developer_weekly.items():
+             dev_info = dev_lookup.get(dev_id, {})
+             dev_name = self._anonymize_value(
+                 self._get_canonical_display_name(
+                     dev_id,
+                     dev_info.get('primary_name', 'Unknown')
+                 ), 'name'
+             )
+
+             # Calculate summary statistics
+             weekly_values = []
+             for week in sorted_weeks:
+                 commits = weekly_commits.get(week, {}).get('commits', 0)
+                 weekly_values.append(commits)
+
+             # Only include developers with any activity
+             if sum(weekly_values) > 0:
+                 # Calculate trend metrics
+                 changes = []
+                 for i in range(1, len(weekly_values)):
+                     changes.append(weekly_values[i] - weekly_values[i-1])
+
+                 avg_change = sum(changes) / len(changes) if changes else 0
+                 volatility = np.std(changes) if len(changes) > 1 else 0
+                 trend = 'increasing' if avg_change > 1 else 'decreasing' if avg_change < -1 else 'stable'
+
+                 row = {
+                     'developer': dev_name,
+                     'total_commits': sum(weekly_values),
+                     'avg_weekly_commits': round(sum(weekly_values) / len(weekly_values), 1),
+                     'avg_weekly_change': round(avg_change, 1),
+                     'volatility': round(volatility, 1),
+                     'trend': trend,
+                     'total_weeks_active': len([v for v in weekly_values if v > 0]),
+                     'max_week': max(weekly_values),
+                     'min_week': min([v for v in weekly_values if v > 0]) if any(v > 0 for v in weekly_values) else 0
+                 }
+
+                 # Add weekly columns
+                 for i, week in enumerate(sorted_weeks):
+                     week_label = f'week_{i+1}_{week}'
+                     row[week_label] = weekly_values[i]
+
+                 dev_trend_rows.append(row)
+
+         if dev_trend_rows:
+             dev_trends_df = pd.DataFrame(dev_trend_rows)
+             # Sort by total commits to show most active developers first
+             dev_trends_df.sort_values('total_commits', ascending=False, inplace=True)
+             dev_trends_df.to_csv(dev_trends_path, index=False)
+
+         # Also generate detailed project trends CSV with weekly columns
+         proj_trends_path = output_path.parent / f'project_trends_{output_path.stem.split("_")[-1]}.csv'
+         proj_trend_rows = []
+
+         # Build project activity by week
+         for project, weekly_commits in project_weekly.items():
+             # Calculate summary statistics
+             weekly_values = []
+             weekly_developers = []
+             for week in sorted_weeks:
+                 commits = weekly_commits.get(week, {}).get('commits', 0)
+                 weekly_values.append(commits)
+                 # Count unique developers for this project this week
+                 devs = weekly_commits.get(week, {}).get('developers', set())
+                 weekly_developers.append(len(devs))
+
+             # Only include projects with any activity
+             if sum(weekly_values) > 0:
+                 # Calculate trend metrics
+                 changes = []
+                 for i in range(1, len(weekly_values)):
+                     changes.append(weekly_values[i] - weekly_values[i-1])
+
+                 avg_change = sum(changes) / len(changes) if changes else 0
+                 volatility = np.std(changes) if len(changes) > 1 else 0
+                 trend = 'growing' if avg_change > 2 else 'shrinking' if avg_change < -2 else 'stable'
+
+                 row = {
+                     'project': project,
+                     'total_commits': sum(weekly_values),
+                     'avg_weekly_commits': round(sum(weekly_values) / len(weekly_values), 1),
+                     'avg_weekly_developers': round(sum(weekly_developers) / len(weekly_developers), 1),
+                     'avg_weekly_change': round(avg_change, 1),
+                     'volatility': round(volatility, 1),
+                     'trend': trend,
+                     'total_weeks_active': len([v for v in weekly_values if v > 0]),
+                     'max_week': max(weekly_values),
+                     'min_week': min([v for v in weekly_values if v > 0]) if any(v > 0 for v in weekly_values) else 0
+                 }
+
+                 # Add weekly columns for commits
+                 for i, week in enumerate(sorted_weeks):
+                     week_label = f'week_{i+1}_{week}'
+                     row[week_label] = weekly_values[i]
+
+                 # Add weekly columns for developer count
+                 for i, week in enumerate(sorted_weeks):
+                     week_label = f'devs_week_{i+1}'
+                     row[week_label] = weekly_developers[i]
+
+                 proj_trend_rows.append(row)
+
+         if proj_trend_rows:
+             proj_trends_df = pd.DataFrame(proj_trend_rows)
+             # Sort by total commits to show most active projects first
+             proj_trends_df.sort_values('total_commits', ascending=False, inplace=True)
+             proj_trends_df.to_csv(proj_trends_path, index=False)
+
+         return output_path
+
      def _analyze_commit_patterns(self, commits: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
          """Analyze patterns in commit data."""
          insights = []

-         # Time-based patterns
-         commit_hours = [c['timestamp'].hour for c in commits if hasattr(c['timestamp'], 'hour')]
+         # Time-based patterns (use local hour if available)
+         commit_hours = []
+         for c in commits:
+             if 'local_hour' in c:
+                 commit_hours.append(c['local_hour'])
+             elif hasattr(c['timestamp'], 'hour'):
+                 commit_hours.append(c['timestamp'].hour)
+
          if commit_hours:
              peak_hour = max(set(commit_hours), key=commit_hours.count)
              insights.append({
@@ -410,7 +924,11 @@ class AnalyticsReportGenerator:
          insights = []

          # File change patterns
-         file_changes = [c['files_changed'] for c in commits if c['files_changed'] > 0]
+         file_changes = []
+         for c in commits:
+             files_count = self._get_files_changed_count(c)
+             if files_count > 0:
+                 file_changes.append(files_count)
          if file_changes:
              avg_files = np.mean(file_changes)
 
@@ -442,18 +960,25 @@ class AnalyticsReportGenerator:
 
      def _get_week_start(self, date: datetime) -> datetime:
          """Get Monday of the week for a given date."""
+         logger.debug(f"Getting week start for date: {date} (tzinfo: {getattr(date, 'tzinfo', 'N/A')})")
+
          # Ensure consistent timezone handling - keep timezone info
          if hasattr(date, 'tzinfo') and date.tzinfo is not None:
              # Keep timezone-aware but ensure it's UTC
              if date.tzinfo != timezone.utc:
                  date = date.astimezone(timezone.utc)
+                 logger.debug(f" Converted to UTC: {date}")
          else:
              # Convert naive datetime to UTC timezone-aware
              date = date.replace(tzinfo=timezone.utc)
+             logger.debug(f" Made timezone-aware: {date}")

          days_since_monday = date.weekday()
          monday = date - timedelta(days=days_since_monday)
-         return monday.replace(hour=0, minute=0, second=0, microsecond=0)
+         result = monday.replace(hour=0, minute=0, second=0, microsecond=0)
+
+         logger.debug(f" Week start result: {result} (tzinfo: {result.tzinfo})")
+         return result

      def _anonymize_value(self, value: str, field_type: str) -> str:
          """Anonymize a value if anonymization is enabled."""