PyPI - gitflow-analytics - Versions diffs - 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl - Mend

gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (119) hide show

gitflow_analytics/__init__.py +11 -11
gitflow_analytics/_version.py +2 -2
gitflow_analytics/classification/__init__.py +31 -0
gitflow_analytics/classification/batch_classifier.py +752 -0
gitflow_analytics/classification/classifier.py +464 -0
gitflow_analytics/classification/feature_extractor.py +725 -0
gitflow_analytics/classification/linguist_analyzer.py +574 -0
gitflow_analytics/classification/model.py +455 -0
gitflow_analytics/cli.py +4490 -378
gitflow_analytics/cli_rich.py +503 -0
gitflow_analytics/config/__init__.py +43 -0
gitflow_analytics/config/errors.py +261 -0
gitflow_analytics/config/loader.py +904 -0
gitflow_analytics/config/profiles.py +264 -0
gitflow_analytics/config/repository.py +124 -0
gitflow_analytics/config/schema.py +441 -0
gitflow_analytics/config/validator.py +154 -0
gitflow_analytics/config.py +44 -398
gitflow_analytics/core/analyzer.py +1320 -172
gitflow_analytics/core/branch_mapper.py +132 -132
gitflow_analytics/core/cache.py +1554 -175
gitflow_analytics/core/data_fetcher.py +1193 -0
gitflow_analytics/core/identity.py +571 -185
gitflow_analytics/core/metrics_storage.py +526 -0
gitflow_analytics/core/progress.py +372 -0
gitflow_analytics/core/schema_version.py +269 -0
gitflow_analytics/extractors/base.py +13 -11
gitflow_analytics/extractors/ml_tickets.py +1100 -0
gitflow_analytics/extractors/story_points.py +77 -59
gitflow_analytics/extractors/tickets.py +841 -89
gitflow_analytics/identity_llm/__init__.py +6 -0
gitflow_analytics/identity_llm/analysis_pass.py +231 -0
gitflow_analytics/identity_llm/analyzer.py +464 -0
gitflow_analytics/identity_llm/models.py +76 -0
gitflow_analytics/integrations/github_integration.py +258 -87
gitflow_analytics/integrations/jira_integration.py +572 -123
gitflow_analytics/integrations/orchestrator.py +206 -82
gitflow_analytics/metrics/activity_scoring.py +322 -0
gitflow_analytics/metrics/branch_health.py +470 -0
gitflow_analytics/metrics/dora.py +542 -179
gitflow_analytics/models/database.py +986 -59
gitflow_analytics/pm_framework/__init__.py +115 -0
gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
gitflow_analytics/pm_framework/base.py +406 -0
gitflow_analytics/pm_framework/models.py +211 -0
gitflow_analytics/pm_framework/orchestrator.py +652 -0
gitflow_analytics/pm_framework/registry.py +333 -0
gitflow_analytics/qualitative/__init__.py +29 -0
gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
gitflow_analytics/qualitative/core/__init__.py +13 -0
gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
gitflow_analytics/qualitative/core/processor.py +673 -0
gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
gitflow_analytics/qualitative/models/__init__.py +25 -0
gitflow_analytics/qualitative/models/schemas.py +306 -0
gitflow_analytics/qualitative/utils/__init__.py +13 -0
gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
gitflow_analytics/qualitative/utils/metrics.py +361 -0
gitflow_analytics/qualitative/utils/text_processing.py +285 -0
gitflow_analytics/reports/__init__.py +100 -0
gitflow_analytics/reports/analytics_writer.py +550 -18
gitflow_analytics/reports/base.py +648 -0
gitflow_analytics/reports/branch_health_writer.py +322 -0
gitflow_analytics/reports/classification_writer.py +924 -0
gitflow_analytics/reports/cli_integration.py +427 -0
gitflow_analytics/reports/csv_writer.py +1700 -216
gitflow_analytics/reports/data_models.py +504 -0
gitflow_analytics/reports/database_report_generator.py +427 -0
gitflow_analytics/reports/example_usage.py +344 -0
gitflow_analytics/reports/factory.py +499 -0
gitflow_analytics/reports/formatters.py +698 -0
gitflow_analytics/reports/html_generator.py +1116 -0
gitflow_analytics/reports/interfaces.py +489 -0
gitflow_analytics/reports/json_exporter.py +2770 -0
gitflow_analytics/reports/narrative_writer.py +2289 -158
gitflow_analytics/reports/story_point_correlation.py +1144 -0
gitflow_analytics/reports/weekly_trends_writer.py +389 -0
gitflow_analytics/training/__init__.py +5 -0
gitflow_analytics/training/model_loader.py +377 -0
gitflow_analytics/training/pipeline.py +550 -0
gitflow_analytics/tui/__init__.py +5 -0
gitflow_analytics/tui/app.py +724 -0
gitflow_analytics/tui/screens/__init__.py +8 -0
gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
gitflow_analytics/tui/screens/configuration_screen.py +523 -0
gitflow_analytics/tui/screens/loading_screen.py +348 -0
gitflow_analytics/tui/screens/main_screen.py +321 -0
gitflow_analytics/tui/screens/results_screen.py +722 -0
gitflow_analytics/tui/widgets/__init__.py +7 -0
gitflow_analytics/tui/widgets/data_table.py +255 -0
gitflow_analytics/tui/widgets/export_modal.py +301 -0
gitflow_analytics/tui/widgets/progress_widget.py +187 -0
gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
{gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
{gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
{gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
{gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0

gitflow_analytics/qualitative/utils/text_processing.py ADDED Viewed

@@ -0,0 +1,285 @@
+"""Text processing utilities for qualitative analysis."""
+import hashlib
+import re
class TextProcessor:
    """Utility class for text preprocessing and feature extraction.

    This class provides common text processing operations needed across
    the qualitative analysis pipeline, including normalization, feature
    extraction, and similarity calculations.

    Attributes:
        url_pattern: Compiled regex matching ``http``/``https`` URLs.
        email_pattern: Compiled regex matching email addresses.
        hash_pattern: Compiled regex matching 7-40 character lowercase
            hexadecimal runs (Git hashes).
        ticket_pattern: Compiled case-insensitive regex matching ticket
            references such as ``JIRA-123`` or ``BUG42``.
        stop_words: Words ignored by :meth:`extract_keywords`.
    """

    # Well-known project files, matched as lowercase substrings of the
    # file name. Kept at class level so the collection is built once
    # rather than on every loop iteration.
    _SPECIAL_FILES: tuple[str, ...] = (
        "dockerfile",
        "makefile",
        "readme",
        "license",
        "changelog",
        "package.json",
        "requirements.txt",
        "setup.py",
        "pom.xml",
    )

    # Caps that keep semantic fingerprints small and stable.
    _MAX_FINGERPRINT_FILES = 10
    _MAX_FINGERPRINT_KEYWORDS = 10

    def __init__(self) -> None:
        """Initialize text processor with common patterns."""
        # Common patterns for normalization
        self.url_pattern = re.compile(r"https?://[^\s]+")
        # The TLD class is [A-Za-z]; a "|" inside a character class is a
        # literal pipe, not an alternation, so it must not appear here.
        self.email_pattern = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
        self.hash_pattern = re.compile(r"\b[a-f0-9]{7,40}\b")  # Git hashes
        self.ticket_pattern = re.compile(r"\b(?:JIRA|TICKET|ISSUE|BUG|TASK)-?\d+\b", re.IGNORECASE)

        # Stop words for feature extraction
        self.stop_words: set[str] = {
            "the", "a", "an", "and", "or", "but",
            "in", "on", "at", "to", "for", "of", "with", "by",
            "is", "are", "was", "were", "be", "been", "being",
            "have", "has", "had", "do", "does", "did",
            "will", "would", "could", "should", "may", "might", "can",
            "this", "that", "these", "those",
        }

    def normalize_message(self, message: str) -> str:
        """Normalize commit message for consistent processing.

        The message is lowercased, then URLs, emails, hex hashes and ticket
        references are replaced with the placeholders ``[URL]``, ``[EMAIL]``,
        ``[HASH]`` and ``[TICKET]``, and runs of whitespace are collapsed to
        a single space.

        Args:
            message: Raw commit message.

        Returns:
            Normalized message, or an empty string when ``message`` is empty.
        """
        if not message:
            return ""

        # Lowercase first so the uppercase placeholders inserted below
        # survive unchanged.
        normalized = message.lower().strip()

        # Order matters: URLs are replaced before emails and hashes so that
        # fragments inside a URL are not substituted separately.
        normalized = self.url_pattern.sub("[URL]", normalized)
        normalized = self.email_pattern.sub("[EMAIL]", normalized)
        normalized = self.hash_pattern.sub("[HASH]", normalized)
        normalized = self.ticket_pattern.sub("[TICKET]", normalized)

        # Collapse whitespace (including newlines) to single spaces.
        normalized = re.sub(r"\s+", " ", normalized)
        return normalized.strip()

    def extract_keywords(self, text: str, min_length: int = 3) -> list[str]:
        """Extract meaningful keywords from text.

        The text is lowercased and split into purely alphabetic words; stop
        words and words shorter than ``min_length`` are dropped. Order and
        duplicates are preserved.

        Args:
            text: Input text to extract keywords from.
            min_length: Minimum length for keywords.

        Returns:
            List of extracted keywords (empty when ``text`` is empty).
        """
        if not text:
            return []

        words = re.findall(r"\b[a-zA-Z]+\b", text.lower())
        return [
            word for word in words if word not in self.stop_words and len(word) >= min_length
        ]

    @staticmethod
    def _basename(file_path: str) -> str:
        """Return the final ``/``-separated component of ``file_path``."""
        return file_path.rsplit("/", 1)[-1]

    def create_semantic_fingerprint(self, message: str, files: list[str]) -> str:
        """Create a semantic fingerprint for similarity matching.

        The fingerprint is the MD5 hex digest of the sorted first keywords
        of the normalized message joined with the sorted, de-duplicated file
        patterns (``ext:<extension>``, ``dir:<first directory>`` and
        ``dir:<directory containing the file>``) of the first changed files.

        Args:
            message: Commit message.
            files: List of changed files; ``None`` is treated as empty.

        Returns:
            Hex-encoded fingerprint string.
        """
        normalized_msg = self.normalize_message(message)
        keywords = self.extract_keywords(normalized_msg)

        file_patterns: set[str] = set()
        # Limit the number of files to prevent huge fingerprints.
        for file_path in (files or [])[: self._MAX_FINGERPRINT_FILES]:
            # Take the extension from the file name only, so a dot in a
            # directory name (e.g. "v1.2/Makefile") is not mistaken for one.
            filename = self._basename(file_path)
            if "." in filename:
                ext = filename.rsplit(".", 1)[-1].lower()
                file_patterns.add(f"ext:{ext}")

            parts = file_path.split("/")
            if len(parts) > 1:
                # First directory
                file_patterns.add(f"dir:{parts[0]}")
                # Last directory before file
                if len(parts) > 2:
                    file_patterns.add(f"dir:{parts[-2]}")

        semantic_elements = sorted(keywords[: self._MAX_FINGERPRINT_KEYWORDS]) + sorted(
            file_patterns
        )
        fingerprint_text = "|".join(semantic_elements)
        # MD5 is used purely as a fast, stable fingerprint, not for security;
        # flagging that keeps the call usable on FIPS-restricted builds and
        # does not change the digest.
        return hashlib.md5(fingerprint_text.encode(), usedforsecurity=False).hexdigest()

    def calculate_message_similarity(self, msg1: str, msg2: str) -> float:
        """Calculate semantic similarity between two commit messages.

        Computes the Jaccard similarity of the keyword sets extracted from
        the normalized messages.

        Args:
            msg1: First commit message.
            msg2: Second commit message.

        Returns:
            Similarity score between 0.0 and 1.0; 0.0 when either message is
            empty or yields no keywords.
        """
        if not msg1 or not msg2:
            return 0.0

        keywords1 = set(self.extract_keywords(self.normalize_message(msg1)))
        keywords2 = set(self.extract_keywords(self.normalize_message(msg2)))
        if not keywords1 or not keywords2:
            return 0.0

        # Jaccard similarity: |A ∩ B| / |A ∪ B|
        intersection = len(keywords1 & keywords2)
        union = len(keywords1 | keywords2)
        return intersection / union if union > 0 else 0.0

    def extract_file_patterns(self, files: list[str]) -> dict[str, dict[str, int]]:
        """Extract file patterns for domain classification.

        Counts, across all given paths, the lowercase file extensions, every
        non-empty directory component, and occurrences of well-known special
        file names (matched as substrings of the lowercase file name).

        Args:
            files: List of ``/``-separated file paths; ``None`` is treated as
                empty.

        Returns:
            Dictionary with the keys ``"extensions"``, ``"directories"`` and
            ``"special_files"``, each mapping a pattern to its count.
        """
        extensions: dict[str, int] = {}
        directories: dict[str, int] = {}
        special_files: dict[str, int] = {}

        for file_path in files or []:
            parts = file_path.split("/")
            filename = parts[-1]

            # Extension comes from the file name only; dots in directory
            # names must not produce bogus extensions.
            if "." in filename:
                ext = filename.rsplit(".", 1)[-1].lower()
                extensions[ext] = extensions.get(ext, 0) + 1

            # Every directory component, skipping empty parts produced by
            # leading or doubled slashes.
            for part in parts[:-1]:
                if part:
                    directories[part] = directories.get(part, 0) + 1

            lowered = filename.lower()
            for special in self._SPECIAL_FILES:
                if special in lowered:
                    special_files[special] = special_files.get(special, 0) + 1

        return {
            "extensions": extensions,
            "directories": directories,
            "special_files": special_files,
        }

    def calculate_commit_complexity(
        self, message: str, files: list[str], insertions: int, deletions: int
    ) -> dict[str, float]:
        """Calculate various complexity metrics for a commit.

        Each ``*_complexity`` component is clamped to at most 1.0; the
        overall score weights message, file and line-change complexity at
        0.2, 0.3 and 0.5 respectively.

        Args:
            message: Commit message; ``None`` is treated as empty.
            files: List of changed files; ``None`` is treated as empty.
            insertions: Number of lines inserted.
            deletions: Number of lines deleted.

        Returns:
            Dictionary with the keys ``message_length``, ``keyword_count``,
            ``message_complexity``, ``files_changed``, ``file_complexity``,
            ``total_changes``, ``change_complexity`` and
            ``overall_complexity``.
        """
        # Tolerate missing inputs instead of failing on len(None).
        message = message or ""
        files = files or []

        metrics: dict[str, float] = {}

        # Message complexity (length, keywords); saturates at 10 keywords.
        keywords = self.extract_keywords(message)
        metrics["message_length"] = len(message)
        metrics["keyword_count"] = len(keywords)
        metrics["message_complexity"] = min(1.0, len(keywords) / 10.0)

        # File complexity; saturates at 20 files.
        metrics["files_changed"] = len(files)
        metrics["file_complexity"] = min(1.0, len(files) / 20.0)

        # Line change complexity; saturates at 500 changed lines.
        total_changes = insertions + deletions
        metrics["total_changes"] = total_changes
        metrics["change_complexity"] = min(1.0, total_changes / 500.0)

        # Overall complexity score (0.0 to 1.0)
        metrics["overall_complexity"] = (
            metrics["message_complexity"] * 0.2
            + metrics["file_complexity"] * 0.3
            + metrics["change_complexity"] * 0.5
        )
        return metrics

gitflow_analytics/reports/__init__.py CHANGED Viewed

@@ -0,0 +1,100 @@
+# Reports package
+# Legacy imports for backward compatibility
+from .analytics_writer import AnalyticsReportGenerator
+# New abstraction layer components
+from .base import (
+    BaseReportGenerator,
+    ChainedReportGenerator,
+    CompositeReportGenerator,
+    ReportData,
+    ReportMetadata,
+    ReportOutput,
+)
+from .csv_writer import CSVReportGenerator
+from .data_models import (
+    CommitData,
+    CommitType,
+    DeveloperIdentity,
+    DeveloperMetrics,
+    DORAMetrics,
+    ProjectMetrics,
+    PullRequestData,
+    ReportSummary,
+    TicketMetrics,
+    WeeklyMetrics,
+    WorkStyle,
+)
+from .factory import (
+    ReportBuilder,
+    ReportFactory,
+    create_multiple_reports,
+    create_report,
+    get_default_factory,
+)
+from .formatters import (
+    CSVFormatter,
+    DateFormatter,
+    JSONFormatter,
+    MarkdownFormatter,
+    MetricFormatter,
+    NumberFormatter,
+    TextFormatter,
+)
+from .html_generator import HTMLReportGenerator
+from .interfaces import ReportField, ReportFormat, ReportSchema, ReportType
+from .json_exporter import ComprehensiveJSONExporter
+from .narrative_writer import NarrativeReportGenerator
+__all__ = [
+    # Legacy generators
+    'CSVReportGenerator',
+    'AnalyticsReportGenerator',
+    'NarrativeReportGenerator',
+    'ComprehensiveJSONExporter',
+    'HTMLReportGenerator',
+    # Base classes
+    'BaseReportGenerator',
+    'CompositeReportGenerator',
+    'ChainedReportGenerator',
+    'ReportData',
+    'ReportOutput',
+    'ReportMetadata',
+    # Interfaces
+    'ReportFormat',
+    'ReportType',
+    'ReportField',
+    'ReportSchema',
+    # Factory
+    'ReportFactory',
+    'ReportBuilder',
+    'create_report',
+    'create_multiple_reports',
+    'get_default_factory',
+    # Formatters
+    'DateFormatter',
+    'NumberFormatter',
+    'TextFormatter',
+    'MarkdownFormatter',
+    'CSVFormatter',
+    'JSONFormatter',
+    'MetricFormatter',
+    # Data models
+    'CommitData',
+    'PullRequestData',
+    'DeveloperMetrics',
+    'ProjectMetrics',
+    'WeeklyMetrics',
+    'TicketMetrics',
+    'DORAMetrics',
+    'ReportSummary',
+    'DeveloperIdentity',
+    'CommitType',
+    'WorkStyle'
+]

gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl

gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl