gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- gitflow_analytics/__init__.py +11 -11
- gitflow_analytics/_version.py +2 -2
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4490 -378
- gitflow_analytics/cli_rich.py +503 -0
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +904 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +441 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -398
- gitflow_analytics/core/analyzer.py +1320 -172
- gitflow_analytics/core/branch_mapper.py +132 -132
- gitflow_analytics/core/cache.py +1554 -175
- gitflow_analytics/core/data_fetcher.py +1193 -0
- gitflow_analytics/core/identity.py +571 -185
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/base.py +13 -11
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +77 -59
- gitflow_analytics/extractors/tickets.py +841 -89
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +258 -87
- gitflow_analytics/integrations/jira_integration.py +572 -123
- gitflow_analytics/integrations/orchestrator.py +206 -82
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +542 -179
- gitflow_analytics/models/database.py +986 -59
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +29 -0
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
- gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
- gitflow_analytics/qualitative/core/__init__.py +13 -0
- gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
- gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
- gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
- gitflow_analytics/qualitative/core/processor.py +673 -0
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +25 -0
- gitflow_analytics/qualitative/models/schemas.py +306 -0
- gitflow_analytics/qualitative/utils/__init__.py +13 -0
- gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
- gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
- gitflow_analytics/qualitative/utils/metrics.py +361 -0
- gitflow_analytics/qualitative/utils/text_processing.py +285 -0
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +550 -18
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1700 -216
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2289 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +5 -0
- gitflow_analytics/tui/app.py +724 -0
- gitflow_analytics/tui/screens/__init__.py +8 -0
- gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
- gitflow_analytics/tui/screens/configuration_screen.py +523 -0
- gitflow_analytics/tui/screens/loading_screen.py +348 -0
- gitflow_analytics/tui/screens/main_screen.py +321 -0
- gitflow_analytics/tui/screens/results_screen.py +722 -0
- gitflow_analytics/tui/widgets/__init__.py +7 -0
- gitflow_analytics/tui/widgets/data_table.py +255 -0
- gitflow_analytics/tui/widgets/export_modal.py +301 -0
- gitflow_analytics/tui/widgets/progress_widget.py +187 -0
- gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
- gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
gitflow_analytics/qualitative/classifiers/llm/cache.py (new file)

@@ -0,0 +1,479 @@
"""LLM-specific caching layer for classification results.

This module provides persistent caching of LLM classification results
to minimize API calls and reduce costs.

WHY: LLM API calls are expensive and slow. Caching results for identical
inputs dramatically reduces costs and improves performance.

DESIGN DECISIONS:
- Use SQLite for persistence and efficient lookups
- Hash-based keys for fast matching
- Configurable expiration for cache freshness
- Statistics tracking for cache effectiveness
- Support for cache warming and export
"""

import contextlib
import hashlib
import json
import logging
import sqlite3
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Optional

logger = logging.getLogger(__name__)


class LLMCache:
    """SQLite-based cache for LLM classification results.

    WHY: Persistent caching reduces API costs by 90%+ for repeated
    classifications while maintaining result consistency.
    """

    def __init__(self, cache_path: Path, expiration_days: int = 90, max_cache_size_mb: int = 500):
        """Initialize LLM cache.

        Args:
            cache_path: Path to SQLite cache database
            expiration_days: Days before cache entries expire
            max_cache_size_mb: Maximum cache size in megabytes
        """
        self.cache_path = cache_path
        self.expiration_days = expiration_days
        self.max_cache_size_mb = max_cache_size_mb

        # Ensure cache directory exists
        self.cache_path.parent.mkdir(parents=True, exist_ok=True)

        # Initialize database
        self._init_database()

        # Track cache statistics
        self.hits = 0
        self.misses = 0
        self.stores = 0

    def _init_database(self) -> None:
        """Initialize SQLite database with cache tables.

        WHY: Structured database enables efficient lookups and
        management of cached results.
        """
        with sqlite3.connect(self.cache_path) as conn:
            # Main cache table
            conn.execute(
                """
                CREATE TABLE IF NOT EXISTS llm_cache (
                    cache_key TEXT PRIMARY KEY,
                    message_hash TEXT NOT NULL,
                    files_hash TEXT NOT NULL,
                    category TEXT NOT NULL,
                    confidence REAL NOT NULL,
                    method TEXT NOT NULL,
                    reasoning TEXT,
                    model TEXT,
                    alternatives TEXT, -- JSON array
                    processing_time_ms REAL,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    expires_at TIMESTAMP NOT NULL,
                    access_count INTEGER DEFAULT 0,
                    last_accessed TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
                """
            )

            # Indices for efficient operations
            conn.execute("CREATE INDEX IF NOT EXISTS idx_expires_at ON llm_cache(expires_at)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_message_hash ON llm_cache(message_hash)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_created_at ON llm_cache(created_at)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_access_count ON llm_cache(access_count)")

            # Metadata table for cache management
            conn.execute(
                """
                CREATE TABLE IF NOT EXISTS cache_metadata (
                    key TEXT PRIMARY KEY,
                    value TEXT,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
                """
            )

            conn.commit()

    def get(
        self, message: str, files_changed: Optional[list[str]] = None
    ) -> Optional[dict[str, Any]]:
        """Get cached classification if available.

        Args:
            message: Commit message
            files_changed: Optional list of changed files

        Returns:
            Cached classification result or None
        """
        cache_key, _, _ = self._generate_cache_key(message, files_changed or [])

        try:
            with sqlite3.connect(self.cache_path) as conn:
                conn.row_factory = sqlite3.Row
                cursor = conn.execute(
                    """
                    SELECT category, confidence, reasoning, model, alternatives,
                           method, processing_time_ms
                    FROM llm_cache
                    WHERE cache_key = ? AND expires_at > datetime('now')
                    """,
                    (cache_key,),
                )

                row = cursor.fetchone()
                if row:
                    # Update access statistics
                    conn.execute(
                        """
                        UPDATE llm_cache
                        SET access_count = access_count + 1,
                            last_accessed = CURRENT_TIMESTAMP
                        WHERE cache_key = ?
                        """,
                        (cache_key,),
                    )
                    conn.commit()

                    self.hits += 1

                    # Parse alternatives from JSON
                    alternatives = []
                    if row["alternatives"]:
                        with contextlib.suppress(json.JSONDecodeError):
                            alternatives = json.loads(row["alternatives"])

                    return {
                        "category": row["category"],
                        "confidence": row["confidence"],
                        "method": "cached",
                        "reasoning": row["reasoning"] or "Cached result",
                        "model": row["model"] or "unknown",
                        "alternatives": alternatives,
                        "processing_time_ms": row["processing_time_ms"] or 0.0,
                        "cache_hit": True,
                    }

                self.misses += 1

        except Exception as e:
            logger.warning(f"Cache lookup failed: {e}")
            self.misses += 1

        return None

    def store(
        self, message: str, files_changed: Optional[list[str]], result: dict[str, Any]
    ) -> bool:
        """Store classification result in cache.

        Args:
            message: Commit message
            files_changed: Optional list of changed files
            result: Classification result to cache

        Returns:
            True if stored successfully
        """
        cache_key, message_hash, files_hash = self._generate_cache_key(message, files_changed or [])

        try:
            expires_at = datetime.now() + timedelta(days=self.expiration_days)

            # Serialize alternatives
            alternatives_json = json.dumps(result.get("alternatives", []))

            with sqlite3.connect(self.cache_path) as conn:
                conn.execute(
                    """
                    INSERT OR REPLACE INTO llm_cache
                    (cache_key, message_hash, files_hash, category, confidence,
                     method, reasoning, model, alternatives, processing_time_ms, expires_at)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    (
                        cache_key,
                        message_hash,
                        files_hash,
                        result.get("category", "maintenance"),
                        result.get("confidence", 0.5),
                        result.get("method", "llm"),
                        result.get("reasoning", ""),
                        result.get("model", ""),
                        alternatives_json,
                        result.get("processing_time_ms", 0.0),
                        expires_at,
                    ),
                )
                conn.commit()

                self.stores += 1

                # Check cache size and cleanup if needed
                self._check_cache_size(conn)

            return True

        except Exception as e:
            logger.warning(f"Cache storage failed: {e}")
            return False

    def _generate_cache_key(self, message: str, files_changed: list[str]) -> tuple[str, str, str]:
        """Generate cache key components.

        Args:
            message: Commit message
            files_changed: List of changed files

        Returns:
            Tuple of (cache_key, message_hash, files_hash)
        """
        # Normalize message
        normalized_message = message.strip().lower()
        message_hash = hashlib.md5(normalized_message.encode("utf-8")).hexdigest()

        # Normalize and hash files
        normalized_files = "|".join(sorted(f.lower() for f in files_changed))
        files_hash = hashlib.md5(normalized_files.encode("utf-8")).hexdigest()

        # Combined cache key
        cache_key = f"{message_hash}:{files_hash}"

        return cache_key, message_hash, files_hash

    def _check_cache_size(self, conn: sqlite3.Connection) -> None:
        """Check cache size and cleanup if needed.

        WHY: Prevents cache from growing unbounded and consuming
        excessive disk space.

        Args:
            conn: SQLite connection
        """
        # Get current database size
        db_size_bytes = self.cache_path.stat().st_size if self.cache_path.exists() else 0
        db_size_mb = db_size_bytes / (1024 * 1024)

        if db_size_mb > self.max_cache_size_mb:
            logger.info(
                f"Cache size {db_size_mb:.1f}MB exceeds limit {self.max_cache_size_mb}MB, cleaning up"
            )

            # Remove expired entries first
            deleted = self.cleanup_expired()
            logger.info(f"Removed {deleted} expired entries")

            # If still too large, remove least recently accessed
            db_size_bytes = self.cache_path.stat().st_size
            db_size_mb = db_size_bytes / (1024 * 1024)

            if db_size_mb > self.max_cache_size_mb * 0.9:  # Keep 10% buffer
                # Delete 20% of least recently accessed entries
                cursor = conn.execute(
                    """
                    SELECT COUNT(*) FROM llm_cache
                    """
                )
                total_entries = cursor.fetchone()[0]

                if total_entries > 0:
                    to_delete = int(total_entries * 0.2)
                    conn.execute(
                        """
                        DELETE FROM llm_cache
                        WHERE cache_key IN (
                            SELECT cache_key FROM llm_cache
                            ORDER BY last_accessed ASC, access_count ASC
                            LIMIT ?
                        )
                        """,
                        (to_delete,),
                    )
                    conn.commit()
                    logger.info(f"Removed {to_delete} least recently used entries")

    def cleanup_expired(self) -> int:
        """Remove expired cache entries.

        Returns:
            Number of entries removed
        """
        try:
            with sqlite3.connect(self.cache_path) as conn:
                cursor = conn.execute(
                    """
                    DELETE FROM llm_cache
                    WHERE expires_at <= datetime('now')
                    """
                )
                conn.commit()
                return cursor.rowcount

        except Exception as e:
            logger.warning(f"Cache cleanup failed: {e}")
            return 0

    def get_statistics(self) -> dict[str, Any]:
        """Get cache usage statistics.

        Returns:
            Dictionary with cache statistics
        """
        try:
            with sqlite3.connect(self.cache_path) as conn:
                cursor = conn.execute(
                    """
                    SELECT
                        COUNT(*) as total_entries,
                        COUNT(CASE WHEN expires_at > datetime('now') THEN 1 END) as active_entries,
                        COUNT(CASE WHEN expires_at <= datetime('now') THEN 1 END) as expired_entries,
                        AVG(access_count) as avg_access_count,
                        MAX(access_count) as max_access_count,
                        COUNT(DISTINCT model) as unique_models
                    FROM llm_cache
                    """
                )

                row = cursor.fetchone()
                if row:
                    # Calculate hit rate
                    total_requests = self.hits + self.misses
                    hit_rate = self.hits / total_requests if total_requests > 0 else 0.0

                    # Get cache file size
                    cache_size_mb = (
                        self.cache_path.stat().st_size / (1024 * 1024)
                        if self.cache_path.exists()
                        else 0
                    )

                    return {
                        "total_entries": row[0],
                        "active_entries": row[1],
                        "expired_entries": row[2],
                        "avg_access_count": row[3] or 0,
                        "max_access_count": row[4] or 0,
                        "unique_models": row[5],
                        "cache_hits": self.hits,
                        "cache_misses": self.misses,
                        "cache_stores": self.stores,
                        "hit_rate": hit_rate,
                        "cache_file_size_mb": cache_size_mb,
                        "max_cache_size_mb": self.max_cache_size_mb,
                    }

        except Exception as e:
            logger.warning(f"Failed to get cache statistics: {e}")

        return {
            "error": "Failed to retrieve statistics",
            "cache_hits": self.hits,
            "cache_misses": self.misses,
            "cache_stores": self.stores,
        }

    def warm_cache(
        self, classifications: list[tuple[str, Optional[list[str]], dict[str, Any]]]
    ) -> int:
        """Warm cache with pre-computed classifications.

        WHY: Cache warming allows bulk import of classifications,
        useful for migrations or pre-processing.

        Args:
            classifications: List of (message, files, result) tuples

        Returns:
            Number of entries added
        """
        added = 0
        for message, files, result in classifications:
            if self.store(message, files, result):
                added += 1

        logger.info(f"Warmed cache with {added} entries")
        return added

    def export_cache(self, output_file: Path) -> int:
        """Export cache contents to JSON file.

        Args:
            output_file: Path to export file

        Returns:
            Number of entries exported
        """
        try:
            with sqlite3.connect(self.cache_path) as conn:
                conn.row_factory = sqlite3.Row
                cursor = conn.execute(
                    """
                    SELECT * FROM llm_cache
                    WHERE expires_at > datetime('now')
                    ORDER BY access_count DESC
                    """
                )

                entries = []
                for row in cursor:
                    entry = dict(row)
                    # Parse JSON fields
                    if entry["alternatives"]:
                        try:
                            entry["alternatives"] = json.loads(entry["alternatives"])
                        except json.JSONDecodeError:
                            entry["alternatives"] = []
                    entries.append(entry)

            with open(output_file, "w") as f:
                json.dump(
                    {
                        "cache_entries": entries,
                        "statistics": self.get_statistics(),
                        "exported_at": datetime.now().isoformat(),
                    },
                    f,
                    indent=2,
                    default=str,
                )

            logger.info(f"Exported {len(entries)} cache entries to {output_file}")
            return len(entries)

        except Exception as e:
            logger.error(f"Cache export failed: {e}")
            return 0

    def clear(self) -> int:
        """Clear all cache entries.

        Returns:
            Number of entries cleared
        """
        try:
            with sqlite3.connect(self.cache_path) as conn:
                cursor = conn.execute("DELETE FROM llm_cache")
                conn.commit()
                cleared = cursor.rowcount

                # Reset statistics
                self.hits = 0
                self.misses = 0
                self.stores = 0

                logger.info(f"Cleared {cleared} cache entries")
                return cleared

        except Exception as e:
            logger.error(f"Cache clear failed: {e}")
            return 0
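
Usage sketch (not part of the diff): a minimal example of how the LLMCache added above could be exercised. The import path is inferred from the file list; the cache location, classification values, and model name are illustrative placeholders, and the result dictionary simply mirrors the columns the class persists (category, confidence, method, reasoning, model, alternatives, processing_time_ms).

from pathlib import Path

# Assumed module path, based on the file list entry
# gitflow_analytics/qualitative/classifiers/llm/cache.py.
from gitflow_analytics.qualitative.classifiers.llm.cache import LLMCache

# Hypothetical on-disk location for the SQLite cache database.
cache = LLMCache(cache_path=Path(".gitflow_cache/llm_cache.db"), expiration_days=90)

message = "fix: handle empty commit messages in ticket extractor"
files = ["gitflow_analytics/extractors/tickets.py"]

# First lookup misses; a real caller would invoke the LLM here and cache its answer.
if cache.get(message, files) is None:
    cache.store(
        message,
        files,
        {
            "category": "bugfix",        # illustrative classification
            "confidence": 0.92,
            "method": "llm",
            "reasoning": "Commit message starts with 'fix:'",
            "model": "example-model",    # placeholder model name
            "alternatives": [],
            "processing_time_ms": 840.0,
        },
    )

# Later lookups for the same (message, files) pair are served from SQLite.
hit = cache.get(message, files)
print(hit["category"], hit["cache_hit"])   # -> bugfix True
print(cache.get_statistics()["hit_rate"])  # in-process hit rate for this run

Because the key is an MD5 of the lower-cased, stripped message plus the sorted, lower-cased file list, repeated classifications of identical commits skip the API call entirely, which is where the docstring's claimed cost savings come from.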