PyPI - gitflow-analytics - Versions diffs - 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl - Mend

gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (119) hide show

gitflow_analytics/__init__.py +11 -11
gitflow_analytics/_version.py +2 -2
gitflow_analytics/classification/__init__.py +31 -0
gitflow_analytics/classification/batch_classifier.py +752 -0
gitflow_analytics/classification/classifier.py +464 -0
gitflow_analytics/classification/feature_extractor.py +725 -0
gitflow_analytics/classification/linguist_analyzer.py +574 -0
gitflow_analytics/classification/model.py +455 -0
gitflow_analytics/cli.py +4490 -378
gitflow_analytics/cli_rich.py +503 -0
gitflow_analytics/config/__init__.py +43 -0
gitflow_analytics/config/errors.py +261 -0
gitflow_analytics/config/loader.py +904 -0
gitflow_analytics/config/profiles.py +264 -0
gitflow_analytics/config/repository.py +124 -0
gitflow_analytics/config/schema.py +441 -0
gitflow_analytics/config/validator.py +154 -0
gitflow_analytics/config.py +44 -398
gitflow_analytics/core/analyzer.py +1320 -172
gitflow_analytics/core/branch_mapper.py +132 -132
gitflow_analytics/core/cache.py +1554 -175
gitflow_analytics/core/data_fetcher.py +1193 -0
gitflow_analytics/core/identity.py +571 -185
gitflow_analytics/core/metrics_storage.py +526 -0
gitflow_analytics/core/progress.py +372 -0
gitflow_analytics/core/schema_version.py +269 -0
gitflow_analytics/extractors/base.py +13 -11
gitflow_analytics/extractors/ml_tickets.py +1100 -0
gitflow_analytics/extractors/story_points.py +77 -59
gitflow_analytics/extractors/tickets.py +841 -89
gitflow_analytics/identity_llm/__init__.py +6 -0
gitflow_analytics/identity_llm/analysis_pass.py +231 -0
gitflow_analytics/identity_llm/analyzer.py +464 -0
gitflow_analytics/identity_llm/models.py +76 -0
gitflow_analytics/integrations/github_integration.py +258 -87
gitflow_analytics/integrations/jira_integration.py +572 -123
gitflow_analytics/integrations/orchestrator.py +206 -82
gitflow_analytics/metrics/activity_scoring.py +322 -0
gitflow_analytics/metrics/branch_health.py +470 -0
gitflow_analytics/metrics/dora.py +542 -179
gitflow_analytics/models/database.py +986 -59
gitflow_analytics/pm_framework/__init__.py +115 -0
gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
gitflow_analytics/pm_framework/base.py +406 -0
gitflow_analytics/pm_framework/models.py +211 -0
gitflow_analytics/pm_framework/orchestrator.py +652 -0
gitflow_analytics/pm_framework/registry.py +333 -0
gitflow_analytics/qualitative/__init__.py +29 -0
gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
gitflow_analytics/qualitative/core/__init__.py +13 -0
gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
gitflow_analytics/qualitative/core/processor.py +673 -0
gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
gitflow_analytics/qualitative/models/__init__.py +25 -0
gitflow_analytics/qualitative/models/schemas.py +306 -0
gitflow_analytics/qualitative/utils/__init__.py +13 -0
gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
gitflow_analytics/qualitative/utils/metrics.py +361 -0
gitflow_analytics/qualitative/utils/text_processing.py +285 -0
gitflow_analytics/reports/__init__.py +100 -0
gitflow_analytics/reports/analytics_writer.py +550 -18
gitflow_analytics/reports/base.py +648 -0
gitflow_analytics/reports/branch_health_writer.py +322 -0
gitflow_analytics/reports/classification_writer.py +924 -0
gitflow_analytics/reports/cli_integration.py +427 -0
gitflow_analytics/reports/csv_writer.py +1700 -216
gitflow_analytics/reports/data_models.py +504 -0
gitflow_analytics/reports/database_report_generator.py +427 -0
gitflow_analytics/reports/example_usage.py +344 -0
gitflow_analytics/reports/factory.py +499 -0
gitflow_analytics/reports/formatters.py +698 -0
gitflow_analytics/reports/html_generator.py +1116 -0
gitflow_analytics/reports/interfaces.py +489 -0
gitflow_analytics/reports/json_exporter.py +2770 -0
gitflow_analytics/reports/narrative_writer.py +2289 -158
gitflow_analytics/reports/story_point_correlation.py +1144 -0
gitflow_analytics/reports/weekly_trends_writer.py +389 -0
gitflow_analytics/training/__init__.py +5 -0
gitflow_analytics/training/model_loader.py +377 -0
gitflow_analytics/training/pipeline.py +550 -0
gitflow_analytics/tui/__init__.py +5 -0
gitflow_analytics/tui/app.py +724 -0
gitflow_analytics/tui/screens/__init__.py +8 -0
gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
gitflow_analytics/tui/screens/configuration_screen.py +523 -0
gitflow_analytics/tui/screens/loading_screen.py +348 -0
gitflow_analytics/tui/screens/main_screen.py +321 -0
gitflow_analytics/tui/screens/results_screen.py +722 -0
gitflow_analytics/tui/widgets/__init__.py +7 -0
gitflow_analytics/tui/widgets/data_table.py +255 -0
gitflow_analytics/tui/widgets/export_modal.py +301 -0
gitflow_analytics/tui/widgets/progress_widget.py +187 -0
gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
{gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
{gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
{gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
{gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0

gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py ADDED Viewed

@@ -0,0 +1,435 @@
+"""Cost tracking and management for LLM API usage.
+This module tracks API usage costs and provides warnings when
+approaching or exceeding cost thresholds.
+WHY: LLM API calls can be expensive. Tracking costs helps users
+monitor expenses and make informed decisions about usage.
+DESIGN DECISIONS:
+- Support multiple pricing models for different providers
+- Track costs at token level for accuracy
+- Provide cost warnings and limits
+- Support cost budgets and alerts
+- Export cost data for analysis
+"""
+import json
+import logging
+from dataclasses import asdict, dataclass
+from datetime import datetime
+from pathlib import Path
+from typing import Optional
+logger = logging.getLogger(__name__)
+@dataclass
+class ModelPricing:
+    """Pricing information for a specific model.
+    WHY: Different models have different pricing structures.
+    This allows accurate cost calculation per model.
+    """
+    model_name: str
+    input_cost_per_million: float  # Cost per 1M input tokens in USD
+    output_cost_per_million: float  # Cost per 1M output tokens in USD
+    def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
+        """Calculate cost for given token counts.
+        Args:
+            input_tokens: Number of input/prompt tokens
+            output_tokens: Number of output/completion tokens
+        Returns:
+            Total cost in USD
+        """
+        input_cost = (input_tokens / 1_000_000) * self.input_cost_per_million
+        output_cost = (output_tokens / 1_000_000) * self.output_cost_per_million
+        return input_cost + output_cost
+@dataclass
+class CostRecord:
+    """Record of a single API call's cost.
+    One record is created per tracked call; records are serialized as
+    one JSON object per line when the session is saved to history.
+    WHY: Detailed cost records enable analysis of spending patterns
+    and identification of optimization opportunities.
+    """
+    timestamp: datetime  # When the usage was recorded
+    model: str  # Model name the call is attributed to
+    input_tokens: int  # Number of input/prompt tokens
+    output_tokens: int  # Number of output/completion tokens
+    cost_usd: float  # Calculated cost of the call in USD
+    endpoint: str = "unknown"  # Endpoint label; "unknown" unless the creator sets it
+    batch_id: Optional[str] = None  # Optional batch identifier
+class CostTracker:
+    """Tracks and manages LLM API usage costs.
+    WHY: Cost management is critical for production LLM usage.
+    This provides detailed tracking, warnings, and budgeting.
+    """
+    # Default pricing for common models (as of 2024)
+    DEFAULT_PRICING = {
+        "gpt-4": ModelPricing("gpt-4", 30.0, 60.0),
+        "gpt-4-turbo": ModelPricing("gpt-4-turbo", 10.0, 30.0),
+        "gpt-4-turbo-preview": ModelPricing("gpt-4-turbo-preview", 10.0, 30.0),
+        "gpt-3.5-turbo": ModelPricing("gpt-3.5-turbo", 0.5, 1.5),
+        "gpt-3.5-turbo-16k": ModelPricing("gpt-3.5-turbo-16k", 1.0, 2.0),
+        "claude-3-opus": ModelPricing("claude-3-opus", 15.0, 75.0),
+        "claude-3-sonnet": ModelPricing("claude-3-sonnet", 3.0, 15.0),
+        "claude-3-haiku": ModelPricing("claude-3-haiku", 0.25, 1.25),
+        "claude-2.1": ModelPricing("claude-2.1", 8.0, 24.0),
+        "claude-2": ModelPricing("claude-2", 8.0, 24.0),
+        "mistral-7b": ModelPricing("mistral-7b", 0.25, 0.25),
+        "mistral-8x7b": ModelPricing("mistral-8x7b", 0.7, 0.7),
+        "llama-2-70b": ModelPricing("llama-2-70b", 0.7, 0.9),
+        "llama-2-13b": ModelPricing("llama-2-13b", 0.2, 0.25),
+    }
+    def __init__(
+        self,
+        cache_dir: Optional[Path] = None,
+        daily_budget: Optional[float] = None,
+        monthly_budget: Optional[float] = None,
+    ):
+        """Initialize cost tracker.
+        Args:
+            cache_dir: Directory for storing cost records
+            daily_budget: Optional daily spending limit in USD
+            monthly_budget: Optional monthly spending limit in USD
+        """
+        self.cache_dir = cache_dir or Path(".gitflow-cache")
+        self.cache_dir.mkdir(exist_ok=True)
+        self.daily_budget = daily_budget
+        self.monthly_budget = monthly_budget
+        # Current session costs
+        self.session_costs: list[CostRecord] = []
+        self.session_total = 0.0
+        # Current model pricing
+        self.current_pricing: Optional[ModelPricing] = None
+        # Load historical costs
+        self._load_cost_history()
+    def set_model_pricing(self, pricing: ModelPricing) -> None:
+        """Set the pricing for the current model.
+        Args:
+            pricing: Model pricing information
+        """
+        self.current_pricing = pricing
+        logger.debug(
+            f"Set pricing for {pricing.model_name}: "
+            f"${pricing.input_cost_per_million}/1M input, "
+            f"${pricing.output_cost_per_million}/1M output"
+        )
+    def track_usage(
+        self,
+        input_tokens: int,
+        output_tokens: int,
+        model: Optional[str] = None,
+        batch_id: Optional[str] = None,
+    ) -> float:
+        """Track token usage and calculate cost.
+        Args:
+            input_tokens: Number of input tokens used
+            output_tokens: Number of output tokens used
+            model: Optional model name override
+            batch_id: Optional batch identifier
+        Returns:
+            Cost of this usage in USD
+        """
+        # Use current pricing or try to find from model name
+        pricing = self.current_pricing
+        if not pricing and model:
+            pricing = self._find_pricing_for_model(model)
+        if not pricing:
+            # Use a default conservative estimate
+            pricing = ModelPricing("unknown", 1.0, 1.0)
+        # Calculate cost
+        cost = pricing.calculate_cost(input_tokens, output_tokens)
+        # Create cost record
+        record = CostRecord(
+            timestamp=datetime.now(),
+            model=model or pricing.model_name,
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+            cost_usd=cost,
+            batch_id=batch_id,
+        )
+        # Track in session
+        self.session_costs.append(record)
+        self.session_total += cost
+        # Check budgets
+        self._check_budgets(cost)
+        # Log if significant cost
+        if cost > 0.01:  # Log costs over 1 cent
+            logger.info(f"API call cost: ${cost:.4f} ({input_tokens} in, {output_tokens} out)")
+        return cost
+    def calculate_cost(
+        self, input_tokens: int, output_tokens: int, model: Optional[str] = None
+    ) -> float:
+        """Calculate cost without tracking (for estimates).
+        Args:
+            input_tokens: Number of input tokens
+            output_tokens: Number of output tokens
+            model: Optional model name
+        Returns:
+            Estimated cost in USD
+        """
+        pricing = self.current_pricing
+        if not pricing and model:
+            pricing = self._find_pricing_for_model(model)
+        if not pricing:
+            pricing = ModelPricing("unknown", 1.0, 1.0)
+        return pricing.calculate_cost(input_tokens, output_tokens)
+    def get_session_summary(self) -> dict:
+        """Get summary of current session costs.
+        Returns:
+            Dictionary with session cost information
+        """
+        if not self.session_costs:
+            return {
+                "total_cost": 0.0,
+                "total_calls": 0,
+                "total_input_tokens": 0,
+                "total_output_tokens": 0,
+                "average_cost_per_call": 0.0,
+            }
+        total_input = sum(r.input_tokens for r in self.session_costs)
+        total_output = sum(r.output_tokens for r in self.session_costs)
+        return {
+            "total_cost": self.session_total,
+            "total_calls": len(self.session_costs),
+            "total_input_tokens": total_input,
+            "total_output_tokens": total_output,
+            "average_cost_per_call": self.session_total / len(self.session_costs),
+            "models_used": list(set(r.model for r in self.session_costs)),
+        }
+    def get_daily_costs(self) -> float:
+        """Get total costs for today.
+        Returns:
+            Total cost in USD for the current day
+        """
+        today = datetime.now().date()
+        daily_total = sum(r.cost_usd for r in self.session_costs if r.timestamp.date() == today)
+        # Also check historical costs
+        history_file = self._get_history_file()
+        if history_file.exists():
+            try:
+                with open(history_file) as f:
+                    for line in f:
+                        record_dict = json.loads(line)
+                        timestamp = datetime.fromisoformat(record_dict["timestamp"])
+                        if timestamp.date() == today:
+                            daily_total += record_dict["cost_usd"]
+            except Exception as e:
+                logger.warning(f"Error reading cost history: {e}")
+        return daily_total
+    def get_monthly_costs(self) -> float:
+        """Get total costs for the current month.
+        Returns:
+            Total cost in USD for the current month
+        """
+        now = datetime.now()
+        month_start = datetime(now.year, now.month, 1)
+        monthly_total = sum(r.cost_usd for r in self.session_costs if r.timestamp >= month_start)
+        # Also check historical costs
+        history_file = self._get_history_file()
+        if history_file.exists():
+            try:
+                with open(history_file) as f:
+                    for line in f:
+                        record_dict = json.loads(line)
+                        timestamp = datetime.fromisoformat(record_dict["timestamp"])
+                        if timestamp >= month_start:
+                            monthly_total += record_dict["cost_usd"]
+            except Exception as e:
+                logger.warning(f"Error reading cost history: {e}")
+        return monthly_total
+    def save_session(self) -> None:
+        """Save current session costs to history file.
+        WHY: Persisting cost data enables long-term tracking
+        and analysis of LLM usage patterns.
+        """
+        if not self.session_costs:
+            return
+        history_file = self._get_history_file()
+        try:
+            with open(history_file, "a") as f:
+                for record in self.session_costs:
+                    # Convert to dict and handle datetime
+                    record_dict = asdict(record)
+                    record_dict["timestamp"] = record.timestamp.isoformat()
+                    f.write(json.dumps(record_dict) + "\n")
+            logger.info(f"Saved {len(self.session_costs)} cost records to history")
+            # Clear session costs after saving
+            self.session_costs = []
+            self.session_total = 0.0
+        except Exception as e:
+            logger.error(f"Failed to save cost history: {e}")
+    def export_costs(self, output_file: Path) -> None:
+        """Export all cost data to a JSON file.
+        Args:
+            output_file: Path to export file
+        """
+        all_records = []
+        # Add current session
+        for record in self.session_costs:
+            record_dict = asdict(record)
+            record_dict["timestamp"] = record.timestamp.isoformat()
+            all_records.append(record_dict)
+        # Add historical
+        history_file = self._get_history_file()
+        if history_file.exists():
+            try:
+                with open(history_file) as f:
+                    for line in f:
+                        all_records.append(json.loads(line))
+            except Exception as e:
+                logger.warning(f"Error reading cost history: {e}")
+        # Write export file
+        with open(output_file, "w") as f:
+            json.dump(
+                {
+                    "records": all_records,
+                    "summary": self.get_session_summary(),
+                    "daily_total": self.get_daily_costs(),
+                    "monthly_total": self.get_monthly_costs(),
+                },
+                f,
+                indent=2,
+            )
+        logger.info(f"Exported {len(all_records)} cost records to {output_file}")
+    def _find_pricing_for_model(self, model: str) -> Optional[ModelPricing]:
+        """Find pricing information for a model name.
+        Args:
+            model: Model name to find pricing for
+        Returns:
+            ModelPricing or None if not found
+        """
+        model_lower = model.lower()
+        # Check exact matches first
+        if model_lower in self.DEFAULT_PRICING:
+            return self.DEFAULT_PRICING[model_lower]
+        # Check partial matches
+        for key, pricing in self.DEFAULT_PRICING.items():
+            if key in model_lower or model_lower in key:
+                return pricing
+        # Check for common prefixes
+        if "gpt-4" in model_lower:
+            return self.DEFAULT_PRICING.get("gpt-4-turbo", self.DEFAULT_PRICING["gpt-4"])
+        elif "gpt-3" in model_lower:
+            return self.DEFAULT_PRICING["gpt-3.5-turbo"]
+        elif "claude" in model_lower:
+            return self.DEFAULT_PRICING.get("claude-2", ModelPricing("claude", 8.0, 24.0))
+        elif "mistral" in model_lower:
+            return self.DEFAULT_PRICING.get("mistral-7b", ModelPricing("mistral", 0.25, 0.25))
+        elif "llama" in model_lower:
+            return self.DEFAULT_PRICING.get("llama-2-70b", ModelPricing("llama", 0.7, 0.9))
+        return None
+    def _check_budgets(self, new_cost: float) -> None:
+        """Check if budgets are being exceeded.
+        Args:
+            new_cost: Cost of the latest API call
+        """
+        # Check daily budget
+        if self.daily_budget:
+            daily_total = self.get_daily_costs()
+            if daily_total > self.daily_budget:
+                logger.warning(
+                    f"DAILY BUDGET EXCEEDED: ${daily_total:.2f} > ${self.daily_budget:.2f}"
+                )
+            elif daily_total > self.daily_budget * 0.8:
+                logger.warning(
+                    f"Approaching daily budget: ${daily_total:.2f} of ${self.daily_budget:.2f}"
+                )
+        # Check monthly budget
+        if self.monthly_budget:
+            monthly_total = self.get_monthly_costs()
+            if monthly_total > self.monthly_budget:
+                logger.warning(
+                    f"MONTHLY BUDGET EXCEEDED: ${monthly_total:.2f} > ${self.monthly_budget:.2f}"
+                )
+            elif monthly_total > self.monthly_budget * 0.8:
+                logger.warning(
+                    f"Approaching monthly budget: ${monthly_total:.2f} of ${self.monthly_budget:.2f}"
+                )
+    def _get_history_file(self) -> Path:
+        """Get path to cost history file.
+        Returns:
+            Path to history file
+        """
+        return self.cache_dir / "llm_costs.jsonl"
+    def _load_cost_history(self) -> None:
+        """Load cost history from file.
+        WHY: Loading historical costs enables budget tracking
+        across multiple sessions.
+        """
+        # For now, we don't load into memory to avoid memory issues
+        # History is queried when needed for daily/monthly totals
+        pass

gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl

gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl