PyPI - gitflow-analytics - Versions diffs - 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl - Mend

gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (116) hide show

gitflow_analytics/_version.py +1 -1
gitflow_analytics/classification/__init__.py +31 -0
gitflow_analytics/classification/batch_classifier.py +752 -0
gitflow_analytics/classification/classifier.py +464 -0
gitflow_analytics/classification/feature_extractor.py +725 -0
gitflow_analytics/classification/linguist_analyzer.py +574 -0
gitflow_analytics/classification/model.py +455 -0
gitflow_analytics/cli.py +4108 -350
gitflow_analytics/cli_rich.py +198 -48
gitflow_analytics/config/__init__.py +43 -0
gitflow_analytics/config/errors.py +261 -0
gitflow_analytics/config/loader.py +904 -0
gitflow_analytics/config/profiles.py +264 -0
gitflow_analytics/config/repository.py +124 -0
gitflow_analytics/config/schema.py +441 -0
gitflow_analytics/config/validator.py +154 -0
gitflow_analytics/config.py +44 -508
gitflow_analytics/core/analyzer.py +1209 -98
gitflow_analytics/core/cache.py +1337 -29
gitflow_analytics/core/data_fetcher.py +1193 -0
gitflow_analytics/core/identity.py +363 -14
gitflow_analytics/core/metrics_storage.py +526 -0
gitflow_analytics/core/progress.py +372 -0
gitflow_analytics/core/schema_version.py +269 -0
gitflow_analytics/extractors/ml_tickets.py +1100 -0
gitflow_analytics/extractors/story_points.py +8 -1
gitflow_analytics/extractors/tickets.py +749 -11
gitflow_analytics/identity_llm/__init__.py +6 -0
gitflow_analytics/identity_llm/analysis_pass.py +231 -0
gitflow_analytics/identity_llm/analyzer.py +464 -0
gitflow_analytics/identity_llm/models.py +76 -0
gitflow_analytics/integrations/github_integration.py +175 -11
gitflow_analytics/integrations/jira_integration.py +461 -24
gitflow_analytics/integrations/orchestrator.py +124 -1
gitflow_analytics/metrics/activity_scoring.py +322 -0
gitflow_analytics/metrics/branch_health.py +470 -0
gitflow_analytics/metrics/dora.py +379 -20
gitflow_analytics/models/database.py +843 -53
gitflow_analytics/pm_framework/__init__.py +115 -0
gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
gitflow_analytics/pm_framework/base.py +406 -0
gitflow_analytics/pm_framework/models.py +211 -0
gitflow_analytics/pm_framework/orchestrator.py +652 -0
gitflow_analytics/pm_framework/registry.py +333 -0
gitflow_analytics/qualitative/__init__.py +9 -10
gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
gitflow_analytics/qualitative/core/__init__.py +4 -4
gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
gitflow_analytics/qualitative/core/processor.py +381 -248
gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
gitflow_analytics/qualitative/models/__init__.py +7 -7
gitflow_analytics/qualitative/models/schemas.py +155 -121
gitflow_analytics/qualitative/utils/__init__.py +4 -4
gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
gitflow_analytics/qualitative/utils/metrics.py +172 -158
gitflow_analytics/qualitative/utils/text_processing.py +146 -104
gitflow_analytics/reports/__init__.py +100 -0
gitflow_analytics/reports/analytics_writer.py +539 -14
gitflow_analytics/reports/base.py +648 -0
gitflow_analytics/reports/branch_health_writer.py +322 -0
gitflow_analytics/reports/classification_writer.py +924 -0
gitflow_analytics/reports/cli_integration.py +427 -0
gitflow_analytics/reports/csv_writer.py +1676 -212
gitflow_analytics/reports/data_models.py +504 -0
gitflow_analytics/reports/database_report_generator.py +427 -0
gitflow_analytics/reports/example_usage.py +344 -0
gitflow_analytics/reports/factory.py +499 -0
gitflow_analytics/reports/formatters.py +698 -0
gitflow_analytics/reports/html_generator.py +1116 -0
gitflow_analytics/reports/interfaces.py +489 -0
gitflow_analytics/reports/json_exporter.py +2770 -0
gitflow_analytics/reports/narrative_writer.py +2287 -158
gitflow_analytics/reports/story_point_correlation.py +1144 -0
gitflow_analytics/reports/weekly_trends_writer.py +389 -0
gitflow_analytics/training/__init__.py +5 -0
gitflow_analytics/training/model_loader.py +377 -0
gitflow_analytics/training/pipeline.py +550 -0
gitflow_analytics/tui/__init__.py +1 -1
gitflow_analytics/tui/app.py +129 -126
gitflow_analytics/tui/screens/__init__.py +3 -3
gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
gitflow_analytics/tui/screens/configuration_screen.py +154 -178
gitflow_analytics/tui/screens/loading_screen.py +100 -110
gitflow_analytics/tui/screens/main_screen.py +89 -72
gitflow_analytics/tui/screens/results_screen.py +305 -281
gitflow_analytics/tui/widgets/__init__.py +2 -2
gitflow_analytics/tui/widgets/data_table.py +67 -69
gitflow_analytics/tui/widgets/export_modal.py +76 -76
gitflow_analytics/tui/widgets/progress_widget.py +41 -46
gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
{gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
{gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
{gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
{gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0

gitflow_analytics/qualitative/utils/cost_tracker.py CHANGED Viewed

@@ -1,17 +1,17 @@
 """Cost tracking utilities for LLM usage monitoring."""
+import json
 import logging
-from datetime import datetime, timedelta
-from typing import Dict, List, Optional
 from dataclasses import dataclass
+from datetime import datetime, timedelta
 from pathlib import Path
-import json
+from typing import Optional
 @dataclass
 class LLMCall:
     """Record of a single LLM API call."""
     timestamp: datetime
     model: str
     input_tokens: int
@@ -25,36 +25,33 @@ class LLMCall:
 class CostTracker:
     """Track and manage LLM API usage costs.
     This class provides cost monitoring, budgeting, and optimization
     features to keep LLM usage within acceptable limits while
     maintaining analysis quality.
     """
     # OpenRouter pricing (approximate, in USD per 1M tokens)
     MODEL_PRICING = {
         # Anthropic models
-        'anthropic/claude-3-haiku': {'input': 0.25, 'output': 1.25},
-        'anthropic/claude-3-sonnet': {'input': 3.0, 'output': 15.0},
-        'anthropic/claude-3-opus': {'input': 15.0, 'output': 75.0},
+        "anthropic/claude-3-haiku": {"input": 0.25, "output": 1.25},
+        "anthropic/claude-3-sonnet": {"input": 3.0, "output": 15.0},
+        "anthropic/claude-3-opus": {"input": 15.0, "output": 75.0},
         # OpenAI models
-        'openai/gpt-3.5-turbo': {'input': 0.5, 'output': 1.5},
-        'openai/gpt-4': {'input': 30.0, 'output': 60.0},
-        'openai/gpt-4-turbo': {'input': 10.0, 'output': 30.0},
+        "openai/gpt-3.5-turbo": {"input": 0.5, "output": 1.5},
+        "openai/gpt-4": {"input": 30.0, "output": 60.0},
+        "openai/gpt-4-turbo": {"input": 10.0, "output": 30.0},
         # Free models (Llama)
-        'meta-llama/llama-3.1-8b-instruct:free': {'input': 0.0, 'output': 0.0},
-        'meta-llama/llama-3.1-70b-instruct:free': {'input': 0.0, 'output': 0.0},
+        "meta-llama/llama-3.1-8b-instruct:free": {"input": 0.0, "output": 0.0},
+        "meta-llama/llama-3.1-70b-instruct:free": {"input": 0.0, "output": 0.0},
         # Other popular models
-        'google/gemini-pro': {'input': 0.5, 'output': 1.5},
-        'mistralai/mixtral-8x7b-instruct': {'input': 0.27, 'output': 0.27},
+        "google/gemini-pro": {"input": 0.5, "output": 1.5},
+        "mistralai/mixtral-8x7b-instruct": {"input": 0.27, "output": 0.27},
     }
     def __init__(self, cache_dir: Optional[Path] = None, daily_budget: float = 5.0):
         """Initialize cost tracker.
         Args:
             cache_dir: Directory to store cost tracking data
             daily_budget: Maximum daily spending in USD
@@ -62,33 +59,40 @@ class CostTracker:
         self.daily_budget = daily_budget
         self.cache_dir = cache_dir or Path(".qualitative_cache")
         self.cache_dir.mkdir(exist_ok=True)
         self.cost_file = self.cache_dir / "llm_costs.json"
-        self.calls: List[LLMCall] = []
+        self.calls: list[LLMCall] = []
         self.logger = logging.getLogger(__name__)
         # Load existing cost data
         self._load_cost_data()
-    def record_call(self, model: str, input_tokens: int, output_tokens: int,
-                   processing_time: float, batch_size: int = 1,
-                   success: bool = True, error_message: Optional[str] = None) -> float:
+    def record_call(
+        self,
+        model: str,
+        input_tokens: int,
+        output_tokens: int,
+        processing_time: float,
+        batch_size: int = 1,
+        success: bool = True,
+        error_message: Optional[str] = None,
+    ) -> float:
         """Record an LLM API call and return estimated cost.
         Args:
             model: Model name used
             input_tokens: Number of input tokens
-            output_tokens: Number of output tokens
+            output_tokens: Number of output tokens
             processing_time: Processing time in seconds
             batch_size: Number of commits processed in this call
             success: Whether the call was successful
             error_message: Error message if call failed
         Returns:
             Estimated cost in USD
         """
         estimated_cost = self._calculate_cost(model, input_tokens, output_tokens)
         call = LLMCall(
             timestamp=datetime.utcnow(),
             model=model,
@@ -98,174 +102,172 @@ class CostTracker:
             estimated_cost=estimated_cost,
             batch_size=batch_size,
             success=success,
-            error_message=error_message
+            error_message=error_message,
         )
         self.calls.append(call)
         self._save_cost_data()
         # Log cost information
         self.logger.info(
             f"LLM call: {model} | tokens: {input_tokens}+{output_tokens} | "
             f"cost: ${estimated_cost:.4f} | batch: {batch_size}"
         )
         return estimated_cost
     def get_daily_spend(self, date: Optional[datetime] = None) -> float:
         """Get total spending for a specific date.
         Args:
             date: Date to check (defaults to today)
         Returns:
             Total spending in USD for the date
         """
         if date is None:
             date = datetime.utcnow()
         start_of_day = date.replace(hour=0, minute=0, second=0, microsecond=0)
         end_of_day = start_of_day + timedelta(days=1)
         daily_spend = sum(
-            call.estimated_cost for call in self.calls
+            call.estimated_cost
+            for call in self.calls
             if start_of_day <= call.timestamp < end_of_day and call.success
         )
         return daily_spend
     def check_budget_remaining(self) -> float:
         """Check remaining budget for today.
         Returns:
             Remaining budget in USD (negative if over budget)
         """
         daily_spend = self.get_daily_spend()
         return self.daily_budget - daily_spend
     def can_afford_call(self, model: str, estimated_tokens: int) -> bool:
         """Check if we can afford an API call within budget.
         Args:
             model: Model to use
             estimated_tokens: Estimated total tokens (input + output)
         Returns:
             True if call is within budget
         """
         estimated_cost = self._calculate_cost(model, estimated_tokens // 2, estimated_tokens // 2)
         remaining_budget = self.check_budget_remaining()
         return remaining_budget >= estimated_cost
-    def get_usage_stats(self, days: int = 7) -> Dict[str, any]:
+    def get_usage_stats(self, days: int = 7) -> dict[str, any]:
         """Get usage statistics for the last N days.
         Args:
             days: Number of days to analyze
         Returns:
             Dictionary with usage statistics
         """
         cutoff_date = datetime.utcnow() - timedelta(days=days)
         recent_calls = [call for call in self.calls if call.timestamp >= cutoff_date]
         if not recent_calls:
             return {
-                'total_calls': 0,
-                'total_cost': 0.0,
-                'total_tokens': 0,
-                'avg_cost_per_call': 0.0,
-                'model_usage': {},
-                'success_rate': 1.0
+                "total_calls": 0,
+                "total_cost": 0.0,
+                "total_tokens": 0,
+                "avg_cost_per_call": 0.0,
+                "model_usage": {},
+                "success_rate": 1.0,
             }
         successful_calls = [call for call in recent_calls if call.success]
         # Calculate statistics
         total_cost = sum(call.estimated_cost for call in successful_calls)
         total_tokens = sum(call.input_tokens + call.output_tokens for call in recent_calls)
         # Model usage breakdown
         model_usage = {}
         for call in recent_calls:
             if call.model not in model_usage:
-                model_usage[call.model] = {'calls': 0, 'cost': 0.0, 'tokens': 0}
-            model_usage[call.model]['calls'] += 1
-            model_usage[call.model]['cost'] += call.estimated_cost
-            model_usage[call.model]['tokens'] += call.input_tokens + call.output_tokens
+                model_usage[call.model] = {"calls": 0, "cost": 0.0, "tokens": 0}
+            model_usage[call.model]["calls"] += 1
+            model_usage[call.model]["cost"] += call.estimated_cost
+            model_usage[call.model]["tokens"] += call.input_tokens + call.output_tokens
         return {
-            'total_calls': len(recent_calls),
-            'successful_calls': len(successful_calls),
-            'total_cost': total_cost,
-            'total_tokens': total_tokens,
-            'avg_cost_per_call': total_cost / len(successful_calls) if successful_calls else 0.0,
-            'model_usage': model_usage,
-            'success_rate': len(successful_calls) / len(recent_calls) if recent_calls else 1.0,
-            'daily_average_cost': total_cost / days,
+            "total_calls": len(recent_calls),
+            "successful_calls": len(successful_calls),
+            "total_cost": total_cost,
+            "total_tokens": total_tokens,
+            "avg_cost_per_call": total_cost / len(successful_calls) if successful_calls else 0.0,
+            "model_usage": model_usage,
+            "success_rate": len(successful_calls) / len(recent_calls) if recent_calls else 1.0,
+            "daily_average_cost": total_cost / days,
         }
-    def suggest_cost_optimizations(self) -> List[str]:
+    def suggest_cost_optimizations(self) -> list[str]:
         """Suggest ways to optimize costs based on usage patterns.
         Returns:
             List of optimization suggestions
         """
         suggestions = []
         stats = self.get_usage_stats(days=7)
-        if stats['total_calls'] == 0:
+        if stats["total_calls"] == 0:
             return suggestions
         # Check if expensive models are overused
-        model_usage = stats['model_usage']
-        total_cost = stats['total_cost']
-        expensive_models = ['anthropic/claude-3-opus', 'openai/gpt-4']
+        model_usage = stats["model_usage"]
+        total_cost = stats["total_cost"]
+        expensive_models = ["anthropic/claude-3-opus", "openai/gpt-4"]
         expensive_usage = sum(
-            model_usage.get(model, {}).get('cost', 0)
-            for model in expensive_models
+            model_usage.get(model, {}).get("cost", 0) for model in expensive_models
         )
         if expensive_usage > total_cost * 0.3:
             suggestions.append(
                 "Consider using cheaper models (Claude Haiku, GPT-3.5) for routine classification"
             )
         # Check for free model opportunities
-        free_usage = model_usage.get('meta-llama/llama-3.1-8b-instruct:free', {}).get('calls', 0)
-        if free_usage < stats['total_calls'] * 0.5:
+        free_usage = model_usage.get("meta-llama/llama-3.1-8b-instruct:free", {}).get("calls", 0)
+        if free_usage < stats["total_calls"] * 0.5:
             suggestions.append(
                 "Increase usage of free Llama models for simple classification tasks"
             )
         # Check daily spend
         if self.get_daily_spend() > self.daily_budget * 0.8:
             suggestions.append(
                 "Approaching daily budget limit - consider increasing NLP confidence threshold"
             )
         # Check batch efficiency
-        avg_batch_size = sum(
-            call.batch_size for call in self.calls[-50:]  # Last 50 calls
-        ) / min(50, len(self.calls))
+        avg_batch_size = sum(call.batch_size for call in self.calls[-50:]) / min(  # Last 50 calls
+            50, len(self.calls)
+        )
         if avg_batch_size < 3:
-            suggestions.append(
-                "Increase batch size for LLM calls to improve cost efficiency"
-            )
+            suggestions.append("Increase batch size for LLM calls to improve cost efficiency")
         return suggestions
     def _calculate_cost(self, model: str, input_tokens: int, output_tokens: int) -> float:
         """Calculate estimated cost for an API call.
         Args:
             model: Model name
             input_tokens: Number of input tokens
             output_tokens: Number of output tokens
         Returns:
             Estimated cost in USD
         """
@@ -276,68 +278,68 @@ class CostTracker:
             self.logger.warning(f"Unknown model pricing for {model}, using default rates")
         else:
             pricing = self.MODEL_PRICING[model]
-            input_price = pricing['input']
-            output_price = pricing['output']
+            input_price = pricing["input"]
+            output_price = pricing["output"]
         # Calculate cost (pricing is per 1M tokens)
         input_cost = (input_tokens / 1_000_000) * input_price
         output_cost = (output_tokens / 1_000_000) * output_price
         return input_cost + output_cost
     def _load_cost_data(self) -> None:
         """Load cost tracking data from file."""
         if not self.cost_file.exists():
             return
         try:
-            with open(self.cost_file, 'r') as f:
+            with open(self.cost_file) as f:
                 data = json.load(f)
             self.calls = []
-            for call_data in data.get('calls', []):
+            for call_data in data.get("calls", []):
                 call = LLMCall(
-                    timestamp=datetime.fromisoformat(call_data['timestamp']),
-                    model=call_data['model'],
-                    input_tokens=call_data['input_tokens'],
-                    output_tokens=call_data['output_tokens'],
-                    processing_time_ms=call_data['processing_time_ms'],
-                    estimated_cost=call_data['estimated_cost'],
-                    batch_size=call_data.get('batch_size', 1),
-                    success=call_data.get('success', True),
-                    error_message=call_data.get('error_message')
+                    timestamp=datetime.fromisoformat(call_data["timestamp"]),
+                    model=call_data["model"],
+                    input_tokens=call_data["input_tokens"],
+                    output_tokens=call_data["output_tokens"],
+                    processing_time_ms=call_data["processing_time_ms"],
+                    estimated_cost=call_data["estimated_cost"],
+                    batch_size=call_data.get("batch_size", 1),
+                    success=call_data.get("success", True),
+                    error_message=call_data.get("error_message"),
                 )
                 self.calls.append(call)
         except Exception as e:
             self.logger.error(f"Failed to load cost data: {e}")
             self.calls = []
     def _save_cost_data(self) -> None:
         """Save cost tracking data to file."""
         try:
             # Keep only last 1000 calls to prevent file from growing too large
             recent_calls = self.calls[-1000:]
             data = {
-                'calls': [
+                "calls": [
                     {
-                        'timestamp': call.timestamp.isoformat(),
-                        'model': call.model,
-                        'input_tokens': call.input_tokens,
-                        'output_tokens': call.output_tokens,
-                        'processing_time_ms': call.processing_time_ms,
-                        'estimated_cost': call.estimated_cost,
-                        'batch_size': call.batch_size,
-                        'success': call.success,
-                        'error_message': call.error_message
+                        "timestamp": call.timestamp.isoformat(),
+                        "model": call.model,
+                        "input_tokens": call.input_tokens,
+                        "output_tokens": call.output_tokens,
+                        "processing_time_ms": call.processing_time_ms,
+                        "estimated_cost": call.estimated_cost,
+                        "batch_size": call.batch_size,
+                        "success": call.success,
+                        "error_message": call.error_message,
                     }
                     for call in recent_calls
                 ]
             }
-            with open(self.cost_file, 'w') as f:
+            with open(self.cost_file, "w") as f:
                 json.dump(data, f, indent=2)
         except Exception as e:
-            self.logger.error(f"Failed to save cost data: {e}")
+            self.logger.error(f"Failed to save cost data: {e}")

gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl

gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl