gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4158 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +905 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +444 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1285 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.11.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.11.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,403 @@
1
+ """OpenAI and OpenRouter API client for LLM classification.
2
+
3
+ This module handles all OpenAI-compatible API interactions, including
4
+ OpenRouter which provides access to multiple models through a unified API.
5
+
6
+ WHY: Separating API interaction logic from classification logic makes the
7
+ system more maintainable and allows easy addition of new providers.
8
+
9
+ DESIGN DECISIONS:
10
+ - Support both OpenAI direct and OpenRouter endpoints
11
+ - Implement exponential backoff for retries
12
+ - Handle rate limiting gracefully
13
+ - Track token usage and costs accurately
14
+ - Support different pricing models
15
+ """
16
+
17
+ import logging
18
+ import time
19
+ from dataclasses import dataclass
20
+ from pathlib import Path
21
+ from typing import Any, Optional
22
+
23
+ try:
24
+ import requests
25
+
26
+ REQUESTS_AVAILABLE = True
27
+ except ImportError:
28
+ REQUESTS_AVAILABLE = False
29
+ requests = None
30
+
31
+ from .base import BaseLLMClassifier, ClassificationResult, LLMProviderConfig
32
+ from .cost_tracker import CostTracker, ModelPricing
33
+ from .prompts import PromptGenerator, PromptVersion
34
+ from .response_parser import ResponseParser
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+
39
@dataclass
class OpenAIConfig(LLMProviderConfig):
    """Provider configuration for OpenAI-compatible endpoints.

    WHY: OpenAI-compatible APIs have specific configuration needs
    beyond the base configuration.
    """

    # Endpoint selection; defaults to OpenRouter's OpenAI-compatible API.
    api_base_url: str = "https://openrouter.ai/api/v1"  # Default to OpenRouter
    organization: Optional[str] = None  # OpenAI organization ID

    # OpenRouter-specific attribution headers.
    site_url: str = "https://github.com/gitflow-analytics"
    app_name: str = "GitFlow Analytics"

    # When False, requests go straight to the OpenAI API instead of OpenRouter.
    use_openrouter: bool = True  # If False, use direct OpenAI API

    def validate(self) -> None:
        """Validate OpenAI-specific configuration on top of the base checks."""
        super().validate()

        if not REQUESTS_AVAILABLE:
            raise ImportError("requests library required for OpenAI/OpenRouter")

        # NOTE: the API key is deliberately NOT required here; without one the
        # classifier degrades gracefully instead of failing at config time.

        # Direct-OpenAI mode must never point at an OpenRouter URL.
        if not self.use_openrouter and "openrouter" in self.api_base_url:
            self.api_base_url = "https://api.openai.com/v1"
71
+
72
+
73
+ class OpenAIClassifier(BaseLLMClassifier):
74
+ """OpenAI/OpenRouter-based commit classifier.
75
+
76
+ WHY: OpenAI and OpenRouter provide high-quality language models
77
+ for classification. This implementation supports both providers
78
+ through their compatible APIs.
79
+ """
80
+
81
+ def __init__(
82
+ self,
83
+ config: OpenAIConfig,
84
+ cache_dir: Optional[Path] = None,
85
+ prompt_version: PromptVersion = PromptVersion.V3_CONTEXTUAL,
86
+ ):
87
+ """Initialize OpenAI classifier.
88
+
89
+ Args:
90
+ config: OpenAI-specific configuration
91
+ cache_dir: Directory for caching predictions
92
+ prompt_version: Version of prompts to use
93
+ """
94
+ super().__init__(config, cache_dir)
95
+ self.config: OpenAIConfig = config
96
+
97
+ # Initialize components
98
+ self.prompt_generator = PromptGenerator(prompt_version)
99
+ self.response_parser = ResponseParser()
100
+ self.cost_tracker = CostTracker()
101
+
102
+ # Set up model pricing
103
+ self._setup_pricing()
104
+
105
+ # Rate limiting state
106
+ self._last_request_time = 0
107
+ self._request_count = 0
108
+ self._minute_start = time.time()
109
+
110
+ logger.info(f"OpenAIClassifier initialized with model: {config.model}")
111
+
112
+ def _setup_pricing(self) -> None:
113
+ """Set up pricing information for the configured model.
114
+
115
+ WHY: Accurate cost tracking helps users monitor and control
116
+ their LLM usage expenses.
117
+ """
118
+ # Common model pricing (per 1M tokens)
119
+ pricing_map = {
120
+ "gpt-4": ModelPricing("gpt-4", 30.0, 60.0),
121
+ "gpt-4-turbo": ModelPricing("gpt-4-turbo", 10.0, 30.0),
122
+ "gpt-3.5-turbo": ModelPricing("gpt-3.5-turbo", 0.5, 1.5),
123
+ "mistralai/mistral-7b-instruct": ModelPricing("mistral-7b", 0.25, 0.25),
124
+ "meta-llama/llama-2-70b-chat": ModelPricing("llama-2-70b", 0.7, 0.9),
125
+ "anthropic/claude-2": ModelPricing("claude-2", 8.0, 24.0),
126
+ }
127
+
128
+ # Find matching pricing or use default
129
+ model_lower = self.config.model.lower()
130
+ for model_key, pricing in pricing_map.items():
131
+ if model_key in model_lower:
132
+ self.cost_tracker.set_model_pricing(pricing)
133
+ return
134
+
135
+ # Default pricing for unknown models
136
+ self.cost_tracker.set_model_pricing(ModelPricing(self.config.model, 1.0, 1.0))
137
+
138
+ def get_provider_name(self) -> str:
139
+ """Get the name of the LLM provider."""
140
+ if self.config.use_openrouter:
141
+ return "openrouter"
142
+ return "openai"
143
+
144
+ def classify_commit(
145
+ self, message: str, files_changed: Optional[list[str]] = None
146
+ ) -> ClassificationResult:
147
+ """Classify a single commit message.
148
+
149
+ Args:
150
+ message: Commit message to classify
151
+ files_changed: Optional list of changed files
152
+
153
+ Returns:
154
+ Classification result
155
+ """
156
+ start_time = time.time()
157
+
158
+ # Validate input
159
+ if not message or not message.strip():
160
+ return ClassificationResult(
161
+ category="maintenance",
162
+ confidence=0.3,
163
+ method="empty_message",
164
+ reasoning="Empty commit message",
165
+ model="none",
166
+ alternatives=[],
167
+ processing_time_ms=(time.time() - start_time) * 1000,
168
+ )
169
+
170
+ # Apply rate limiting
171
+ self._apply_rate_limiting()
172
+
173
+ # Generate prompt
174
+ system_prompt, user_prompt = self.prompt_generator.generate_prompt(message, files_changed)
175
+
176
+ # Make API request with retries
177
+ for attempt in range(self.config.max_retries):
178
+ try:
179
+ response_text, tokens_used = self._make_api_request(system_prompt, user_prompt)
180
+
181
+ # Parse response
182
+ category, confidence, reasoning = self.response_parser.parse_response(
183
+ response_text, self.prompt_generator.CATEGORIES
184
+ )
185
+
186
+ # Track costs
187
+ prompt_tokens = self._estimate_tokens(system_prompt + user_prompt)
188
+ completion_tokens = tokens_used - prompt_tokens if tokens_used else 50
189
+ cost = self.cost_tracker.track_usage(prompt_tokens, completion_tokens)
190
+
191
+ # Update statistics
192
+ self.total_tokens_used += (
193
+ tokens_used if tokens_used else prompt_tokens + completion_tokens
194
+ )
195
+ self.total_cost += cost
196
+ self.api_calls_made += 1
197
+
198
+ return ClassificationResult(
199
+ category=category,
200
+ confidence=confidence,
201
+ method="llm",
202
+ reasoning=reasoning,
203
+ model=self.config.model,
204
+ alternatives=[],
205
+ processing_time_ms=(time.time() - start_time) * 1000,
206
+ )
207
+
208
+ except Exception as e:
209
+ logger.warning(f"API request attempt {attempt + 1} failed: {e}")
210
+ if attempt < self.config.max_retries - 1:
211
+ time.sleep(self.config.retry_delay_seconds * (2**attempt))
212
+ else:
213
+ # Final attempt failed, return fallback
214
+ return ClassificationResult(
215
+ category="maintenance",
216
+ confidence=0.1,
217
+ method="llm_error",
218
+ reasoning=f"LLM classification failed: {str(e)}",
219
+ model="fallback",
220
+ alternatives=[],
221
+ processing_time_ms=(time.time() - start_time) * 1000,
222
+ )
223
+
224
+ # Should never reach here
225
+ return ClassificationResult(
226
+ category="maintenance",
227
+ confidence=0.1,
228
+ method="llm_error",
229
+ reasoning="Unexpected error in classification",
230
+ model="fallback",
231
+ alternatives=[],
232
+ processing_time_ms=(time.time() - start_time) * 1000,
233
+ )
234
+
235
+ def classify_commits_batch(
236
+ self, commits: list[dict[str, Any]], batch_id: Optional[str] = None
237
+ ) -> list[ClassificationResult]:
238
+ """Classify a batch of commits.
239
+
240
+ WHY: Batch processing can be more efficient for large numbers
241
+ of commits, though this implementation processes them serially
242
+ to respect rate limits.
243
+
244
+ Args:
245
+ commits: List of commit dictionaries
246
+ batch_id: Optional batch identifier
247
+
248
+ Returns:
249
+ List of classification results
250
+ """
251
+ results = []
252
+
253
+ for commit in commits:
254
+ message = commit.get("message", "")
255
+ files_changed = []
256
+
257
+ # Extract files from commit data
258
+ if "files_changed" in commit:
259
+ fc = commit["files_changed"]
260
+ if isinstance(fc, list):
261
+ files_changed = fc
262
+
263
+ # Classify individual commit
264
+ result = self.classify_commit(message, files_changed)
265
+
266
+ # Add batch ID if provided
267
+ if batch_id:
268
+ result.batch_id = batch_id
269
+
270
+ results.append(result)
271
+
272
+ return results
273
+
274
+ def _make_api_request(self, system_prompt: str, user_prompt: str) -> tuple[str, int]:
275
+ """Make API request to OpenAI/OpenRouter.
276
+
277
+ Args:
278
+ system_prompt: System prompt for the model
279
+ user_prompt: User prompt with the classification task
280
+
281
+ Returns:
282
+ Tuple of (response_text, tokens_used)
283
+
284
+ Raises:
285
+ Exception: If API request fails
286
+ """
287
+ if not self.config.api_key:
288
+ raise ValueError("API key not configured - cannot make LLM requests")
289
+
290
+ headers = {
291
+ "Authorization": f"Bearer {self.config.api_key}",
292
+ "Content-Type": "application/json",
293
+ }
294
+
295
+ # Add OpenRouter-specific headers
296
+ if self.config.use_openrouter:
297
+ headers["HTTP-Referer"] = self.config.site_url
298
+ headers["X-Title"] = self.config.app_name
299
+
300
+ # Add OpenAI organization if specified
301
+ if self.config.organization:
302
+ headers["OpenAI-Organization"] = self.config.organization
303
+
304
+ # Prepare request payload
305
+ payload = {
306
+ "model": self.config.model,
307
+ "messages": [
308
+ {"role": "system", "content": system_prompt},
309
+ {"role": "user", "content": user_prompt},
310
+ ],
311
+ "max_tokens": self.config.max_tokens,
312
+ "temperature": self.config.temperature,
313
+ }
314
+
315
+ # Make request
316
+ url = f"{self.config.api_base_url}/chat/completions"
317
+ response = requests.post(
318
+ url, headers=headers, json=payload, timeout=self.config.timeout_seconds
319
+ )
320
+
321
+ # Check response
322
+ if response.status_code != 200:
323
+ error_msg = f"API request failed with status {response.status_code}"
324
+ try:
325
+ error_data = response.json()
326
+ if "error" in error_data:
327
+ error_msg += f": {error_data['error'].get('message', 'Unknown error')}"
328
+ except Exception:
329
+ error_msg += f": {response.text}"
330
+ raise Exception(error_msg)
331
+
332
+ # Parse response
333
+ data = response.json()
334
+
335
+ if "choices" not in data or not data["choices"]:
336
+ raise Exception("No response choices in API response")
337
+
338
+ response_text = data["choices"][0]["message"]["content"].strip()
339
+
340
+ # Extract token usage if available
341
+ tokens_used = 0
342
+ if "usage" in data:
343
+ tokens_used = data["usage"].get("total_tokens", 0)
344
+
345
+ return response_text, tokens_used
346
+
347
+ def _apply_rate_limiting(self) -> None:
348
+ """Apply rate limiting to respect API limits.
349
+
350
+ WHY: Prevents hitting API rate limits which would cause
351
+ errors and potential account suspension.
352
+ """
353
+ current_time = time.time()
354
+
355
+ # Check if we're in a new minute
356
+ if current_time - self._minute_start >= 60:
357
+ self._request_count = 0
358
+ self._minute_start = current_time
359
+
360
+ # If we've hit the per-minute limit, wait
361
+ if self._request_count >= self.config.max_requests_per_minute:
362
+ sleep_time = 60 - (current_time - self._minute_start)
363
+ if sleep_time > 0:
364
+ logger.debug(f"Rate limiting: sleeping for {sleep_time:.1f} seconds")
365
+ time.sleep(sleep_time)
366
+ self._request_count = 0
367
+ self._minute_start = time.time()
368
+
369
+ # Increment request count
370
+ self._request_count += 1
371
+ self._last_request_time = time.time()
372
+
373
+ def _estimate_tokens(self, text: str) -> int:
374
+ """Estimate token count for text.
375
+
376
+ WHY: Token estimation helps track costs even when the API
377
+ doesn't return exact token counts.
378
+
379
+ Args:
380
+ text: Text to estimate tokens for
381
+
382
+ Returns:
383
+ Estimated token count
384
+ """
385
+ # Simple estimation: ~4 characters per token on average
386
+ # This is a rough approximation; actual tokenization varies
387
+ return len(text) // 4
388
+
389
+ def estimate_cost(self, text: str) -> float:
390
+ """Estimate the cost of classifying the given text.
391
+
392
+ Args:
393
+ text: Text to be classified
394
+
395
+ Returns:
396
+ Estimated cost in USD
397
+ """
398
+ # Estimate tokens for the full prompt
399
+ system_prompt = "You are a commit classification expert." # Simplified
400
+ prompt_tokens = self._estimate_tokens(system_prompt + text) + 100 # Add buffer
401
+ completion_tokens = self.config.max_tokens
402
+
403
+ return self.cost_tracker.calculate_cost(prompt_tokens, completion_tokens)