PyPI - gitflow-analytics - Versions diffs - 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl - Mend

gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (116)

gitflow_analytics/_version.py +1 -1
gitflow_analytics/classification/__init__.py +31 -0
gitflow_analytics/classification/batch_classifier.py +752 -0
gitflow_analytics/classification/classifier.py +464 -0
gitflow_analytics/classification/feature_extractor.py +725 -0
gitflow_analytics/classification/linguist_analyzer.py +574 -0
gitflow_analytics/classification/model.py +455 -0
gitflow_analytics/cli.py +4108 -350
gitflow_analytics/cli_rich.py +198 -48
gitflow_analytics/config/__init__.py +43 -0
gitflow_analytics/config/errors.py +261 -0
gitflow_analytics/config/loader.py +904 -0
gitflow_analytics/config/profiles.py +264 -0
gitflow_analytics/config/repository.py +124 -0
gitflow_analytics/config/schema.py +441 -0
gitflow_analytics/config/validator.py +154 -0
gitflow_analytics/config.py +44 -508
gitflow_analytics/core/analyzer.py +1209 -98
gitflow_analytics/core/cache.py +1337 -29
gitflow_analytics/core/data_fetcher.py +1193 -0
gitflow_analytics/core/identity.py +363 -14
gitflow_analytics/core/metrics_storage.py +526 -0
gitflow_analytics/core/progress.py +372 -0
gitflow_analytics/core/schema_version.py +269 -0
gitflow_analytics/extractors/ml_tickets.py +1100 -0
gitflow_analytics/extractors/story_points.py +8 -1
gitflow_analytics/extractors/tickets.py +749 -11
gitflow_analytics/identity_llm/__init__.py +6 -0
gitflow_analytics/identity_llm/analysis_pass.py +231 -0
gitflow_analytics/identity_llm/analyzer.py +464 -0
gitflow_analytics/identity_llm/models.py +76 -0
gitflow_analytics/integrations/github_integration.py +175 -11
gitflow_analytics/integrations/jira_integration.py +461 -24
gitflow_analytics/integrations/orchestrator.py +124 -1
gitflow_analytics/metrics/activity_scoring.py +322 -0
gitflow_analytics/metrics/branch_health.py +470 -0
gitflow_analytics/metrics/dora.py +379 -20
gitflow_analytics/models/database.py +843 -53
gitflow_analytics/pm_framework/__init__.py +115 -0
gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
gitflow_analytics/pm_framework/base.py +406 -0
gitflow_analytics/pm_framework/models.py +211 -0
gitflow_analytics/pm_framework/orchestrator.py +652 -0
gitflow_analytics/pm_framework/registry.py +333 -0
gitflow_analytics/qualitative/__init__.py +9 -10
gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
gitflow_analytics/qualitative/core/__init__.py +4 -4
gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
gitflow_analytics/qualitative/core/processor.py +381 -248
gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
gitflow_analytics/qualitative/models/__init__.py +7 -7
gitflow_analytics/qualitative/models/schemas.py +155 -121
gitflow_analytics/qualitative/utils/__init__.py +4 -4
gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
gitflow_analytics/qualitative/utils/metrics.py +172 -158
gitflow_analytics/qualitative/utils/text_processing.py +146 -104
gitflow_analytics/reports/__init__.py +100 -0
gitflow_analytics/reports/analytics_writer.py +539 -14
gitflow_analytics/reports/base.py +648 -0
gitflow_analytics/reports/branch_health_writer.py +322 -0
gitflow_analytics/reports/classification_writer.py +924 -0
gitflow_analytics/reports/cli_integration.py +427 -0
gitflow_analytics/reports/csv_writer.py +1676 -212
gitflow_analytics/reports/data_models.py +504 -0
gitflow_analytics/reports/database_report_generator.py +427 -0
gitflow_analytics/reports/example_usage.py +344 -0
gitflow_analytics/reports/factory.py +499 -0
gitflow_analytics/reports/formatters.py +698 -0
gitflow_analytics/reports/html_generator.py +1116 -0
gitflow_analytics/reports/interfaces.py +489 -0
gitflow_analytics/reports/json_exporter.py +2770 -0
gitflow_analytics/reports/narrative_writer.py +2287 -158
gitflow_analytics/reports/story_point_correlation.py +1144 -0
gitflow_analytics/reports/weekly_trends_writer.py +389 -0
gitflow_analytics/training/__init__.py +5 -0
gitflow_analytics/training/model_loader.py +377 -0
gitflow_analytics/training/pipeline.py +550 -0
gitflow_analytics/tui/__init__.py +1 -1
gitflow_analytics/tui/app.py +129 -126
gitflow_analytics/tui/screens/__init__.py +3 -3
gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
gitflow_analytics/tui/screens/configuration_screen.py +154 -178
gitflow_analytics/tui/screens/loading_screen.py +100 -110
gitflow_analytics/tui/screens/main_screen.py +89 -72
gitflow_analytics/tui/screens/results_screen.py +305 -281
gitflow_analytics/tui/widgets/__init__.py +2 -2
gitflow_analytics/tui/widgets/data_table.py +67 -69
gitflow_analytics/tui/widgets/export_modal.py +76 -76
gitflow_analytics/tui/widgets/progress_widget.py +41 -46
gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
{gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
{gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
{gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
{gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0

gitflow_analytics/qualitative/models/schemas.py CHANGED Viewed

@@ -1,167 +1,199 @@
 """Data models and configuration schemas for qualitative analysis."""
-import time
 from dataclasses import dataclass, field
 from datetime import datetime
-from typing import Any, Dict, List, Optional
+from typing import Any
 @dataclass
 class QualitativeCommitData:
     """Enhanced commit data with qualitative analysis results.
     This class extends basic commit information with semantic analysis results
     including change type, business domain, risk assessment, and processing metadata.
     """
     # Existing commit data from GitFlow Analytics
     hash: str
     message: str
     author_name: str
     author_email: str
     timestamp: datetime
-    files_changed: List[str]
+    files_changed: list[str]
     insertions: int
     deletions: int
     # New qualitative analysis fields
     change_type: str  # feature|bugfix|refactor|docs|test|chore|security|hotfix|config
     change_type_confidence: float  # 0.0-1.0
     business_domain: str  # frontend|backend|database|infrastructure|mobile|devops|unknown
     domain_confidence: float  # 0.0-1.0
     risk_level: str  # low|medium|high|critical
-    risk_factors: List[str]  # List of identified risk factors
-    intent_signals: Dict[str, Any]  # Intent analysis results
-    collaboration_patterns: Dict[str, Any]  # Team interaction patterns
-    technical_context: Dict[str, Any]  # Technical context information
+    risk_factors: list[str]  # List of identified risk factors
+    intent_signals: dict[str, Any]  # Intent analysis results
+    collaboration_patterns: dict[str, Any]  # Team interaction patterns
+    technical_context: dict[str, Any]  # Technical context information
     # Processing metadata
     processing_method: str  # 'nlp' or 'llm'
     processing_time_ms: float
     confidence_score: float  # Overall confidence in analysis
-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self) -> dict[str, Any]:
         """Convert to dictionary for JSON serialization."""
         return {
-            'hash': self.hash,
-            'message': self.message,
-            'author_name': self.author_name,
-            'author_email': self.author_email,
-            'timestamp': self.timestamp.isoformat(),
-            'files_changed': self.files_changed,
-            'insertions': self.insertions,
-            'deletions': self.deletions,
-            'change_type': self.change_type,
-            'change_type_confidence': self.change_type_confidence,
-            'business_domain': self.business_domain,
-            'domain_confidence': self.domain_confidence,
-            'risk_level': self.risk_level,
-            'risk_factors': self.risk_factors,
-            'intent_signals': self.intent_signals,
-            'collaboration_patterns': self.collaboration_patterns,
-            'technical_context': self.technical_context,
-            'processing_method': self.processing_method,
-            'processing_time_ms': self.processing_time_ms,
-            'confidence_score': self.confidence_score,
+            "hash": self.hash,
+            "message": self.message,
+            "author_name": self.author_name,
+            "author_email": self.author_email,
+            "timestamp": self.timestamp.isoformat(),
+            "files_changed": self.files_changed,
+            "insertions": self.insertions,
+            "deletions": self.deletions,
+            "change_type": self.change_type,
+            "change_type_confidence": self.change_type_confidence,
+            "business_domain": self.business_domain,
+            "domain_confidence": self.domain_confidence,
+            "risk_level": self.risk_level,
+            "risk_factors": self.risk_factors,
+            "intent_signals": self.intent_signals,
+            "collaboration_patterns": self.collaboration_patterns,
+            "technical_context": self.technical_context,
+            "processing_method": self.processing_method,
+            "processing_time_ms": self.processing_time_ms,
+            "confidence_score": self.confidence_score,
         }
 @dataclass
 class ChangeTypeConfig:
     """Configuration for change type classification."""
     min_confidence: float = 0.7
     semantic_weight: float = 0.6  # Weight for semantic features
     file_pattern_weight: float = 0.4  # Weight for file pattern signals
     enable_custom_patterns: bool = True
-    custom_patterns: Dict[str, Dict[str, List[str]]] = field(default_factory=dict)
+    custom_patterns: dict[str, dict[str, list[str]]] = field(default_factory=dict)
 @dataclass
 class IntentConfig:
     """Configuration for intent analysis."""
-    urgency_keywords: Dict[str, List[str]] = field(default_factory=lambda: {
-        'critical': ['critical', 'urgent', 'hotfix', 'emergency', 'immediate'],
-        'important': ['important', 'priority', 'asap', 'needed'],
-        'routine': ['routine', 'regular', 'normal', 'standard']
-    })
+    urgency_keywords: dict[str, list[str]] = field(
+        default_factory=lambda: {
+            "critical": ["critical", "urgent", "hotfix", "emergency", "immediate"],
+            "important": ["important", "priority", "asap", "needed"],
+            "routine": ["routine", "regular", "normal", "standard"],
+        }
+    )
     confidence_threshold: float = 0.6
     sentiment_analysis: bool = True
-@dataclass
+@dataclass
 class DomainConfig:
     """Configuration for domain classification."""
-    file_patterns: Dict[str, List[str]] = field(default_factory=lambda: {
-        'frontend': ['*.js', '*.jsx', '*.ts', '*.tsx', '*.vue', '*.html', '*.css', '*.scss'],
-        'backend': ['*.py', '*.java', '*.go', '*.rb', '*.php', '*.cs', '*.cpp'],
-        'database': ['*.sql', 'migrations/*', 'schema/*', '**/models/**'],
-        'infrastructure': ['Dockerfile', '*.yaml', '*.yml', 'terraform/*', '*.tf'],
-        'mobile': ['*.swift', '*.kt', '*.java', 'android/*', 'ios/*'],
-        'devops': ['*.yml', '*.yaml', 'ci/*', '.github/*', 'docker/*']
-    })
-    keyword_patterns: Dict[str, List[str]] = field(default_factory=lambda: {
-        'frontend': ['ui', 'component', 'styling', 'interface', 'layout'],
-        'backend': ['api', 'endpoint', 'service', 'server', 'logic'],
-        'database': ['query', 'schema', 'migration', 'data', 'model'],
-        'infrastructure': ['deploy', 'config', 'environment', 'setup'],
-        'mobile': ['android', 'ios', 'mobile', 'app'],
-        'devops': ['build', 'pipeline', 'deploy', 'ci', 'docker']
-    })
+    file_patterns: dict[str, list[str]] = field(
+        default_factory=lambda: {
+            "frontend": ["*.js", "*.jsx", "*.ts", "*.tsx", "*.vue", "*.html", "*.css", "*.scss"],
+            "backend": ["*.py", "*.java", "*.go", "*.rb", "*.php", "*.cs", "*.cpp"],
+            "database": ["*.sql", "migrations/*", "schema/*", "**/models/**"],
+            "infrastructure": ["Dockerfile", "*.yaml", "*.yml", "terraform/*", "*.tf"],
+            "mobile": ["*.swift", "*.kt", "*.java", "android/*", "ios/*"],
+            "devops": ["*.yml", "*.yaml", "ci/*", ".github/*", "docker/*"],
+        }
+    )
+    keyword_patterns: dict[str, list[str]] = field(
+        default_factory=lambda: {
+            "frontend": ["ui", "component", "styling", "interface", "layout"],
+            "backend": ["api", "endpoint", "service", "server", "logic"],
+            "database": ["query", "schema", "migration", "data", "model"],
+            "infrastructure": ["deploy", "config", "environment", "setup"],
+            "mobile": ["android", "ios", "mobile", "app"],
+            "devops": ["build", "pipeline", "deploy", "ci", "docker"],
+        }
+    )
     min_confidence: float = 0.6
 @dataclass
 class RiskConfig:
     """Configuration for risk analysis."""
-    high_risk_patterns: List[str] = field(default_factory=lambda: [
-        # Security-related patterns
-        'password', 'secret', 'key', 'token', 'auth', 'security',
-        # Critical system patterns
-        'production', 'prod', 'critical', 'emergency',
-        # Infrastructure patterns
-        'database', 'migration', 'schema', 'deploy',
-        # Large change patterns
-        'refactor', 'rewrite', 'restructure'
-    ])
-    medium_risk_patterns: List[str] = field(default_factory=lambda: [
-        'config', 'configuration', 'settings', 'environment',
-        'api', 'endpoint', 'service', 'integration'
-    ])
-    file_risk_patterns: Dict[str, str] = field(default_factory=lambda: {
-        # High risk file patterns
-        '**/*prod*': 'high',
-        '**/migrations/**': 'high',
-        '**/schema/**': 'high',
-        'Dockerfile': 'medium',
-        '*.yml': 'medium',
-        '*.yaml': 'medium',
-        '**/*config*': 'medium'
-    })
-    size_thresholds: Dict[str, int] = field(default_factory=lambda: {
-        'large_commit_files': 20,  # Files changed
-        'large_commit_lines': 500,  # Lines changed
-        'massive_commit_lines': 2000  # Very large changes
-    })
+    high_risk_patterns: list[str] = field(
+        default_factory=lambda: [
+            # Security-related patterns
+            "password",
+            "secret",
+            "key",
+            "token",
+            "auth",
+            "security",
+            # Critical system patterns
+            "production",
+            "prod",
+            "critical",
+            "emergency",
+            # Infrastructure patterns
+            "database",
+            "migration",
+            "schema",
+            "deploy",
+            # Large change patterns
+            "refactor",
+            "rewrite",
+            "restructure",
+        ]
+    )
+    medium_risk_patterns: list[str] = field(
+        default_factory=lambda: [
+            "config",
+            "configuration",
+            "settings",
+            "environment",
+            "api",
+            "endpoint",
+            "service",
+            "integration",
+        ]
+    )
+    file_risk_patterns: dict[str, str] = field(
+        default_factory=lambda: {
+            # High risk file patterns
+            "**/*prod*": "high",
+            "**/migrations/**": "high",
+            "**/schema/**": "high",
+            "Dockerfile": "medium",
+            "*.yml": "medium",
+            "*.yaml": "medium",
+            "**/*config*": "medium",
+        }
+    )
+    size_thresholds: dict[str, int] = field(
+        default_factory=lambda: {
+            "large_commit_files": 20,  # Files changed
+            "large_commit_lines": 500,  # Lines changed
+            "massive_commit_lines": 2000,  # Very large changes
+        }
+    )
 @dataclass
 class NLPConfig:
     """Configuration for NLP processing engine."""
     spacy_model: str = "en_core_web_sm"
     spacy_batch_size: int = 1000
     fast_mode: bool = True  # Disable parser/NER for speed
     # Component configurations
     change_type_config: ChangeTypeConfig = field(default_factory=ChangeTypeConfig)
     intent_config: IntentConfig = field(default_factory=IntentConfig)
     domain_config: DomainConfig = field(default_factory=DomainConfig)
     risk_config: RiskConfig = field(default_factory=RiskConfig)
     # Performance settings
     enable_parallel_processing: bool = True
     max_workers: int = 4
@@ -170,32 +202,32 @@ class NLPConfig:
 @dataclass
 class LLMConfig:
     """Configuration for LLM fallback processing via OpenRouter."""
     # OpenRouter API settings
     openrouter_api_key: str = "${OPENROUTER_API_KEY}"
     base_url: str = "https://openrouter.ai/api/v1"
     # Model selection strategy
     primary_model: str = "anthropic/claude-3-haiku"  # Fast, cheap classification
     fallback_model: str = "meta-llama/llama-3.1-8b-instruct:free"  # Free fallback
     complex_model: str = "anthropic/claude-3-sonnet"  # For complex cases
     # Model routing thresholds
     complexity_threshold: float = 0.5  # Route complex cases to better model
     cost_threshold_per_1k: float = 0.01  # Max cost per 1k commits
     # Processing settings
     max_tokens: int = 1000
     temperature: float = 0.1
-    # Batching settings
+    # Batching settings
     max_group_size: int = 10  # Process up to 10 commits per batch
     similarity_threshold: float = 0.8  # Group similar commits together
     # Rate limiting
     requests_per_minute: int = 200  # Higher limit with OpenRouter
     max_retries: int = 3
     # Cost control
     max_daily_cost: float = 5.0  # Max daily spend in USD
     enable_cost_tracking: bool = True
@@ -204,16 +236,16 @@ class LLMConfig:
 @dataclass
 class CacheConfig:
     """Configuration for qualitative analysis caching."""
     cache_dir: str = ".qualitative_cache"
     semantic_cache_size: int = 10000  # Max cached patterns
     pattern_cache_ttl_hours: int = 168  # 1 week
     # Learning settings
     enable_pattern_learning: bool = True
     learning_threshold: int = 10  # Min examples to learn pattern
     confidence_boost_factor: float = 0.1  # Boost for learned patterns
     # Cache optimization
     enable_compression: bool = True
     max_cache_size_mb: int = 100
@@ -222,51 +254,53 @@ class CacheConfig:
 @dataclass
 class QualitativeConfig:
     """Main configuration for qualitative analysis system.
     This configuration orchestrates the entire qualitative analysis pipeline,
-    balancing performance, accuracy, and cost through intelligent NLP and
+    balancing performance, accuracy, and cost through intelligent NLP and
     strategic LLM usage.
     """
     # Processing settings
     enabled: bool = True
     batch_size: int = 1000  # Commits processed per batch
     max_llm_fallback_pct: float = 0.15  # Max 15% of commits use LLM
     confidence_threshold: float = 0.7  # Min confidence for NLP results
     # Component configurations
     nlp_config: NLPConfig = field(default_factory=NLPConfig)
     llm_config: LLMConfig = field(default_factory=LLMConfig)
     cache_config: CacheConfig = field(default_factory=CacheConfig)
     # Performance monitoring
     enable_performance_tracking: bool = True
     target_processing_time_ms: float = 2.0  # Target per-commit processing time
     # Quality settings
     min_overall_confidence: float = 0.6  # Min confidence for any result
     enable_quality_feedback: bool = True  # Learn from corrections
-    def validate(self) -> List[str]:
+    def validate(self) -> list[str]:
         """Validate configuration and return any warnings.
         Returns:
             List of validation warning messages.
         """
         warnings = []
         if self.max_llm_fallback_pct > 0.3:
             warnings.append("LLM fallback percentage > 30% may result in high costs")
         if self.confidence_threshold > 0.9:
             warnings.append("Very high confidence threshold may route too many commits to LLM")
         if self.batch_size > 5000:
             warnings.append("Large batch size may cause memory issues")
         # Validate LLM config if API key is set
-        if self.llm_config.openrouter_api_key and self.llm_config.openrouter_api_key != "${OPENROUTER_API_KEY}":
-            if self.llm_config.max_daily_cost < 1.0:
-                warnings.append("Very low daily cost limit may restrict LLM usage")
-        return warnings
+        if (
+            self.llm_config.openrouter_api_key
+            and self.llm_config.openrouter_api_key != "${OPENROUTER_API_KEY}"
+        ) and self.llm_config.max_daily_cost < 1.0:
+            warnings.append("Very low daily cost limit may restrict LLM usage")
+        return warnings

gitflow_analytics/qualitative/utils/__init__.py CHANGED Viewed

@@ -1,13 +1,13 @@
 """Utility functions for qualitative analysis."""
-from .text_processing import TextProcessor
-from .batch_processor import BatchProcessor
-from .metrics import PerformanceMetrics
+from .batch_processor import BatchProcessor
 from .cost_tracker import CostTracker
+from .metrics import PerformanceMetrics
+from .text_processing import TextProcessor
 __all__ = [
     "TextProcessor",
     "BatchProcessor",
     "PerformanceMetrics",
     "CostTracker",
-]
+]

gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl

gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl