PyPI - gitflow-analytics - Versions diffs - 1.0.0__py3-none-any.whl → 1.0.3__py3-none-any.whl - Mend

gitflow-analytics 1.0.0__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

gitflow_analytics/__init__.py +11 -9
gitflow_analytics/_version.py +2 -2
gitflow_analytics/cli.py +691 -243
gitflow_analytics/cli_rich.py +353 -0
gitflow_analytics/config.py +389 -96
gitflow_analytics/core/analyzer.py +175 -78
gitflow_analytics/core/branch_mapper.py +132 -132
gitflow_analytics/core/cache.py +242 -173
gitflow_analytics/core/identity.py +214 -178
gitflow_analytics/extractors/base.py +13 -11
gitflow_analytics/extractors/story_points.py +70 -59
gitflow_analytics/extractors/tickets.py +111 -88
gitflow_analytics/integrations/github_integration.py +91 -77
gitflow_analytics/integrations/jira_integration.py +284 -0
gitflow_analytics/integrations/orchestrator.py +99 -72
gitflow_analytics/metrics/dora.py +183 -179
gitflow_analytics/models/database.py +191 -54
gitflow_analytics/qualitative/__init__.py +30 -0
gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
gitflow_analytics/qualitative/core/__init__.py +13 -0
gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
gitflow_analytics/qualitative/core/processor.py +540 -0
gitflow_analytics/qualitative/models/__init__.py +25 -0
gitflow_analytics/qualitative/models/schemas.py +272 -0
gitflow_analytics/qualitative/utils/__init__.py +13 -0
gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
gitflow_analytics/qualitative/utils/metrics.py +347 -0
gitflow_analytics/qualitative/utils/text_processing.py +243 -0
gitflow_analytics/reports/analytics_writer.py +25 -8
gitflow_analytics/reports/csv_writer.py +60 -32
gitflow_analytics/reports/narrative_writer.py +21 -15
gitflow_analytics/tui/__init__.py +5 -0
gitflow_analytics/tui/app.py +721 -0
gitflow_analytics/tui/screens/__init__.py +8 -0
gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
gitflow_analytics/tui/screens/configuration_screen.py +547 -0
gitflow_analytics/tui/screens/loading_screen.py +358 -0
gitflow_analytics/tui/screens/main_screen.py +304 -0
gitflow_analytics/tui/screens/results_screen.py +698 -0
gitflow_analytics/tui/widgets/__init__.py +7 -0
gitflow_analytics/tui/widgets/data_table.py +257 -0
gitflow_analytics/tui/widgets/export_modal.py +301 -0
gitflow_analytics/tui/widgets/progress_widget.py +192 -0
gitflow_analytics-1.0.3.dist-info/METADATA +490 -0
gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
gitflow_analytics-1.0.0.dist-info/METADATA +0 -201
gitflow_analytics-1.0.0.dist-info/RECORD +0 -30
{gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
{gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
{gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
{gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0

gitflow_analytics/models/database.py CHANGED Viewed

@@ -1,24 +1,27 @@
 """Database models for GitFlow Analytics using SQLAlchemy."""
 from datetime import datetime
-from typing import Optional
-from sqlalchemy import create_engine, Column, String, Integer, Float, DateTime, Boolean, JSON, Index
-from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import sessionmaker, Session
 from pathlib import Path
-Base = declarative_base()
+from sqlalchemy import JSON, Boolean, Column, DateTime, Float, ForeignKey, Index, Integer, String, create_engine
+from sqlalchemy.orm import Session, sessionmaker, declarative_base
+from typing import Any
+Base: Any = declarative_base()
 class CachedCommit(Base):
     """Cached commit analysis results."""
-    __tablename__ = 'cached_commits'
+    __tablename__ = "cached_commits"
     # Primary key
     id = Column(Integer, primary_key=True)
     # Commit identification
     repo_path = Column(String, nullable=False)
     commit_hash = Column(String, nullable=False)
     # Commit data
     author_name = Column(String)
     author_email = Column(String)
@@ -26,108 +29,114 @@ class CachedCommit(Base):
     timestamp = Column(DateTime)
     branch = Column(String)
     is_merge = Column(Boolean, default=False)
     # Metrics
     files_changed = Column(Integer)
     insertions = Column(Integer)
     deletions = Column(Integer)
     complexity_delta = Column(Float)
     # Extracted data
     story_points = Column(Integer, nullable=True)
     ticket_references = Column(JSON)  # List of ticket IDs
     # Cache metadata
     cached_at = Column(DateTime, default=datetime.utcnow)
     cache_version = Column(String, default="1.0")
     # Indexes for performance
     __table_args__ = (
-        Index('idx_repo_commit', 'repo_path', 'commit_hash', unique=True),
-        Index('idx_timestamp', 'timestamp'),
-        Index('idx_cached_at', 'cached_at'),
+        Index("idx_repo_commit", "repo_path", "commit_hash", unique=True),
+        Index("idx_timestamp", "timestamp"),
+        Index("idx_cached_at", "cached_at"),
     )
 class DeveloperIdentity(Base):
     """Developer identity mappings."""
-    __tablename__ = 'developer_identities'
+    __tablename__ = "developer_identities"
     id = Column(Integer, primary_key=True)
     canonical_id = Column(String, unique=True, nullable=False)
     primary_name = Column(String, nullable=False)
     primary_email = Column(String, nullable=False)
     github_username = Column(String, nullable=True)
     # Statistics
     total_commits = Column(Integer, default=0)
     total_story_points = Column(Integer, default=0)
     first_seen = Column(DateTime, default=datetime.utcnow)
     last_seen = Column(DateTime, default=datetime.utcnow)
     # Metadata
     created_at = Column(DateTime, default=datetime.utcnow)
     updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
     __table_args__ = (
-        Index('idx_primary_email', 'primary_email'),
-        Index('idx_canonical_id', 'canonical_id'),
+        Index("idx_primary_email", "primary_email"),
+        Index("idx_canonical_id", "canonical_id"),
     )
 class DeveloperAlias(Base):
     """Alternative names/emails for developers."""
-    __tablename__ = 'developer_aliases'
+    __tablename__ = "developer_aliases"
     id = Column(Integer, primary_key=True)
     canonical_id = Column(String, nullable=False)  # Foreign key to DeveloperIdentity
     name = Column(String, nullable=False)
     email = Column(String, nullable=False)
     __table_args__ = (
-        Index('idx_alias_email', 'email'),
-        Index('idx_alias_canonical_id', 'canonical_id'),
-        Index('idx_name_email', 'name', 'email', unique=True),
+        Index("idx_alias_email", "email"),
+        Index("idx_alias_canonical_id", "canonical_id"),
+        Index("idx_name_email", "name", "email", unique=True),
     )
 class PullRequestCache(Base):
     """Cached pull request data."""
-    __tablename__ = 'pull_request_cache'
+    __tablename__ = "pull_request_cache"
     id = Column(Integer, primary_key=True)
     repo_path = Column(String, nullable=False)
     pr_number = Column(Integer, nullable=False)
     # PR data
     title = Column(String)
     description = Column(String)
     author = Column(String)
     created_at = Column(DateTime)
     merged_at = Column(DateTime, nullable=True)
     # Extracted data
     story_points = Column(Integer, nullable=True)
     labels = Column(JSON)  # List of labels
     # Associated commits
     commit_hashes = Column(JSON)  # List of commit hashes
     # Cache metadata
     cached_at = Column(DateTime, default=datetime.utcnow)
-    __table_args__ = (
-        Index('idx_repo_pr', 'repo_path', 'pr_number', unique=True),
-    )
+    __table_args__ = (Index("idx_repo_pr", "repo_path", "pr_number", unique=True),)
 class IssueCache(Base):
     """Cached issue data from various platforms."""
-    __tablename__ = 'issue_cache'
+    __tablename__ = "issue_cache"
     id = Column(Integer, primary_key=True)
     # Issue identification
     platform = Column(String, nullable=False)  # 'jira', 'github', 'clickup', 'linear'
     issue_id = Column(String, nullable=False)
     project_key = Column(String, nullable=False)
     # Issue data
     title = Column(String)
     description = Column(String)
@@ -136,36 +145,164 @@ class IssueCache(Base):
     created_at = Column(DateTime)
     updated_at = Column(DateTime)
     resolved_at = Column(DateTime, nullable=True)
     # Extracted data
     story_points = Column(Integer, nullable=True)
     labels = Column(JSON)
     # Platform-specific data
     platform_data = Column(JSON)  # Additional platform-specific fields
     # Cache metadata
     cached_at = Column(DateTime, default=datetime.utcnow)
+    __table_args__ = (
+        Index("idx_platform_issue", "platform", "issue_id", unique=True),
+        Index("idx_project_key", "project_key"),
+    )
+class QualitativeCommitData(Base):
+    """Extended commit data with qualitative analysis results.
+    This table stores the results of qualitative analysis performed on commits,
+    including change type classification, domain analysis, risk assessment,
+    and processing metadata.
+    """
+    __tablename__ = 'qualitative_commits'
+    # Link to existing commit
+    commit_id = Column(Integer, ForeignKey('cached_commits.id'), primary_key=True)
+    # Classification results
+    change_type = Column(String, nullable=False)
+    change_type_confidence = Column(Float, nullable=False)
+    business_domain = Column(String, nullable=False)
+    domain_confidence = Column(Float, nullable=False)
+    risk_level = Column(String, nullable=False)
+    risk_factors = Column(JSON)  # List of risk factors
+    # Intent and context analysis
+    intent_signals = Column(JSON)  # Intent analysis results
+    collaboration_patterns = Column(JSON)  # Team interaction patterns
+    technical_context = Column(JSON)  # Technical context information
+    # Processing metadata
+    processing_method = Column(String, nullable=False)  # 'nlp' or 'llm'
+    processing_time_ms = Column(Float)
+    confidence_score = Column(Float, nullable=False)
+    # Timestamps
+    analyzed_at = Column(DateTime, default=datetime.utcnow)
+    analysis_version = Column(String, default="1.0")
+    # Indexes for efficient querying
     __table_args__ = (
-        Index('idx_platform_issue', 'platform', 'issue_id', unique=True),
-        Index('idx_project_key', 'project_key'),
+        Index('idx_change_type', 'change_type'),
+        Index('idx_business_domain', 'business_domain'),
+        Index('idx_risk_level', 'risk_level'),
+        Index('idx_qualitative_confidence', 'confidence_score'),
+        Index('idx_processing_method', 'processing_method'),
+        Index('idx_analyzed_at', 'analyzed_at'),
     )
+class PatternCache(Base):
+    """Cache for learned patterns and classifications.
+    This table stores frequently occurring patterns to avoid reprocessing
+    similar commits and to improve classification accuracy over time.
+    """
+    __tablename__ = 'pattern_cache'
+    id = Column(Integer, primary_key=True)
+    # Pattern identification
+    message_hash = Column(String, nullable=False, unique=True)
+    semantic_fingerprint = Column(String, nullable=False)
+    # Cached classification results
+    classification_result = Column(JSON, nullable=False)
+    confidence_score = Column(Float, nullable=False)
+    # Usage tracking for cache management
+    hit_count = Column(Integer, default=1)
+    last_used = Column(DateTime, default=datetime.utcnow)
+    created_at = Column(DateTime, default=datetime.utcnow)
+    # Source tracking
+    source_method = Column(String, nullable=False)  # 'nlp' or 'llm'
+    source_model = Column(String)  # Model/method that created this pattern
+    # Performance tracking
+    avg_processing_time_ms = Column(Float)
+    # Indexes for pattern matching and cleanup
+    __table_args__ = (
+        Index('idx_semantic_fingerprint', 'semantic_fingerprint'),
+        Index('idx_pattern_confidence', 'confidence_score'),
+        Index('idx_hit_count', 'hit_count'),
+        Index('idx_last_used', 'last_used'),
+        Index('idx_source_method', 'source_method'),
+    )
+class LLMUsageStats(Base):
+    """Track LLM usage statistics for cost monitoring and optimization.
+    This table helps monitor LLM API usage, costs, and performance to
+    optimize the balance between speed, accuracy, and cost.
+    """
+    __tablename__ = 'llm_usage_stats'
+    id = Column(Integer, primary_key=True)
+    # API call metadata
+    model_name = Column(String, nullable=False)
+    api_provider = Column(String, default='openrouter')
+    timestamp = Column(DateTime, default=datetime.utcnow)
+    # Usage metrics
+    input_tokens = Column(Integer, nullable=False)
+    output_tokens = Column(Integer, nullable=False)
+    processing_time_ms = Column(Float, nullable=False)
+    # Cost tracking
+    estimated_cost_usd = Column(Float)
+    cost_per_token = Column(Float)
+    # Batch information
+    batch_size = Column(Integer, default=1)  # Number of commits processed
+    batch_id = Column(String)  # Group related calls
+    # Quality metrics
+    avg_confidence_score = Column(Float)
+    success = Column(Boolean, default=True)
+    error_message = Column(String)
+    # Indexes for analysis and monitoring
+    __table_args__ = (
+        Index('idx_model_timestamp', 'model_name', 'timestamp'),
+        Index('idx_llm_timestamp', 'timestamp'),
+        Index('idx_batch_id', 'batch_id'),
+        Index('idx_success', 'success'),
+    )
 class Database:
     """Database connection manager."""
     def __init__(self, db_path: Path):
         """Initialize database connection."""
         db_path.parent.mkdir(parents=True, exist_ok=True)
-        self.engine = create_engine(f'sqlite:///{db_path}')
+        self.engine = create_engine(f"sqlite:///{db_path}")
         Base.metadata.create_all(self.engine)
         self.SessionLocal = sessionmaker(bind=self.engine)
     def get_session(self) -> Session:
         """Get a new database session."""
         return self.SessionLocal()
-    def init_db(self):
+    def init_db(self) -> None:
         """Initialize database tables."""
-        Base.metadata.create_all(self.engine)
+        Base.metadata.create_all(self.engine)

gitflow_analytics/qualitative/__init__.py ADDED Viewed

@@ -0,0 +1,30 @@
+"""Qualitative data extraction module for GitFlow Analytics.
+This module provides NLP-based analysis of Git commits to extract semantic meaning,
+change types, domain classification, and risk assessment from commit messages and
+file changes.
+Key Components:
+- QualitativeProcessor: Main orchestrator for qualitative analysis
+- NLPEngine: spaCy-based fast processing for most commits
+- LLMFallback: Strategic use of LLMs for uncertain cases
+- Various classifiers for change type, domain, risk, and intent analysis
+"""
+from .core.processor import QualitativeProcessor
+from .models.schemas import (
+    QualitativeCommitData,
+    QualitativeConfig,
+    NLPConfig,
+    LLMConfig,
+    CacheConfig as QualitativeCacheConfig,
+)
+__all__ = [
+    "QualitativeProcessor",
+    "QualitativeCommitData",
+    "QualitativeConfig",
+    "NLPConfig",
+    "LLMConfig",
+    "QualitativeCacheConfig",
+]

gitflow_analytics/qualitative/classifiers/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""Classification components for qualitative analysis."""
+from .change_type import ChangeTypeClassifier
+from .domain_classifier import DomainClassifier
+from .intent_analyzer import IntentAnalyzer
+from .risk_analyzer import RiskAnalyzer
+__all__ = [
+    "ChangeTypeClassifier",
+    "DomainClassifier",
+    "IntentAnalyzer",
+    "RiskAnalyzer",
+]

gitflow-analytics 1.0.0__py3-none-any.whl → 1.0.3__py3-none-any.whl

gitflow-analytics 1.0.0__py3-none-any.whl → 1.0.3__py3-none-any.whl