PyPI - greenmining - Versions diffs - 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl - Mend

greenmining 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43)

greenmining/__init__.py +11 -29
greenmining/__main__.py +9 -3
greenmining/__version__.py +2 -2
greenmining/analyzers/__init__.py +3 -7
greenmining/analyzers/code_diff_analyzer.py +151 -61
greenmining/analyzers/qualitative_analyzer.py +15 -81
greenmining/analyzers/statistical_analyzer.py +8 -69
greenmining/analyzers/temporal_analyzer.py +16 -72
greenmining/config.py +105 -58
greenmining/controllers/__init__.py +1 -5
greenmining/controllers/repository_controller.py +153 -94
greenmining/energy/__init__.py +13 -0
greenmining/energy/base.py +165 -0
greenmining/energy/codecarbon_meter.py +146 -0
greenmining/energy/rapl.py +157 -0
greenmining/gsf_patterns.py +4 -26
greenmining/models/__init__.py +1 -5
greenmining/models/aggregated_stats.py +4 -4
greenmining/models/analysis_result.py +4 -4
greenmining/models/commit.py +5 -5
greenmining/models/repository.py +5 -5
greenmining/presenters/__init__.py +1 -5
greenmining/presenters/console_presenter.py +24 -24
greenmining/services/__init__.py +10 -6
greenmining/services/commit_extractor.py +8 -152
greenmining/services/data_aggregator.py +45 -175
greenmining/services/data_analyzer.py +9 -202
greenmining/services/github_fetcher.py +212 -323
greenmining/services/github_graphql_fetcher.py +371 -0
greenmining/services/local_repo_analyzer.py +387 -0
greenmining/services/reports.py +33 -137
greenmining/utils.py +21 -149
{greenmining-1.0.3.dist-info → greenmining-1.0.4.dist-info}/METADATA +61 -151
greenmining-1.0.4.dist-info/RECORD +37 -0
{greenmining-1.0.3.dist-info → greenmining-1.0.4.dist-info}/WHEEL +1 -1
greenmining/analyzers/ml_feature_extractor.py +0 -512
greenmining/analyzers/nlp_analyzer.py +0 -365
greenmining/cli.py +0 -471
greenmining/main.py +0 -37
greenmining-1.0.3.dist-info/RECORD +0 -36
greenmining-1.0.3.dist-info/entry_points.txt +0 -2
{greenmining-1.0.3.dist-info → greenmining-1.0.4.dist-info}/licenses/LICENSE +0 -0
{greenmining-1.0.3.dist-info → greenmining-1.0.4.dist-info}/top_level.txt +0 -0

greenmining/analyzers/temporal_analyzer.py CHANGED Viewed

@@ -1,18 +1,4 @@
-"""
-Temporal and Historical Analysis for Green Software Practices
-Implements time-series analysis from Soliman et al. (2017):
-- Adoption trend analysis (when practices emerged)
-- Velocity analysis (commit frequency over time)
-- Pattern evolution tracking (which practices dominated when)
-- Temporal correlations (do practices cluster in time?)
-Addresses research questions:
-1. When did green practices emerge in software development?
-2. Are green practices increasing or decreasing over time?
-3. Which practices were early vs. late adopters?
-4. Do green practices correlate with project maturity?
-"""
+# Temporal and Historical Analysis for Green Software Practices
 from __future__ import annotations
@@ -25,7 +11,7 @@ import statistics
 @dataclass
 class TemporalMetrics:
-    """Metrics for a specific time period"""
+    # Metrics for a specific time period
     period: str
     start_date: datetime
@@ -40,7 +26,7 @@ class TemporalMetrics:
 @dataclass
 class TrendAnalysis:
-    """Trend analysis results"""
+    # Trend analysis results
     trend_direction: str  # 'increasing', 'decreasing', 'stable'
     slope: float
@@ -51,35 +37,16 @@ class TrendAnalysis:
 class TemporalAnalyzer:
-    """
-    Analyze temporal patterns in green software adoption.
-    Based on Soliman et al.: "Time-range filtering is standard practice"
-    Extends with: trend detection, velocity analysis, evolution tracking
-    """
+    # Analyze temporal patterns in green software adoption.
     def __init__(self, granularity: str = "quarter"):
-        """
-        Initialize temporal analyzer.
-        Args:
-            granularity: Time period granularity ('day', 'week', 'month', 'quarter', 'year')
-        """
+        # Initialize temporal analyzer.
         self.granularity = granularity
     def group_commits_by_period(
         self, commits: List[Dict], date_field: str = "date"
     ) -> Dict[str, List[Dict]]:
-        """
-        Group commits into time periods.
-        Args:
-            commits: List of commit dictionaries
-            date_field: Field containing commit date
-        Returns:
-            Dictionary mapping period strings to commit lists
-        """
+        # Group commits into time periods.
         periods = defaultdict(list)
         for commit in commits:
@@ -103,7 +70,7 @@ class TemporalAnalyzer:
         return dict(periods)
     def _get_period_key(self, date: datetime) -> str:
-        """Get period key for a date based on granularity."""
+        # Get period key for a date based on granularity.
         if self.granularity == "day":
             return date.strftime("%Y-%m-%d")
         elif self.granularity == "week":
@@ -120,7 +87,7 @@ class TemporalAnalyzer:
             return date.strftime("%Y-%m")
     def _parse_period_key(self, period_key: str) -> Tuple[datetime, datetime]:
-        """Parse period key back to start and end dates."""
+        # Parse period key back to start and end dates.
         if "W" in period_key:
             # Week format: 2024-W15
             year, week = period_key.split("-W")
@@ -173,17 +140,7 @@ class TemporalAnalyzer:
     def calculate_period_metrics(
         self, period_key: str, commits: List[Dict], analysis_results: List[Dict]
     ) -> TemporalMetrics:
-        """
-        Calculate metrics for a time period.
-        Args:
-            period_key: Period identifier
-            commits: Commits in this period
-            analysis_results: Pattern analysis results for commits
-        Returns:
-            TemporalMetrics object
-        """
+        # Calculate metrics for a time period.
         start_date, end_date = self._parse_period_key(period_key)
         # Count green commits
@@ -229,20 +186,7 @@ class TemporalAnalyzer:
         )
     def analyze_trends(self, commits: List[Dict], analysis_results: List[Dict]) -> Dict:
-        """
-        Comprehensive temporal trend analysis.
-        Args:
-            commits: All commits to analyze
-            analysis_results: Pattern analysis results
-        Returns:
-            Dictionary with:
-            - periods: List of TemporalMetrics
-            - trend: TrendAnalysis
-            - adoption_curve: List of (period, cumulative_rate)
-            - velocity_trend: Velocity change over time
-        """
+        # Comprehensive temporal trend analysis.
         # Group by periods
         grouped = self.group_commits_by_period(commits)
@@ -284,7 +228,7 @@ class TemporalAnalyzer:
         }
     def _calculate_trend(self, periods: List[TemporalMetrics]) -> Optional[TrendAnalysis]:
-        """Calculate linear trend using least squares regression."""
+        # Calculate linear trend using least squares regression.
         if len(periods) < 2:
             return None
@@ -332,7 +276,7 @@ class TemporalAnalyzer:
         )
     def _calculate_adoption_curve(self, periods: List[TemporalMetrics]) -> List[Tuple[str, float]]:
-        """Calculate cumulative adoption over time."""
+        # Calculate cumulative adoption over time.
         cumulative_green = 0
         cumulative_total = 0
         curve = []
@@ -348,7 +292,7 @@ class TemporalAnalyzer:
         return curve
     def _calculate_velocity_trend(self, periods: List[TemporalMetrics]) -> Dict:
-        """Analyze velocity changes over time."""
+        # Analyze velocity changes over time.
         if not periods:
             return {}
@@ -365,7 +309,7 @@ class TemporalAnalyzer:
     def _analyze_pattern_evolution(
         self, periods: List[TemporalMetrics], analysis_results: List[Dict]
     ) -> Dict:
-        """Track when different patterns emerged and dominated."""
+        # Track when different patterns emerged and dominated.
         pattern_timeline = defaultdict(lambda: {"first_seen": None, "occurrences_by_period": {}})
         for period in periods:
@@ -406,7 +350,7 @@ class TemporalAnalyzer:
         }
     def _metrics_to_dict(self, metrics: TemporalMetrics) -> Dict:
-        """Convert TemporalMetrics to dictionary."""
+        # Convert TemporalMetrics to dictionary.
         return {
             "period": metrics.period,
             "start_date": metrics.start_date.isoformat(),
@@ -420,7 +364,7 @@ class TemporalAnalyzer:
         }
     def _trend_to_dict(self, trend: Optional[TrendAnalysis]) -> Dict:
-        """Convert TrendAnalysis to dictionary."""
+        # Convert TrendAnalysis to dictionary.
         if not trend:
             return {}

greenmining/config.py CHANGED Viewed

@@ -1,72 +1,93 @@
-"""Configuration management for green microservices mining CLI."""
 import os
 from pathlib import Path
+from typing import Any, Dict, List, Optional
 from dotenv import load_dotenv
-class Config:
-    """Configuration class for loading and validating environment variables."""
+def _load_yaml_config(yaml_path: Path) -> Dict[str, Any]:
+    # Load configuration from YAML file if it exists.
+    if not yaml_path.exists():
+        return {}
+    try:
+        import yaml
+        with open(yaml_path, 'r') as f:
+            return yaml.safe_load(f) or {}
+    except ImportError:
+        return {}
+    except Exception:
+        return {}
-    def __init__(self, env_file: str = ".env"):
-        """Initialize configuration from environment file.
-        Args:
-            env_file: Path to .env file
-        """
+class Config:
+    # Configuration class for loading from env vars and YAML.
+    def __init__(self, env_file: str = ".env", yaml_file: str = "greenmining.yaml"):
+        # Initialize configuration from environment and YAML file.
         # Load environment variables
         env_path = Path(env_file)
         if env_path.exists():
             load_dotenv(env_path)
         else:
             load_dotenv()  # Load from system environment
+        # Load YAML config (takes precedence for certain options)
+        yaml_path = Path(yaml_file)
+        self._yaml_config = _load_yaml_config(yaml_path)
         # GitHub API Configuration
         self.GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
         if not self.GITHUB_TOKEN or self.GITHUB_TOKEN == "your_github_pat_here":
             raise ValueError("GITHUB_TOKEN not set. Please set it in .env file or environment.")
-        # Analysis Type - Using GitHub Copilot for AI-powered analysis
+        # Analysis Type
         self.ANALYSIS_TYPE = "keyword_heuristic"
-        # Search and Processing Configuration
-        self.GITHUB_SEARCH_KEYWORDS = ["microservices", "microservice-architecture", "cloud-native"]
-        self.SUPPORTED_LANGUAGES = [
-            "Java",
-            "Python",
-            "Go",
-            "JavaScript",
-            "TypeScript",
-            "C#",
-            "Rust",
-        ]
-        # Repository and Commit Limits
-        self.MIN_STARS = int(os.getenv("MIN_STARS", "100"))
-        self.MAX_REPOS = int(os.getenv("MAX_REPOS", "100"))
-        self.COMMITS_PER_REPO = int(os.getenv("COMMITS_PER_REPO", "50"))
-        self.DAYS_BACK = int(os.getenv("DAYS_BACK", "730"))  # 2 years
+        # Search and Processing Configuration (YAML: sources.search.keywords)
+        yaml_search = self._yaml_config.get("sources", {}).get("search", {})
+        self.GITHUB_SEARCH_KEYWORDS = yaml_search.get("keywords",
+            ["microservices", "microservice-architecture", "cloud-native"])
+        # Supported Languages (YAML: sources.search.languages)
+        self.SUPPORTED_LANGUAGES = yaml_search.get("languages", [
+            "Java", "Python", "Go", "JavaScript", "TypeScript", "C#", "Rust",
+        ])
-        # Advanced Analyzer Configuration
+        # Repository and Commit Limits (YAML: extraction.*)
+        yaml_extraction = self._yaml_config.get("extraction", {})
+        self.MIN_STARS = yaml_search.get("min_stars", int(os.getenv("MIN_STARS", "100")))
+        self.MAX_REPOS = int(os.getenv("MAX_REPOS", "100"))
+        self.COMMITS_PER_REPO = yaml_extraction.get("max_commits",
+            int(os.getenv("COMMITS_PER_REPO", "50")))
+        self.DAYS_BACK = yaml_extraction.get("days_back",
+            int(os.getenv("DAYS_BACK", "730")))
+        self.SKIP_MERGES = yaml_extraction.get("skip_merges", True)
+        # Analysis Configuration (YAML: analysis.*)
+        yaml_analysis = self._yaml_config.get("analysis", {})
         self.ENABLE_NLP_ANALYSIS = os.getenv("ENABLE_NLP_ANALYSIS", "false").lower() == "true"
         self.ENABLE_TEMPORAL_ANALYSIS = (
             os.getenv("ENABLE_TEMPORAL_ANALYSIS", "false").lower() == "true"
         )
-        self.TEMPORAL_GRANULARITY = os.getenv(
-            "TEMPORAL_GRANULARITY", "quarter"
-        )  # day, week, month, quarter, year
+        self.TEMPORAL_GRANULARITY = os.getenv("TEMPORAL_GRANULARITY", "quarter")
         self.ENABLE_ML_FEATURES = os.getenv("ENABLE_ML_FEATURES", "false").lower() == "true"
         self.VALIDATION_SAMPLE_SIZE = int(os.getenv("VALIDATION_SAMPLE_SIZE", "30"))
-        # Temporal Filtering (NEW)
-        self.CREATED_AFTER = os.getenv("CREATED_AFTER")  # YYYY-MM-DD
-        self.CREATED_BEFORE = os.getenv("CREATED_BEFORE")  # YYYY-MM-DD
-        self.PUSHED_AFTER = os.getenv("PUSHED_AFTER")  # YYYY-MM-DD
-        self.PUSHED_BEFORE = os.getenv("PUSHED_BEFORE")  # YYYY-MM-DD
-        self.COMMIT_DATE_FROM = os.getenv("COMMIT_DATE_FROM")  # YYYY-MM-DD
-        self.COMMIT_DATE_TO = os.getenv("COMMIT_DATE_TO")  # YYYY-MM-DD
+        # PyDriller options (YAML: analysis.process_metrics, etc.)
+        self.PROCESS_METRICS_ENABLED = yaml_analysis.get("process_metrics",
+            os.getenv("PROCESS_METRICS_ENABLED", "true").lower() == "true")
+        self.STRUCTURAL_METRICS_ENABLED = yaml_analysis.get("structural_metrics",
+            os.getenv("STRUCTURAL_METRICS_ENABLED", "true").lower() == "true")
+        self.DMM_ENABLED = yaml_analysis.get("delta_maintainability",
+            os.getenv("DMM_ENABLED", "true").lower() == "true")
+        # Temporal Filtering
+        self.CREATED_AFTER = os.getenv("CREATED_AFTER")
+        self.CREATED_BEFORE = os.getenv("CREATED_BEFORE")
+        self.PUSHED_AFTER = os.getenv("PUSHED_AFTER")
+        self.PUSHED_BEFORE = os.getenv("PUSHED_BEFORE")
+        self.COMMIT_DATE_FROM = os.getenv("COMMIT_DATE_FROM")
+        self.COMMIT_DATE_TO = os.getenv("COMMIT_DATE_TO")
         self.MIN_COMMITS = int(os.getenv("MIN_COMMITS", "0"))
         self.ACTIVITY_WINDOW_DAYS = int(os.getenv("ACTIVITY_WINDOW_DAYS", "730"))
@@ -76,11 +97,13 @@ class Config:
         # Processing Configuration
         self.TIMEOUT_SECONDS = int(os.getenv("TIMEOUT_SECONDS", "30"))
         self.MAX_RETRIES = int(os.getenv("MAX_RETRIES", "3"))
-        self.RETRY_DELAY = 2  # seconds
+        self.RETRY_DELAY = 2
         self.EXPONENTIAL_BACKOFF = True
-        # Output Configuration
-        self.OUTPUT_DIR = Path(os.getenv("OUTPUT_DIR", "./data"))
+        # Output Configuration (YAML: output.directory)
+        yaml_output = self._yaml_config.get("output", {})
+        self.OUTPUT_DIR = Path(yaml_output.get("directory",
+            os.getenv("OUTPUT_DIR", "./data")))
         self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
         # File Paths
@@ -92,17 +115,48 @@ class Config:
         self.REPORT_FILE = self.OUTPUT_DIR / "green_microservices_analysis.md"
         self.CHECKPOINT_FILE = self.OUTPUT_DIR / "checkpoint.json"
+        # Direct Repository URL Support (YAML: sources.urls)
+        yaml_urls = self._yaml_config.get("sources", {}).get("urls", [])
+        env_urls = self._parse_repository_urls(os.getenv("REPOSITORY_URLS", ""))
+        self.REPOSITORY_URLS: List[str] = yaml_urls if yaml_urls else env_urls
+        # Clone path (YAML: extraction.clone_path)
+        self.CLONE_PATH = Path(yaml_extraction.get("clone_path",
+            os.getenv("CLONE_PATH", "/tmp/greenmining_repos")))
+        self.CLEANUP_AFTER_ANALYSIS = (
+            os.getenv("CLEANUP_AFTER_ANALYSIS", "true").lower() == "true"
+        )
+        # Energy Measurement (YAML: energy.*)
+        yaml_energy = self._yaml_config.get("energy", {})
+        self.ENERGY_ENABLED = yaml_energy.get("enabled",
+            os.getenv("ENERGY_ENABLED", "false").lower() == "true")
+        self.ENERGY_BACKEND = yaml_energy.get("backend",
+            os.getenv("ENERGY_BACKEND", "rapl"))
+        self.CARBON_TRACKING = yaml_energy.get("carbon_tracking",
+            os.getenv("CARBON_TRACKING", "false").lower() == "true")
+        self.COUNTRY_ISO = yaml_energy.get("country_iso",
+            os.getenv("COUNTRY_ISO", "USA"))
+        # Power profiling (YAML: energy.power_profiling.*)
+        yaml_power = yaml_energy.get("power_profiling", {})
+        self.POWER_PROFILING_ENABLED = yaml_power.get("enabled", False)
+        self.POWER_TEST_COMMAND = yaml_power.get("test_command", None)
+        self.POWER_REGRESSION_THRESHOLD = yaml_power.get("regression_threshold", 5.0)
         # Logging
         self.VERBOSE = os.getenv("VERBOSE", "false").lower() == "true"
         self.LOG_FILE = self.OUTPUT_DIR / "mining.log"
-    def validate(self) -> bool:
-        """Validate that all required configuration is present.
+    def _parse_repository_urls(self, urls_str: str) -> List[str]:
+        # Parse comma-separated repository URLs from environment variable.
+        if not urls_str:
+            return []
+        return [url.strip() for url in urls_str.split(",") if url.strip()]
-        Returns:
-            True if configuration is valid
-        """
-        required_attrs = ["GITHUB_TOKEN", "CLAUDE_API_KEY", "MAX_REPOS", "COMMITS_PER_REPO"]
+    def validate(self) -> bool:
+        # Validate that all required configuration is present.
+        required_attrs = ["GITHUB_TOKEN", "MAX_REPOS", "COMMITS_PER_REPO"]
         for attr in required_attrs:
             if not getattr(self, attr, None):
@@ -111,7 +165,7 @@ class Config:
         return True
     def __repr__(self) -> str:
-        """String representation of configuration (hiding sensitive data)."""
+        # String representation of configuration (hiding sensitive data).
         return (
             f"Config("
             f"MAX_REPOS={self.MAX_REPOS}, "
@@ -127,14 +181,7 @@ _config_instance = None
 def get_config(env_file: str = ".env") -> Config:
-    """Get or create global configuration instance.
-    Args:
-        env_file: Path to .env file
-    Returns:
-        Config instance
-    """
+    # Get or create global configuration instance.
     global _config_instance
     if _config_instance is None:
         _config_instance = Config(env_file)

greenmining/controllers/__init__.py CHANGED Viewed

@@ -1,8 +1,4 @@
-"""
-Controllers Package - Business logic and orchestration for mining operations.
-Controllers coordinate between models, services, and presenters following MCP architecture.
-"""
+# Controllers Package - Business logic and orchestration for mining operations.
 from .repository_controller import RepositoryController

greenmining 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl

greenmining 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl