PyPI - greenmining - Versions diffs - 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl - Mend

greenmining 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

greenmining/__init__.py +11 -29
greenmining/__main__.py +9 -3
greenmining/__version__.py +2 -2
greenmining/analyzers/__init__.py +3 -7
greenmining/analyzers/code_diff_analyzer.py +151 -61
greenmining/analyzers/qualitative_analyzer.py +15 -81
greenmining/analyzers/statistical_analyzer.py +8 -69
greenmining/analyzers/temporal_analyzer.py +16 -72
greenmining/config.py +105 -58
greenmining/controllers/__init__.py +1 -5
greenmining/controllers/repository_controller.py +153 -94
greenmining/energy/__init__.py +13 -0
greenmining/energy/base.py +165 -0
greenmining/energy/codecarbon_meter.py +146 -0
greenmining/energy/rapl.py +157 -0
greenmining/gsf_patterns.py +4 -26
greenmining/models/__init__.py +1 -5
greenmining/models/aggregated_stats.py +4 -4
greenmining/models/analysis_result.py +4 -4
greenmining/models/commit.py +5 -5
greenmining/models/repository.py +5 -5
greenmining/presenters/__init__.py +1 -5
greenmining/presenters/console_presenter.py +24 -24
greenmining/services/__init__.py +10 -6
greenmining/services/commit_extractor.py +8 -152
greenmining/services/data_aggregator.py +45 -175
greenmining/services/data_analyzer.py +9 -202
greenmining/services/github_fetcher.py +212 -323
greenmining/services/github_graphql_fetcher.py +371 -0
greenmining/services/local_repo_analyzer.py +387 -0
greenmining/services/reports.py +33 -137
greenmining/utils.py +21 -149
{greenmining-1.0.3.dist-info → greenmining-1.0.4.dist-info}/METADATA +61 -151
greenmining-1.0.4.dist-info/RECORD +37 -0
{greenmining-1.0.3.dist-info → greenmining-1.0.4.dist-info}/WHEEL +1 -1
greenmining/analyzers/ml_feature_extractor.py +0 -512
greenmining/analyzers/nlp_analyzer.py +0 -365
greenmining/cli.py +0 -471
greenmining/main.py +0 -37
greenmining-1.0.3.dist-info/RECORD +0 -36
greenmining-1.0.3.dist-info/entry_points.txt +0 -2
{greenmining-1.0.3.dist-info → greenmining-1.0.4.dist-info}/licenses/LICENSE +0 -0
{greenmining-1.0.3.dist-info → greenmining-1.0.4.dist-info}/top_level.txt +0 -0

greenmining/services/reports.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Report generation for green mining analysis."""
+# Report generation for green mining analysis.
 from __future__ import annotations
@@ -7,8 +7,6 @@ from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, Optional
-import click
 from greenmining.config import get_config
 from greenmining.utils import (
     colored_print,
@@ -20,10 +18,10 @@ from greenmining.utils import (
 class ReportGenerator:
-    """Generates markdown report from aggregated statistics."""
+    # Generates markdown report from aggregated statistics.
     def __init__(self):
-        """Initialize report generator."""
+        # Initialize report generator.
         pass
     def generate_report(
@@ -32,16 +30,7 @@ class ReportGenerator:
         analysis_data: dict[str, Any],
         repos_data: dict[str, Any],
     ) -> str:
-        """Generate comprehensive markdown report.
-        Args:
-            aggregated_data: Aggregated statistics
-            analysis_data: Original analysis results
-            repos_data: Repository metadata
-        Returns:
-            Markdown report content
-        """
+        # Generate comprehensive markdown report.
         report_sections = []
         # Title and metadata
@@ -68,7 +57,7 @@ class ReportGenerator:
         return "\n\n".join(report_sections)
     def _generate_header(self) -> str:
-        """Generate report header."""
+        # Generate report header.
         return f"""# Mining Software Repositories for Green Microservices
 ## Comprehensive Analysis Report
@@ -78,7 +67,7 @@ class ReportGenerator:
 ---"""
     def _generate_executive_summary(self, data: dict[str, Any]) -> str:
-        """Generate executive summary."""
+        # Generate executive summary.
         summary = data["summary"]
         top_patterns = data["known_patterns"][:3] if data["known_patterns"] else []
@@ -106,7 +95,7 @@ These findings suggest that while green software practices are present in micros
     def _generate_methodology(
         self, repos_data: dict[str, Any], analysis_data: dict[str, Any]
     ) -> str:
-        """Generate methodology section."""
+        # Generate methodology section.
         metadata = repos_data.get("metadata", {})
         analysis_metadata = analysis_data.get("metadata", {})
@@ -167,7 +156,7 @@ Commits were analyzed using a keyword and heuristic-based classification framewo
 - 2-year time window may not capture all historical practices"""
     def _generate_results(self, data: dict[str, Any]) -> str:
-        """Generate results section."""
+        # Generate results section.
         sections = []
         # 2.1 Green Awareness
@@ -182,15 +171,15 @@ Commits were analyzed using a keyword and heuristic-based classification framewo
         # 2.4 Per-Repository Analysis
         sections.append(self._generate_repo_analysis_section(data))
-        # 2.5 Enhanced Statistics (if available)
-        enhanced_section = self._generate_enhanced_statistics_section(data)
-        if enhanced_section:
-            sections.append(enhanced_section)
+        # 2.5 Statistics (if available)
+        stats_section = self._generate_statistics_section(data)
+        if stats_section:
+            sections.append(stats_section)
         return "### 2. Results\n\n" + "\n\n".join(sections)
     def _generate_green_awareness_section(self, data: dict[str, Any]) -> str:
-        """Generate green awareness subsection."""
+        # Generate green awareness subsection.
         summary = data["summary"]
         per_lang = data["per_language_stats"]
         per_repo = data["per_repo_stats"]
@@ -219,7 +208,7 @@ Commits were analyzed using a keyword and heuristic-based classification framewo
 {lang_table}"""
     def _generate_known_patterns_section(self, data: dict[str, Any]) -> str:
-        """Generate known patterns subsection."""
+        # Generate known patterns subsection.
         patterns = data["known_patterns"]
         if not patterns:
@@ -259,7 +248,7 @@ The following table summarizes the known green software patterns detected in the
 {chr(10).join(pattern_details)}"""
     def _generate_emergent_patterns_section(self, data: dict[str, Any]) -> str:
-        """Generate emergent patterns subsection."""
+        # Generate emergent patterns subsection.
         emergent = data["emergent_patterns"]
         if not emergent:
@@ -281,7 +270,7 @@ No novel microservice-specific green practices were automatically detected. Manu
 {chr(10).join(pattern_list)}"""
     def _generate_repo_analysis_section(self, data: dict[str, Any]) -> str:
-        """Generate per-repository analysis subsection."""
+        # Generate per-repository analysis subsection.
         per_repo = data["per_repo_stats"]
         # Top 10 greenest
@@ -307,37 +296,30 @@ No novel microservice-specific green practices were automatically detected. Manu
 **Repositories with No Green Mentions:** {no_green_count} out of {len(per_repo)} repositories had zero green-aware commits."""
-    def _generate_enhanced_statistics_section(self, data: dict[str, Any]) -> str:
-        """Generate enhanced statistical analysis subsection.
-        Args:
-            data: Aggregated data containing enhanced_statistics field
-        Returns:
-            Markdown section with enhanced statistics
-        """
-        enhanced_stats = data.get("enhanced_statistics")
+    def _generate_statistics_section(self, data: dict[str, Any]) -> str:
+        # Generate statistical analysis subsection.
+        stats = data.get("statistics")
-        if not enhanced_stats:
+        if not stats:
             return ""
         # Handle error case
-        if "error" in enhanced_stats:
-            return f"""#### 2.5 Enhanced Statistical Analysis
+        if "error" in stats:
+            return f"""#### 2.5 Statistical Analysis
-**Note:** Enhanced statistical analysis encountered an error: {enhanced_stats['error']}
+**Note:** Statistical analysis encountered an error: {stats['error']}
 """
         sections = []
-        sections.append("#### 2.5 Enhanced Statistical Analysis")
+        sections.append("#### 2.5 Statistical Analysis")
         sections.append("")
         sections.append(
-            "This section presents advanced statistical analyses of green software engineering patterns."
+            "This section presents statistical analyses of green software engineering patterns."
         )
         sections.append("")
         # Temporal trends
-        temporal = enhanced_stats.get("temporal_trends", {})
+        temporal = stats.get("temporal_trends", {})
         if temporal and "error" not in temporal:
             sections.append("##### Temporal Trends")
             sections.append("")
@@ -362,7 +344,7 @@ No novel microservice-specific green practices were automatically detected. Manu
                 sections.append("")
         # Pattern correlations
-        correlations = enhanced_stats.get("pattern_correlations", {})
+        correlations = stats.get("pattern_correlations", {})
         if correlations and "error" not in correlations:
             sections.append("##### Pattern Correlations")
             sections.append("")
@@ -383,7 +365,7 @@ No novel microservice-specific green practices were automatically detected. Manu
                 sections.append("")
         # Effect sizes
-        effect_sizes = enhanced_stats.get("effect_size", {})
+        effect_sizes = stats.get("effect_size", {})
         if effect_sizes and "error" not in effect_sizes:
             sections.append("##### Effect Size Analysis")
             sections.append("")
@@ -398,7 +380,7 @@ No novel microservice-specific green practices were automatically detected. Manu
                 sections.append("")
         # Descriptive statistics
-        descriptive = enhanced_stats.get("descriptive", {})
+        descriptive = stats.get("descriptive", {})
         if descriptive and "error" not in descriptive:
             sections.append("##### Descriptive Statistics")
             sections.append("")
@@ -422,7 +404,7 @@ No novel microservice-specific green practices were automatically detected. Manu
         return "\n".join(sections)
     def _generate_discussion(self, data: dict[str, Any]) -> str:
-        """Generate discussion section."""
+        # Generate discussion section.
         summary = data["summary"]
         green_pct = summary["green_aware_percentage"]
@@ -473,7 +455,7 @@ Based on the detected patterns, microservice developers primarily focus on:
 4. **Best practices dissemination:** Green microservices patterns should be documented and promoted in the community"""
     def _generate_limitations(self) -> str:
-        """Generate limitations section."""
+        # Generate limitations section.
         return """### 4. Limitations
 #### 4.1 Sample Size and Selection Bias
@@ -505,7 +487,7 @@ Based on the detected patterns, microservice developers primarily focus on:
 5. **Energy measurement:** Correlate detected patterns with actual energy consumption data"""
     def _generate_conclusion(self, data: dict[str, Any]) -> str:
-        """Generate conclusion section."""
+        # Generate conclusion section.
         summary = data["summary"]
         top_patterns = (
             [p["pattern_name"] for p in data["known_patterns"][:5]]
@@ -559,94 +541,8 @@ Answer: Automated keyword analysis found limited evidence of novel patterns. Man
 *For questions or additional analysis, please refer to the accompanying data files: `green_analysis_results.csv` and `aggregated_statistics.json`*"""
     def save_report(self, report_content: str, output_file: Path):
-        """Save report to markdown file."""
+        # Save report to markdown file.
         output_file.parent.mkdir(parents=True, exist_ok=True)
         with open(output_file, "w", encoding="utf-8") as f:
             f.write(report_content)
         colored_print(f"Saved report to {output_file}", "green")
-@click.command()
-@click.option("--aggregated-file", default=None, help="Input aggregated statistics file")
-@click.option("--analysis-file", default=None, help="Input analysis results file")
-@click.option("--repos-file", default=None, help="Input repositories file")
-@click.option(
-    "--output-file",
-    default=None,
-    help="Output markdown file (default: data/green_microservices_analysis.md)",
-)
-@click.option("--config-file", default=".env", help="Path to .env configuration file")
-def report(
-    aggregated_file: Optional[str],
-    analysis_file: Optional[str],
-    repos_file: Optional[str],
-    output_file: Optional[str],
-    config_file: str,
-):
-    """Generate comprehensive markdown report."""
-    print_banner("Report Generator")
-    try:
-        # Load configuration
-        config = get_config(config_file)
-        # Determine input/output files
-        agg_input = Path(aggregated_file) if aggregated_file else config.AGGREGATED_FILE
-        analysis_input = Path(analysis_file) if analysis_file else config.ANALYSIS_FILE
-        repos_input = Path(repos_file) if repos_file else config.REPOS_FILE
-        output = Path(output_file) if output_file else config.REPORT_FILE
-        # Check if input files exist
-        missing_files = []
-        if not agg_input.exists():
-            missing_files.append(str(agg_input))
-        if not analysis_input.exists():
-            missing_files.append(str(analysis_input))
-        if not repos_input.exists():
-            missing_files.append(str(repos_input))
-        if missing_files:
-            colored_print("Missing required input files:", "red")
-            for f in missing_files:
-                colored_print(f"  - {f}", "red")
-            colored_print(
-                "\nPlease run the full pipeline first: fetch → extract → analyze → aggregate",
-                "yellow",
-            )
-            exit(1)
-        # Load data
-        colored_print("Loading data files...", "blue")
-        aggregated_data = load_json_file(agg_input)
-        analysis_data = load_json_file(analysis_input)
-        repos_data = load_json_file(repos_input)
-        colored_print("✓ Data loaded successfully", "green")
-        # Generate report
-        colored_print("\nGenerating report...", "blue")
-        generator = ReportGenerator()
-        report_content = generator.generate_report(aggregated_data, analysis_data, repos_data)
-        # Save report
-        generator.save_report(report_content, output)
-        colored_print("\n✓ Report generated successfully!", "green")
-        colored_print(f"Output: {output}", "green")
-        colored_print(f"Report size: {len(report_content):,} characters", "white")
-    except FileNotFoundError as e:
-        colored_print(f"File not found: {e}", "red")
-        exit(1)
-    except json.JSONDecodeError as e:
-        colored_print(f"Invalid JSON: {e}", "red")
-        exit(1)
-    except Exception as e:
-        colored_print(f"Error: {e}", "red")
-        import traceback
-        traceback.print_exc()
-        exit(1)
-if __name__ == "__main__":
-    report()

greenmining/utils.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Utility functions for green microservices mining CLI."""
+# Utility functions for green microservices mining CLI.
 import json
 import time
@@ -15,32 +15,14 @@ init(autoreset=True)
 def format_timestamp(dt: Optional[datetime] = None) -> str:
-    """Format timestamp in ISO 8601 format.
-    Args:
-        dt: Datetime object, defaults to now
-    Returns:
-        ISO formatted timestamp string
-    """
+    # Format timestamp in ISO 8601 format.
     if dt is None:
         dt = datetime.utcnow()
     return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
 def load_json_file(path: Path) -> dict[str, Any]:
-    """Load JSON data from file.
-    Args:
-        path: Path to JSON file
-    Returns:
-        Parsed JSON data
-    Raises:
-        FileNotFoundError: If file doesn't exist
-        json.JSONDecodeError: If file is not valid JSON
-    """
+    # Load JSON data from file.
     if not path.exists():
         raise FileNotFoundError(f"File not found: {path}")
@@ -49,13 +31,7 @@ def load_json_file(path: Path) -> dict[str, Any]:
 def save_json_file(data: dict[str, Any], path: Path, indent: int = 2) -> None:
-    """Save data to JSON file.
-    Args:
-        data: Data to save
-        path: Output file path
-        indent: JSON indentation level
-    """
+    # Save data to JSON file.
     path.parent.mkdir(parents=True, exist_ok=True)
     with open(path, "w", encoding="utf-8") as f:
@@ -63,17 +39,7 @@ def save_json_file(data: dict[str, Any], path: Path, indent: int = 2) -> None:
 def load_csv_file(path: Path) -> pd.DataFrame:
-    """Load CSV file as pandas DataFrame.
-    Args:
-        path: Path to CSV file
-    Returns:
-        DataFrame with CSV data
-    Raises:
-        FileNotFoundError: If file doesn't exist
-    """
+    # Load CSV file as pandas DataFrame.
     if not path.exists():
         raise FileNotFoundError(f"File not found: {path}")
@@ -81,40 +47,18 @@ def load_csv_file(path: Path) -> pd.DataFrame:
 def save_csv_file(df: pd.DataFrame, path: Path) -> None:
-    """Save DataFrame to CSV file.
-    Args:
-        df: DataFrame to save
-        path: Output file path
-    """
+    # Save DataFrame to CSV file.
     path.parent.mkdir(parents=True, exist_ok=True)
     df.to_csv(path, index=False, encoding="utf-8")
 def estimate_tokens(text: str) -> int:
-    """Estimate number of tokens in text.
-    Uses rough approximation: 1 token ≈ 4 characters
-    Args:
-        text: Input text
-    Returns:
-        Estimated token count
-    """
+    # Estimate number of tokens in text.
     return len(text) // 4
 def estimate_cost(tokens: int, model: str = "claude-sonnet-4-20250514") -> float:
-    """Estimate API cost based on token usage.
-    Args:
-        tokens: Number of tokens
-        model: Model name
-    Returns:
-        Estimated cost in USD
-    """
+    # Estimate API cost based on token usage.
     # Claude Sonnet 4 pricing (as of Dec 2024)
     # Input: $3 per million tokens
     # Output: $15 per million tokens
@@ -135,17 +79,7 @@ def retry_on_exception(
     exponential_backoff: bool = True,
     exceptions: tuple = (Exception,),
 ) -> Callable:
-    """Decorator to retry function on exception.
-    Args:
-        max_retries: Maximum number of retry attempts
-        delay: Initial delay between retries in seconds
-        exponential_backoff: Use exponential backoff for delays
-        exceptions: Tuple of exception types to catch
-    Returns:
-        Decorated function
-    """
+    # Decorator to retry function on exception.
     def decorator(func: Callable) -> Callable:
         @wraps(func)
@@ -175,12 +109,7 @@ def retry_on_exception(
 def colored_print(text: str, color: str = "white") -> None:
-    """Print colored text to console.
-    Args:
-        text: Text to print
-        color: Color name (red, green, yellow, blue, magenta, cyan, white)
-    """
+    # Print colored text to console.
     color_map = {
         "red": Fore.RED,
         "green": Fore.GREEN,
@@ -196,14 +125,7 @@ def colored_print(text: str, color: str = "white") -> None:
 def handle_github_rate_limit(response) -> None:
-    """Handle GitHub API rate limiting.
-    Args:
-        response: GitHub API response object
-    Raises:
-        Exception: If rate limit is exceeded
-    """
+    # Handle GitHub API rate limiting.
     if hasattr(response, "status") and response.status == 403:
         colored_print("GitHub API rate limit exceeded!", "red")
         colored_print("Please wait or use an authenticated token.", "yellow")
@@ -211,39 +133,17 @@ def handle_github_rate_limit(response) -> None:
 def format_number(num: int) -> str:
-    """Format large numbers with thousand separators.
-    Args:
-        num: Number to format
-    Returns:
-        Formatted string
-    """
+    # Format large numbers with thousand separators.
     return f"{num:,}"
 def format_percentage(value: float, decimals: int = 1) -> str:
-    """Format percentage value.
-    Args:
-        value: Percentage value (0-100)
-        decimals: Number of decimal places
-    Returns:
-        Formatted percentage string
-    """
+    # Format percentage value.
     return f"{value:.{decimals}f}%"
 def format_duration(seconds: float) -> str:
-    """Format duration in human-readable format.
-    Args:
-        seconds: Duration in seconds
-    Returns:
-        Formatted duration string (e.g., "2h 15m")
-    """
+    # Format duration in human-readable format.
     if seconds < 60:
         return f"{int(seconds)}s"
     elif seconds < 3600:
@@ -257,40 +157,20 @@ def format_duration(seconds: float) -> str:
 def truncate_text(text: str, max_length: int = 100) -> str:
-    """Truncate text to maximum length.
-    Args:
-        text: Input text
-        max_length: Maximum length
-    Returns:
-        Truncated text with ellipsis if needed
-    """
+    # Truncate text to maximum length.
     if len(text) <= max_length:
         return text
     return text[: max_length - 3] + "..."
 def create_checkpoint(checkpoint_file: Path, data: dict[str, Any]) -> None:
-    """Create checkpoint file for resuming operations.
-    Args:
-        checkpoint_file: Path to checkpoint file
-        data: Checkpoint data
-    """
+    # Create checkpoint file for resuming operations.
     save_json_file(data, checkpoint_file)
     colored_print(f"Checkpoint saved: {checkpoint_file}", "green")
 def load_checkpoint(checkpoint_file: Path) -> Optional[dict[str, Any]]:
-    """Load checkpoint data if exists.
-    Args:
-        checkpoint_file: Path to checkpoint file
-    Returns:
-        Checkpoint data or None if doesn't exist
-    """
+    # Load checkpoint data if exists.
     if checkpoint_file.exists():
         try:
             return load_json_file(checkpoint_file)
@@ -300,21 +180,13 @@ def load_checkpoint(checkpoint_file: Path) -> Optional[dict[str, Any]]:
 def print_banner(title: str) -> None:
-    """Print formatted banner.
-    Args:
-        title: Banner title
-    """
+    # Print formatted banner.
     colored_print("\n" + "=" * 60, "cyan")
-    colored_print(f"🔍 {title}", "cyan")
+    colored_print(f" {title}", "cyan")
     colored_print("=" * 60 + "\n", "cyan")
 def print_section(title: str) -> None:
-    """Print section header.
-    Args:
-        title: Section title
-    """
-    colored_print(f"\n📌 {title}", "blue")
+    # Print section header.
+    colored_print(f"\n {title}", "blue")
     colored_print("-" * 60, "blue")

greenmining 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl

greenmining 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl