greenmining-1.0.5-py3-none-any.whl → greenmining-1.0.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- greenmining/__init__.py +54 -2
- greenmining/analyzers/__init__.py +9 -0
- greenmining/analyzers/metrics_power_correlator.py +165 -0
- greenmining/analyzers/power_regression.py +212 -0
- greenmining/analyzers/version_power_analyzer.py +246 -0
- greenmining/config.py +46 -34
- greenmining/dashboard/__init__.py +5 -0
- greenmining/dashboard/app.py +200 -0
- greenmining/energy/__init__.py +8 -1
- greenmining/energy/base.py +45 -35
- greenmining/energy/carbon_reporter.py +242 -0
- greenmining/energy/codecarbon_meter.py +25 -24
- greenmining/energy/cpu_meter.py +144 -0
- greenmining/energy/rapl.py +30 -36
- greenmining/services/__init__.py +13 -3
- greenmining/services/commit_extractor.py +9 -5
- greenmining/services/local_repo_analyzer.py +325 -63
- greenmining/services/reports.py +5 -8
- {greenmining-1.0.5.dist-info → greenmining-1.0.7.dist-info}/METADATA +212 -43
- {greenmining-1.0.5.dist-info → greenmining-1.0.7.dist-info}/RECORD +23 -16
- {greenmining-1.0.5.dist-info → greenmining-1.0.7.dist-info}/WHEEL +0 -0
- {greenmining-1.0.5.dist-info → greenmining-1.0.7.dist-info}/licenses/LICENSE +0 -0
- {greenmining-1.0.5.dist-info → greenmining-1.0.7.dist-info}/top_level.txt +0 -0
greenmining/services/local_repo_analyzer.py
CHANGED
@@ -5,7 +5,9 @@ from __future__ import annotations
 import os
 import re
 import shutil
+import subprocess
 import tempfile
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass, field
 from datetime import datetime, timedelta
 from pathlib import Path
@@ -25,10 +27,62 @@ from greenmining.gsf_patterns import get_pattern_by_keywords, is_green_aware, GS
 from greenmining.utils import colored_print


+@dataclass
+class MethodMetrics:
+    # Per-method analysis metrics from Lizard integration.
+
+    name: str
+    long_name: str
+    filename: str
+    nloc: int = 0
+    complexity: int = 0
+    token_count: int = 0
+    parameters: int = 0
+    start_line: int = 0
+    end_line: int = 0
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "name": self.name,
+            "long_name": self.long_name,
+            "filename": self.filename,
+            "nloc": self.nloc,
+            "complexity": self.complexity,
+            "token_count": self.token_count,
+            "parameters": self.parameters,
+            "start_line": self.start_line,
+            "end_line": self.end_line,
+        }
+
+
+@dataclass
+class SourceCodeChange:
+    # Source code before/after a commit for refactoring detection.
+
+    filename: str
+    source_code_before: Optional[str] = None
+    source_code_after: Optional[str] = None
+    diff: Optional[str] = None
+    added_lines: int = 0
+    deleted_lines: int = 0
+    change_type: str = ""  # ADD, DELETE, MODIFY, RENAME
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "filename": self.filename,
+            "source_code_before": self.source_code_before,
+            "source_code_after": self.source_code_after,
+            "diff": self.diff,
+            "added_lines": self.added_lines,
+            "deleted_lines": self.deleted_lines,
+            "change_type": self.change_type,
+        }
+
+
 @dataclass
 class CommitAnalysis:
     # Analysis result for a single commit.
-
+
     hash: str
     message: str
     author: str
@@ -42,21 +96,31 @@ class CommitAnalysis:
     files_modified: List[str]
     insertions: int
     deletions: int
-
+
     # PyDriller DMM metrics
     dmm_unit_size: Optional[float] = None
     dmm_unit_complexity: Optional[float] = None
     dmm_unit_interfacing: Optional[float] = None
-
+
     # Structural metrics (Lizard)
     total_nloc: int = 0
     total_complexity: int = 0
     max_complexity: int = 0
     methods_count: int = 0
-
+
+    # Method-level analysis (Phase 3.2)
+    methods: List[MethodMetrics] = field(default_factory=list)
+
+    # Source code access (Phase 3.3)
+    source_changes: List[SourceCodeChange] = field(default_factory=list)
+
+    # Energy metrics (Phase 2.2 - populated when energy_tracking=True)
+    energy_joules: Optional[float] = None
+    energy_watts_avg: Optional[float] = None
+
     def to_dict(self) -> Dict[str, Any]:
         # Convert to dictionary.
-        return {
+        result = {
             "commit_hash": self.hash,
             "message": self.message,
             "author": self.author,
@@ -79,11 +143,23 @@ class CommitAnalysis:
             "methods_count": self.methods_count,
         }

+        if self.methods:
+            result["methods"] = [m.to_dict() for m in self.methods]
+
+        if self.source_changes:
+            result["source_changes"] = [s.to_dict() for s in self.source_changes]
+
+        if self.energy_joules is not None:
+            result["energy_joules"] = self.energy_joules
+            result["energy_watts_avg"] = self.energy_watts_avg
+
+        return result
+

 @dataclass
 class RepositoryAnalysis:
     # Complete analysis result for a repository.
-
+
     url: str
     name: str
     total_commits: int
@@ -91,10 +167,11 @@ class RepositoryAnalysis:
     green_commit_rate: float
     commits: List[CommitAnalysis] = field(default_factory=list)
     process_metrics: Dict[str, Any] = field(default_factory=dict)
-
+    energy_metrics: Optional[Dict[str, Any]] = None
+
     def to_dict(self) -> Dict[str, Any]:
         # Convert to dictionary.
-        return {
+        result = {
             "url": self.url,
             "name": self.name,
             "total_commits": self.total_commits,
@@ -103,11 +180,15 @@ class RepositoryAnalysis:
             "commits": [c.to_dict() for c in self.commits],
             "process_metrics": self.process_metrics,
         }
+        if self.energy_metrics:
+            result["energy_metrics"] = self.energy_metrics
+        return result


 class LocalRepoAnalyzer:
     # Analyze repositories directly from GitHub URLs using PyDriller.
-
+    # Supports HTTPS URLs, SSH URLs, and private repositories.
+
     def __init__(
         self,
         clone_path: Optional[Path] = None,
@@ -116,8 +197,29 @@ class LocalRepoAnalyzer:
         skip_merges: bool = True,
         compute_process_metrics: bool = True,
         cleanup_after: bool = True,
+        ssh_key_path: Optional[str] = None,
+        github_token: Optional[str] = None,
+        energy_tracking: bool = False,
+        energy_backend: str = "rapl",
+        method_level_analysis: bool = False,
+        include_source_code: bool = False,
+        process_metrics: str = "standard",
     ):
         # Initialize the local repository analyzer.
+        # Args:
+        #     clone_path: Directory to clone repos into
+        #     max_commits: Maximum commits to analyze per repo
+        #     days_back: How far back to analyze
+        #     skip_merges: Skip merge commits
+        #     compute_process_metrics: Compute PyDriller process metrics
+        #     cleanup_after: Remove cloned repos after analysis
+        #     ssh_key_path: Path to SSH private key for private repos
+        #     github_token: GitHub token for private HTTPS repos
+        #     energy_tracking: Enable automatic energy measurement
+        #     energy_backend: Energy measurement backend (rapl, codecarbon)
+        #     method_level_analysis: Extract per-method metrics via Lizard
+        #     include_source_code: Include source code before/after in results
+        #     process_metrics: "standard" or "full" PyDriller process metrics
         self.clone_path = clone_path or Path(tempfile.gettempdir()) / "greenmining_repos"
         self.clone_path.mkdir(parents=True, exist_ok=True)
         self.max_commits = max_commits
@@ -126,7 +228,53 @@ class LocalRepoAnalyzer:
         self.compute_process_metrics = compute_process_metrics
         self.cleanup_after = cleanup_after
         self.gsf_patterns = GSF_PATTERNS
-
+
+        # Phase 1.3: Private repository support
+        self.ssh_key_path = ssh_key_path
+        self.github_token = github_token
+
+        # Phase 2.2: Integrated energy tracking
+        self.energy_tracking = energy_tracking
+        self.energy_backend = energy_backend
+        self._energy_meter = None
+        if energy_tracking:
+            self._init_energy_meter()
+
+        # Phase 3.2: Method-level analysis
+        self.method_level_analysis = method_level_analysis
+
+        # Phase 3.3: Source code access
+        self.include_source_code = include_source_code
+
+        # Phase 3.1: Full process metrics mode
+        self.process_metrics_mode = process_metrics
+
+    def _init_energy_meter(self):
+        # Initialize the energy measurement backend.
+        try:
+            from greenmining.energy.base import get_energy_meter
+
+            self._energy_meter = get_energy_meter(self.energy_backend)
+        except Exception as e:
+            colored_print(f" Warning: Energy tracking unavailable: {e}", "yellow")
+            self.energy_tracking = False
+
+    def _prepare_auth_url(self, url: str) -> str:
+        # Prepare authenticated URL for private repositories.
+        if self.github_token and url.startswith("https://"):
+            # Inject token into HTTPS URL for private repo access
+            return url.replace("https://", f"https://x-access-token:{self.github_token}@")
+        return url
+
+    def _setup_ssh_env(self) -> Dict[str, str]:
+        # Set up SSH environment for private repository cloning.
+        env = os.environ.copy()
+        if self.ssh_key_path:
+            ssh_key = os.path.expanduser(self.ssh_key_path)
+            if os.path.exists(ssh_key):
+                env["GIT_SSH_COMMAND"] = f"ssh -i {ssh_key} -o StrictHostKeyChecking=no"
+        return env
+
     def _parse_repo_url(self, url: str) -> tuple[str, str]:
         # Parse repository URL to extract owner and name.
         # Handle HTTPS URLs
@@ -134,66 +282,111 @@ class LocalRepoAnalyzer:
         match = re.search(https_pattern, url)
         if match:
             return match.group(1), match.group(2).replace(".git", "")
-
+
         # Handle SSH URLs
         ssh_pattern = r"git@github\.com:([^/]+)/([^/\.]+)"
         match = re.search(ssh_pattern, url)
         if match:
             return match.group(1), match.group(2).replace(".git", "")
-
+
         raise ValueError(f"Could not parse GitHub URL: {url}")
-
+
     def _get_pattern_details(self, matched_patterns: List[str]) -> List[Dict[str, Any]]:
         # Get detailed pattern information.
         details = []
         for pattern_id, pattern in self.gsf_patterns.items():
             if pattern["name"] in matched_patterns:
-                details.append({
-                    "name": pattern["name"],
-                    "category": pattern["category"],
-                    "description": pattern["description"],
-                    "sci_impact": pattern["sci_impact"],
-                })
+                details.append(
+                    {
+                        "name": pattern["name"],
+                        "category": pattern["category"],
+                        "description": pattern["description"],
+                        "sci_impact": pattern["sci_impact"],
+                    }
+                )
         return details
-
+
+    def _extract_method_metrics(self, commit) -> List[MethodMetrics]:
+        # Extract per-method metrics from modified files using Lizard (via PyDriller).
+        methods = []
+        try:
+            for mod in commit.modified_files:
+                if mod.methods:
+                    for method in mod.methods:
+                        methods.append(
+                            MethodMetrics(
+                                name=method.name,
+                                long_name=method.long_name,
+                                filename=mod.filename,
+                                nloc=method.nloc,
+                                complexity=method.complexity,
+                                token_count=method.token_count,
+                                parameters=len(method.parameters),
+                                start_line=method.start_line,
+                                end_line=method.end_line,
+                            )
+                        )
+        except Exception:
+            pass
+        return methods
+
+    def _extract_source_changes(self, commit) -> List[SourceCodeChange]:
+        # Extract source code before/after for each modified file.
+        changes = []
+        try:
+            for mod in commit.modified_files:
+                change = SourceCodeChange(
+                    filename=mod.filename,
+                    source_code_before=mod.source_code_before if mod.source_code_before else None,
+                    source_code_after=mod.source_code if mod.source_code else None,
+                    diff=mod.diff if mod.diff else None,
+                    added_lines=mod.added_lines,
+                    deleted_lines=mod.deleted_lines,
+                    change_type=mod.change_type.name if mod.change_type else "",
+                )
+                changes.append(change)
+        except Exception:
+            pass
+        return changes
+
     def analyze_commit(self, commit) -> CommitAnalysis:
         # Analyze a single PyDriller commit object.
         message = commit.msg or ""
-
+
         # Green awareness check
         green_aware = is_green_aware(message)
-
+
         # GSF pattern matching
         matched_patterns = get_pattern_by_keywords(message)
         pattern_details = self._get_pattern_details(matched_patterns)
-
+
         # Confidence calculation
         pattern_count = len(matched_patterns)
         confidence = "high" if pattern_count >= 2 else "medium" if pattern_count == 1 else "low"
-
+
         # File modifications
         files_modified = [mod.filename for mod in commit.modified_files]
         insertions = sum(mod.added_lines for mod in commit.modified_files)
         deletions = sum(mod.deleted_lines for mod in commit.modified_files)
-
+
         # Delta Maintainability Model (if available)
         dmm_unit_size = None
         dmm_unit_complexity = None
         dmm_unit_interfacing = None
-
+
         try:
             dmm_unit_size = commit.dmm_unit_size
             dmm_unit_complexity = commit.dmm_unit_complexity
             dmm_unit_interfacing = commit.dmm_unit_interfacing
         except Exception:
             pass  # DMM may not be available for all commits
-
+
         # Structural metrics from Lizard (via PyDriller)
         total_nloc = 0
         total_complexity = 0
         max_complexity = 0
         methods_count = 0
-
+
         try:
             for mod in commit.modified_files:
                 if mod.nloc:
@@ -206,7 +399,17 @@ class LocalRepoAnalyzer:
                 methods_count += len(mod.methods)
         except Exception:
             pass  # Structural metrics may fail for some files
-
+
+        # Phase 3.2: Method-level analysis
+        methods = []
+        if self.method_level_analysis:
+            methods = self._extract_method_metrics(commit)
+
+        # Phase 3.3: Source code access
+        source_changes = []
+        if self.include_source_code:
+            source_changes = self._extract_source_changes(commit)
+
         return CommitAnalysis(
             hash=commit.hash,
             message=message,
@@ -228,66 +431,93 @@ class LocalRepoAnalyzer:
             total_complexity=total_complexity,
             max_complexity=max_complexity,
             methods_count=methods_count,
+            methods=methods,
+            source_changes=source_changes,
         )
-
+
     def analyze_repository(self, url: str) -> RepositoryAnalysis:
         # Analyze a repository from its URL.
         owner, repo_name = self._parse_repo_url(url)
         full_name = f"{owner}/{repo_name}"
-
+
         colored_print(f"\n Analyzing repository: {full_name}", "cyan")
-
+
+        # Phase 1.3: Prepare authenticated URL for private repos
+        auth_url = self._prepare_auth_url(url)
+
         # Calculate date range
         since_date = datetime.now() - timedelta(days=self.days_back)
-
+
         # Configure PyDriller Repository
         repo_config = {
-            "path_to_repo": url,
+            "path_to_repo": auth_url,
             "since": since_date,
             "only_no_merge": self.skip_merges,
         }
-
+
         # Clone to specific path if needed
         local_path = self.clone_path / repo_name
         if local_path.exists():
             shutil.rmtree(local_path)
-
+
         repo_config["clone_repo_to"] = str(self.clone_path)
-
+
         colored_print(f" Cloning to: {local_path}", "cyan")
-
+
+        # Phase 2.2: Start energy measurement if enabled
+        energy_result = None
+        if self.energy_tracking and self._energy_meter:
+            try:
+                self._energy_meter.start()
+            except Exception as e:
+                colored_print(f" Warning: Energy measurement start failed: {e}", "yellow")
+
         commits_analyzed = []
         commit_count = 0
-
+
         try:
             for commit in Repository(**repo_config).traverse_commits():
                 if commit_count >= self.max_commits:
                     break
-
+
                 try:
                     analysis = self.analyze_commit(commit)
                     commits_analyzed.append(analysis)
                     commit_count += 1
-
+
                     if commit_count % 50 == 0:
                         colored_print(f" Processed {commit_count} commits...", "cyan")
-
+
                 except Exception as e:
-                    colored_print(f" Warning: Error analyzing commit {commit.hash[:8]}: {e}", "yellow")
+                    colored_print(
+                        f" Warning: Error analyzing commit {commit.hash[:8]}: {e}", "yellow"
+                    )
                     continue
-
+
             colored_print(f" Analyzed {len(commits_analyzed)} commits", "green")
-
+
+            # Phase 2.2: Stop energy measurement
+            if self.energy_tracking and self._energy_meter:
+                try:
+                    energy_result = self._energy_meter.stop()
+                except Exception as e:
+                    colored_print(f" Warning: Energy measurement stop failed: {e}", "yellow")
+
             # Compute process metrics if enabled
            process_metrics = {}
            if self.compute_process_metrics and local_path.exists():
                colored_print(" Computing process metrics...", "cyan")
                process_metrics = self._compute_process_metrics(str(local_path))
-
+
            # Calculate summary
            green_commits = sum(1 for c in commits_analyzed if c.green_aware)
            green_rate = green_commits / len(commits_analyzed) if commits_analyzed else 0
-
+
+            # Build energy metrics dict
+            energy_dict = None
+            if energy_result:
+                energy_dict = energy_result.to_dict()
+
             result = RepositoryAnalysis(
                 url=url,
                 name=full_name,
@@ -296,22 +526,23 @@ class LocalRepoAnalyzer:
                 green_commit_rate=green_rate,
                 commits=commits_analyzed,
                 process_metrics=process_metrics,
+                energy_metrics=energy_dict,
             )
-
+
             return result
-
+
         finally:
             # Cleanup if requested
             if self.cleanup_after and local_path.exists():
                 colored_print(f" Cleaning up: {local_path}", "cyan")
                 shutil.rmtree(local_path, ignore_errors=True)
-
+
     def _compute_process_metrics(self, repo_path: str) -> Dict[str, Any]:
         # Compute PyDriller process metrics for the repository.
         metrics = {}
         since_date = datetime.now() - timedelta(days=self.days_back)
         to_date = datetime.now()
-
+
         try:
             # ChangeSet metrics
             cs = ChangeSet(repo_path, since=since_date, to=to_date)
@@ -319,62 +550,76 @@ class LocalRepoAnalyzer:
             metrics["change_set_avg"] = cs.avg()
         except Exception as e:
             colored_print(f" Warning: ChangeSet metrics failed: {e}", "yellow")
-
+
         try:
             # CodeChurn metrics
             churn = CodeChurn(repo_path, since=since_date, to=to_date)
             metrics["code_churn"] = churn.count()
         except Exception as e:
             colored_print(f" Warning: CodeChurn metrics failed: {e}", "yellow")
-
+
         try:
             # CommitsCount metrics
             cc = CommitsCount(repo_path, since=since_date, to=to_date)
             metrics["commits_per_file"] = cc.count()
         except Exception as e:
             colored_print(f" Warning: CommitsCount metrics failed: {e}", "yellow")
-
+
         try:
             # ContributorsCount metrics
             contrib = ContributorsCount(repo_path, since=since_date, to=to_date)
             metrics["contributors_per_file"] = contrib.count()
         except Exception as e:
             colored_print(f" Warning: ContributorsCount metrics failed: {e}", "yellow")
-
+
         try:
             # ContributorsExperience metrics
             exp = ContributorsExperience(repo_path, since=since_date, to=to_date)
             metrics["contributors_experience"] = exp.count()
         except Exception as e:
             colored_print(f" Warning: ContributorsExperience metrics failed: {e}", "yellow")
-
+
         try:
             # HistoryComplexity metrics
             hc = HistoryComplexity(repo_path, since=since_date, to=to_date)
             metrics["history_complexity"] = hc.count()
         except Exception as e:
             colored_print(f" Warning: HistoryComplexity metrics failed: {e}", "yellow")
-
+
         try:
             # HunksCount metrics
             hunks = HunksCount(repo_path, since=since_date, to=to_date)
             metrics["hunks_count"] = hunks.count()
         except Exception as e:
             colored_print(f" Warning: HunksCount metrics failed: {e}", "yellow")
-
+
         try:
             # LinesCount metrics
             lines = LinesCount(repo_path, since=since_date, to=to_date)
             metrics["lines_count"] = lines.count()
         except Exception as e:
             colored_print(f" Warning: LinesCount metrics failed: {e}", "yellow")
-
+
         return metrics
-
-    def analyze_repositories(self, urls: List[str]) -> List[RepositoryAnalysis]:
+
+    def analyze_repositories(
+        self,
+        urls: List[str],
+        parallel_workers: int = 1,
+        output_format: str = "dict",
+    ) -> List[RepositoryAnalysis]:
         # Analyze multiple repositories from URLs.
+        # Args:
+        #     urls: List of repository URLs to analyze
+        #     parallel_workers: Number of concurrent workers (1 = sequential)
+        #     output_format: Output format (dict, json, csv)
+        if parallel_workers <= 1:
+            return self._analyze_sequential(urls)
+        return self._analyze_parallel(urls, parallel_workers)
+
+    def _analyze_sequential(self, urls: List[str]) -> List[RepositoryAnalysis]:
+        # Analyze repositories sequentially.
         results = []
-
         for i, url in enumerate(urls, 1):
             colored_print(f"\n[{i}/{len(urls)}] Processing repository...", "cyan")
             try:
@@ -383,5 +628,22 @@ class LocalRepoAnalyzer:
             except Exception as e:
                 colored_print(f" Error analyzing {url}: {e}", "red")
                 continue
-
+        return results
+
+    def _analyze_parallel(self, urls: List[str], max_workers: int) -> List[RepositoryAnalysis]:
+        # Analyze repositories in parallel using thread pool.
+        results = []
+        colored_print(f"\n Analyzing {len(urls)} repositories with {max_workers} workers", "cyan")
+
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            future_to_url = {executor.submit(self.analyze_repository, url): url for url in urls}
+            for future in as_completed(future_to_url):
+                url = future_to_url[future]
+                try:
+                    result = future.result()
+                    results.append(result)
+                    colored_print(f" Completed: {result.name}", "green")
+                except Exception as e:
+                    colored_print(f" Error analyzing {url}: {e}", "red")
+
         return results
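For orientation, here is a minimal usage sketch of the options this diff adds to LocalRepoAnalyzer. It is illustrative only: the repository URL and parameter values are placeholders, and energy_tracking assumes a working backend (RAPL or CodeCarbon) on the host.

    # Sketch, not from the package docs: exercises the new 1.0.7 parameters.
    from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer

    analyzer = LocalRepoAnalyzer(
        max_commits=200,              # placeholder limit
        energy_tracking=True,         # Phase 2.2: start/stop an energy meter per repo
        energy_backend="rapl",        # or "codecarbon"
        method_level_analysis=True,   # Phase 3.2: per-method Lizard metrics
        include_source_code=False,    # Phase 3.3: before/after source in results
        github_token=None,            # Phase 1.3: set for private HTTPS repos
    )

    # parallel_workers > 1 routes through the new ThreadPoolExecutor path.
    results = analyzer.analyze_repositories(
        ["https://github.com/example/repo"],  # placeholder URL
        parallel_workers=4,
    )

    for repo in results:
        data = repo.to_dict()
        # "energy_metrics" appears only when measurement succeeded.
        print(data["name"], data.get("energy_metrics"))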
greenmining/services/reports.py
CHANGED
@@ -1,4 +1,5 @@
 # Report generation for green mining analysis.
+"""Report generation module for GreenMining analysis results."""

 from __future__ import annotations

@@ -228,12 +229,10 @@ Commits were analyzed using a keyword and heuristic-based classification framewo
     # Pattern descriptions
     pattern_details = []
     for i, pattern in enumerate(patterns[:10], 1):
-        pattern_details.append(
-            f"""**{i}. {pattern['pattern_name']}**
+        pattern_details.append(f"""**{i}. {pattern['pattern_name']}**
 - Frequency: {format_number(pattern['count'])} commits ({format_percentage(pattern['percentage'])})
 - Confidence Distribution: HIGH={conf['HIGH']}, MEDIUM={conf['MEDIUM']}, LOW={conf['LOW']}
-- Example Commits: {', '.join([c[:8] for c in pattern['example_commits'][:3]])}"""
-        )
+- Example Commits: {', '.join([c[:8] for c in pattern['example_commits'][:3]])}""")

     return f"""#### 2.2 Known Green Patterns & Tactics Applied

@@ -258,12 +257,10 @@ No novel microservice-specific green practices were automatically detected. Manu

     pattern_list = []
     for pattern in emergent:
-        pattern_list.append(
-            f"""**Pattern:** {pattern['pattern_name']}
+        pattern_list.append(f"""**Pattern:** {pattern['pattern_name']}
 - Occurrences: {pattern['count']}
 - Description: {pattern['description']}
-- Example Commits: {', '.join([c[:8] for c in pattern['example_commits'][:3]])}"""
-        )
+- Example Commits: {', '.join([c[:8] for c in pattern['example_commits'][:3]])}""")

     return f"""#### 2.3 Emerging Practices Discovered

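To make the inlined-f-string change in reports.py concrete, here is a small sketch of what one pattern_details entry renders to. The values are made up, and plain interpolation stands in for the package's format_number/format_percentage helpers:

    # Sketch with placeholder data; real entries come from aggregated pattern dicts.
    pattern = {
        "pattern_name": "Cache Static Data",
        "count": 12,
        "percentage": 4.8,
        "example_commits": ["a1b2c3d4e5f6", "0f9e8d7c6b5a"],
    }
    conf = {"HIGH": 3, "MEDIUM": 7, "LOW": 2}

    entry = f"""**1. {pattern['pattern_name']}**
    - Frequency: {pattern['count']} commits ({pattern['percentage']}%)
    - Confidence Distribution: HIGH={conf['HIGH']}, MEDIUM={conf['MEDIUM']}, LOW={conf['LOW']}
    - Example Commits: {', '.join([c[:8] for c in pattern['example_commits'][:3]])}"""
    print(entry)  # one Markdown block per detected pattern, as appended in the loop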