greenmining 1.0.5__py3-none-any.whl → 1.0.6__py3-none-any.whl

greenmining/__init__.py CHANGED
@@ -9,7 +9,7 @@ from greenmining.gsf_patterns import (
      is_green_aware,
  )

- __version__ = "1.0.5"
+ __version__ = "1.0.6"


  def fetch_repositories(
@@ -32,6 +32,49 @@ def fetch_repositories(
      )


+ def analyze_repositories(
+     urls: list,
+     max_commits: int = 500,
+     parallel_workers: int = 1,
+     output_format: str = "dict",
+     energy_tracking: bool = False,
+     energy_backend: str = "rapl",
+     method_level_analysis: bool = False,
+     include_source_code: bool = False,
+     ssh_key_path: str = None,
+     github_token: str = None,
+ ):
+     # Analyze multiple repositories from URLs.
+     # Args:
+     #     urls: List of GitHub repository URLs
+     #     max_commits: Maximum commits to analyze per repository
+     #     parallel_workers: Number of parallel analysis workers (1 = sequential)
+     #     output_format: Output format (dict, json, csv)
+     #     energy_tracking: Enable automatic energy measurement during analysis
+     #     energy_backend: Energy backend (rapl, codecarbon, cpu_meter, auto)
+     #     method_level_analysis: Include per-method metrics via Lizard
+     #     include_source_code: Include source code before/after in results
+     #     ssh_key_path: SSH key path for private repositories
+     #     github_token: GitHub token for private HTTPS repositories
+     from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
+ 
+     analyzer = LocalRepoAnalyzer(
+         max_commits=max_commits,
+         energy_tracking=energy_tracking,
+         energy_backend=energy_backend,
+         method_level_analysis=method_level_analysis,
+         include_source_code=include_source_code,
+         ssh_key_path=ssh_key_path,
+         github_token=github_token,
+     )
+ 
+     return analyzer.analyze_repositories(
+         urls=urls,
+         parallel_workers=parallel_workers,
+         output_format=output_format,
+     )
+ 
+ 
  __all__ = [
      "Config",
      "GSF_PATTERNS",
@@ -39,5 +82,6 @@ __all__ = [
      "is_green_aware",
      "get_pattern_by_keywords",
      "fetch_repositories",
+     "analyze_repositories",
      "__version__",
- ]
+ ]
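
For orientation, a minimal usage sketch of the new top-level helper; the repository URL is a placeholder, and the arguments follow the signature added above:

from greenmining import analyze_repositories

# Placeholder URL for illustration only.
results = analyze_repositories(
    urls=["https://github.com/example/example-repo"],
    max_commits=100,
    output_format="dict",
)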
greenmining/analyzers/__init__.py CHANGED
@@ -4,10 +4,19 @@ from .code_diff_analyzer import CodeDiffAnalyzer
  from .statistical_analyzer import StatisticalAnalyzer
  from .temporal_analyzer import TemporalAnalyzer
  from .qualitative_analyzer import QualitativeAnalyzer
+ from .power_regression import PowerRegressionDetector, PowerRegression
+ from .metrics_power_correlator import MetricsPowerCorrelator, CorrelationResult
+ from .version_power_analyzer import VersionPowerAnalyzer, VersionPowerReport

  __all__ = [
      "CodeDiffAnalyzer",
      "StatisticalAnalyzer",
      "TemporalAnalyzer",
      "QualitativeAnalyzer",
+     "PowerRegressionDetector",
+     "PowerRegression",
+     "MetricsPowerCorrelator",
+     "CorrelationResult",
+     "VersionPowerAnalyzer",
+     "VersionPowerReport",
  ]
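
With these re-exports, the new analyzers can be imported straight from the subpackage; a quick sketch:

from greenmining.analyzers import (
    MetricsPowerCorrelator,
    PowerRegressionDetector,
    VersionPowerAnalyzer,
)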
greenmining/analyzers/metrics_power_correlator.py ADDED
@@ -0,0 +1,165 @@
+ # Metrics-to-power correlation analysis.
+ # Build models correlating code metrics (complexity, nloc, churn) with power consumption.
+ 
+ from __future__ import annotations
+ 
+ from dataclasses import dataclass
+ from typing import Any, Dict, List
+ 
+ import numpy as np
+ from scipy import stats
+ 
+ 
+ @dataclass
+ class CorrelationResult:
+     # Result of a metrics-to-power correlation analysis.
+ 
+     metric_name: str
+     pearson_r: float = 0.0
+     pearson_p: float = 1.0
+     spearman_r: float = 0.0
+     spearman_p: float = 1.0
+     significant: bool = False
+     strength: str = "none"
+ 
+     def to_dict(self) -> Dict[str, Any]:
+         return {
+             "metric_name": self.metric_name,
+             "pearson_r": round(self.pearson_r, 4),
+             "pearson_p": round(self.pearson_p, 6),
+             "spearman_r": round(self.spearman_r, 4),
+             "spearman_p": round(self.spearman_p, 6),
+             "significant": self.significant,
+             "strength": self.strength,
+         }
+ 
+ 
+ class MetricsPowerCorrelator:
+     # Correlate code metrics with power consumption measurements.
+     # Computes Pearson and Spearman correlations between software metrics
+     # and measured energy/power values.
+ 
+     def __init__(self, significance_level: float = 0.05):
+         # Initialize the correlator.
+         # Args:
+         #     significance_level: p-value threshold for significance
+         self.significance_level = significance_level
+         self._metrics_data: Dict[str, List[float]] = {}
+         self._power_data: List[float] = []
+         self._fitted = False
+         self._results: Dict[str, CorrelationResult] = {}
+         self._feature_importance: Dict[str, float] = {}
+ 
+     def fit(
+         self,
+         metrics: List[str],
+         metrics_values: Dict[str, List[float]],
+         power_measurements: List[float],
+     ) -> None:
+         # Fit the correlator with metrics and power data.
+         # Args:
+         #     metrics: List of metric names to correlate
+         #     metrics_values: Dict mapping metric name -> list of values
+         #     power_measurements: List of power measurement values
+         self._metrics_data = {m: metrics_values[m] for m in metrics if m in metrics_values}
+         self._power_data = power_measurements
+ 
+         n = len(power_measurements)
+         if n < 3:
+             raise ValueError("Need at least 3 data points for correlation analysis")
+ 
+         # Compute correlations
+         for metric_name, values in self._metrics_data.items():
+             if len(values) != n:
+                 continue
+ 
+             result = self._compute_correlation(metric_name, values, power_measurements)
+             self._results[metric_name] = result
+ 
+         # Compute feature importance (normalized absolute Spearman)
+         max_abs = max((abs(r.spearman_r) for r in self._results.values()), default=1.0)
+         if max_abs > 0:
+             self._feature_importance = {
+                 name: abs(r.spearman_r) / max_abs for name, r in self._results.items()
+             }
+ 
+         self._fitted = True
+ 
+     def _compute_correlation(
+         self, metric_name: str, metric_values: List[float], power_values: List[float]
+     ) -> CorrelationResult:
+         # Compute Pearson and Spearman correlations for a single metric.
+         x = np.array(metric_values, dtype=float)
+         y = np.array(power_values, dtype=float)
+ 
+         # Handle constant arrays
+         if np.std(x) == 0 or np.std(y) == 0:
+             return CorrelationResult(metric_name=metric_name)
+ 
+         # Pearson correlation (linear)
+         pearson_r, pearson_p = stats.pearsonr(x, y)
+ 
+         # Spearman correlation (monotonic)
+         spearman_r, spearman_p = stats.spearmanr(x, y)
+ 
+         # Significance
+         significant = pearson_p < self.significance_level or spearman_p < self.significance_level
+ 
+         # Strength classification
+         abs_r = max(abs(pearson_r), abs(spearman_r))
+         if abs_r >= 0.7:
+             strength = "strong"
+         elif abs_r >= 0.4:
+             strength = "moderate"
+         elif abs_r >= 0.2:
+             strength = "weak"
+         else:
+             strength = "negligible"
+ 
+         return CorrelationResult(
+             metric_name=metric_name,
+             pearson_r=float(pearson_r),
+             pearson_p=float(pearson_p),
+             spearman_r=float(spearman_r),
+             spearman_p=float(spearman_p),
+             significant=significant,
+             strength=strength,
+         )
+ 
+     @property
+     def pearson(self) -> Dict[str, float]:
+         # Pearson correlations for all metrics.
+         return {name: r.pearson_r for name, r in self._results.items()}
+ 
+     @property
+     def spearman(self) -> Dict[str, float]:
+         # Spearman correlations for all metrics.
+         return {name: r.spearman_r for name, r in self._results.items()}
+ 
+     @property
+     def feature_importance(self) -> Dict[str, float]:
+         # Normalized feature importance scores.
+         return self._feature_importance
+ 
+     def get_results(self) -> Dict[str, CorrelationResult]:
+         # All correlation results.
+         return self._results
+ 
+     def get_significant_correlations(self) -> Dict[str, CorrelationResult]:
+         # Only the statistically significant correlations.
+         return {name: r for name, r in self._results.items() if r.significant}
+ 
+     def summary(self) -> Dict[str, Any]:
+         # Generate a JSON-serializable summary of the correlation analysis.
+         return {
+             "total_metrics": len(self._results),
+             "significant_count": sum(1 for r in self._results.values() if r.significant),
+             "correlations": {name: r.to_dict() for name, r in self._results.items()},
+             "feature_importance": self._feature_importance,
+             "strongest_positive": max(
+                 self._results, key=lambda m: self._results[m].spearman_r, default=None
+             ),
+             "strongest_negative": min(
+                 self._results, key=lambda m: self._results[m].spearman_r, default=None
+             ),
+         }
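
A minimal sketch of the correlator on toy data (all values illustrative); fit() requires at least three data points and equal-length series per metric:

correlator = MetricsPowerCorrelator(significance_level=0.05)
correlator.fit(
    metrics=["complexity", "nloc"],
    metrics_values={
        "complexity": [3.0, 5.0, 8.0, 13.0, 21.0],
        "nloc": [120.0, 150.0, 200.0, 260.0, 330.0],
    },
    power_measurements=[12.1, 13.0, 15.2, 17.9, 21.4],  # joules, illustrative
)
print(correlator.spearman)                       # per-metric Spearman rho
print(correlator.summary()["significant_count"])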
greenmining/analyzers/power_regression.py ADDED
@@ -0,0 +1,216 @@
+ # Power regression detection for identifying commits that increased power consumption.
+ # Compares energy measurements between baseline and target commits.
+ 
+ from __future__ import annotations
+ 
+ import subprocess
+ import time
+ from dataclasses import dataclass
+ from typing import Any, Dict, List
+ 
+ from pydriller import Repository
+ 
+ from greenmining.utils import colored_print
+ 
+ 
+ @dataclass
+ class PowerRegression:
+     # A detected power regression from a commit.
+ 
+     sha: str
+     message: str
+     author: str
+     date: str
+     power_before: float  # watts
+     power_after: float  # watts
+     power_increase: float  # percentage
+     energy_before: float  # joules
+     energy_after: float  # joules
+     is_regression: bool = True
+ 
+     def to_dict(self) -> Dict[str, Any]:
+         return {
+             "sha": self.sha,
+             "message": self.message,
+             "author": self.author,
+             "date": self.date,
+             "power_before": round(self.power_before, 4),
+             "power_after": round(self.power_after, 4),
+             "power_increase": round(self.power_increase, 2),
+             "energy_before": round(self.energy_before, 4),
+             "energy_after": round(self.energy_after, 4),
+             "is_regression": self.is_regression,
+         }
+ 
+ 
+ class PowerRegressionDetector:
+     # Detect commits that caused power consumption regressions.
+     # Runs a test command at each commit and measures energy usage.
+ 
+     def __init__(
+         self,
+         test_command: str = "pytest tests/ -x",
+         energy_backend: str = "rapl",
+         threshold_percent: float = 5.0,
+         iterations: int = 5,
+         warmup_iterations: int = 1,
+     ):
+         # Initialize the power regression detector.
+         # Args:
+         #     test_command: Shell command to run for energy measurement
+         #     energy_backend: Energy measurement backend (rapl, codecarbon, cpu_meter)
+         #     threshold_percent: Minimum percentage increase to flag as a regression
+         #     iterations: Number of measurement iterations per commit (for accuracy)
+         #     warmup_iterations: Number of warmup runs before measurement
+         self.test_command = test_command
+         self.energy_backend = energy_backend
+         self.threshold_percent = threshold_percent
+         self.iterations = iterations
+         self.warmup_iterations = warmup_iterations
+         self._meter = None
+ 
+     def _get_energy_meter(self):
+         # Lazily create the energy meter instance.
+         if self._meter is None:
+             from greenmining.energy.base import get_energy_meter
+ 
+             self._meter = get_energy_meter(self.energy_backend)
+         return self._meter
+ 
+     def _run_test_command(self, cwd: str) -> tuple:
+         # Run the test command; return (mean joules, mean watts) per measured run.
+         meter = self._get_energy_meter()
+ 
+         # Warmup
+         for _ in range(self.warmup_iterations):
+             subprocess.run(
+                 self.test_command,
+                 shell=True,
+                 cwd=cwd,
+                 capture_output=True,
+                 text=True,
+                 timeout=300,
+             )
+ 
+         # Measure energy and wall-clock time
+         total_joules = 0.0
+         total_seconds = 0.0
+         for _ in range(self.iterations):
+             meter.start()
+             start = time.perf_counter()
+             subprocess.run(
+                 self.test_command,
+                 shell=True,
+                 cwd=cwd,
+                 capture_output=True,
+                 text=True,
+                 timeout=300,
+             )
+             total_seconds += time.perf_counter() - start
+             metrics = meter.stop()
+             total_joules += metrics.joules
+ 
+         mean_watts = total_joules / total_seconds if total_seconds > 0 else 0.0
+         return total_joules / self.iterations, mean_watts
+ 
+     def detect(
+         self,
+         repo_path: str,
+         baseline_commit: str = "HEAD~10",
+         target_commit: str = "HEAD",
+         max_commits: int = 50,
+     ) -> List[PowerRegression]:
+         # Detect power regressions between baseline and target commits.
+         # Args:
+         #     repo_path: Path to a local git repository
+         #     baseline_commit: Baseline commit SHA or reference
+         #     target_commit: Target commit SHA or reference
+         #     max_commits: Maximum commits to analyze
+         regressions: List[PowerRegression] = []
+ 
+         colored_print(f"Detecting power regressions in {repo_path}", "cyan")
+         colored_print(f"  Range: {baseline_commit}..{target_commit}", "cyan")
+         colored_print(f"  Test: {self.test_command}", "cyan")
+         colored_print(f"  Threshold: {self.threshold_percent}%", "cyan")
+ 
+         # Get commits in range
+         commits = list(
+             Repository(
+                 path_to_repo=repo_path,
+                 from_commit=baseline_commit,
+                 to_commit=target_commit,
+             ).traverse_commits()
+         )
+ 
+         if not commits:
+             colored_print("No commits found in range", "yellow")
+             return regressions
+ 
+         # Measure baseline
+         colored_print(f"  Measuring baseline ({commits[0].hash[:8]})...", "cyan")
+         self._checkout(repo_path, commits[0].hash)
+         baseline_energy, baseline_power = self._run_test_command(repo_path)
+         colored_print(f"  Baseline: {baseline_energy:.4f} joules", "green")
+ 
+         previous_energy, previous_power = baseline_energy, baseline_power
+         commit_count = 0
+ 
+         for commit in commits[1:]:
+             if commit_count >= max_commits:
+                 break
+ 
+             try:
+                 self._checkout(repo_path, commit.hash)
+                 current_energy, current_power = self._run_test_command(repo_path)
+ 
+                 # Calculate change
+                 if previous_energy > 0:
+                     change_percent = ((current_energy - previous_energy) / previous_energy) * 100
+                 else:
+                     change_percent = 0.0
+ 
+                 # Check for regression
+                 if change_percent > self.threshold_percent:
+                     regression = PowerRegression(
+                         sha=commit.hash,
+                         message=commit.msg[:200],
+                         author=commit.author.name,
+                         date=commit.author_date.isoformat() if commit.author_date else "",
+                         power_before=previous_power,
+                         power_after=current_power,
+                         power_increase=change_percent,
+                         energy_before=previous_energy,
+                         energy_after=current_energy,
+                     )
+                     regressions.append(regression)
+                     colored_print(f"  REGRESSION: {commit.hash[:8]} +{change_percent:.1f}%", "red")
+                 else:
+                     colored_print(f"  OK: {commit.hash[:8]} {change_percent:+.1f}%", "green")
+ 
+                 previous_energy, previous_power = current_energy, current_power
+                 commit_count += 1
+ 
+             except Exception as e:
+                 colored_print(f"  Warning: Failed on {commit.hash[:8]}: {e}", "yellow")
+                 continue
+ 
+         # Restore the working tree to the target commit
+         self._checkout(repo_path, target_commit)
+ 
+         colored_print(
+             f"\nFound {len(regressions)} power regressions "
+             f"(>{self.threshold_percent}% increase)",
+             "cyan" if not regressions else "red",
+         )
+ 
+         return regressions
+ 
+     @staticmethod
+     def _checkout(repo_path: str, ref: str):
+         # Check out a specific commit.
+         subprocess.run(
+             ["git", "checkout", ref, "--quiet"],
+             cwd=repo_path,
+             capture_output=True,
+             text=True,
+         )
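
A minimal sketch of the detector against a local clone (path and test command are placeholders). Since detect() checks out commits in the working tree, point it at a disposable clone:

detector = PowerRegressionDetector(
    test_command="pytest tests/ -x",
    threshold_percent=5.0,
    iterations=3,
)
regressions = detector.detect(
    repo_path="/tmp/example-clone",  # placeholder path
    baseline_commit="HEAD~10",
    target_commit="HEAD",
)
for reg in regressions:
    print(reg.sha[:8], f"+{reg.power_increase:.1f}%")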