greenmining 1.0.4__py3-none-any.whl → 1.0.6__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- greenmining/__init__.py +46 -2
- greenmining/__version__.py +1 -1
- greenmining/analyzers/__init__.py +9 -0
- greenmining/analyzers/metrics_power_correlator.py +165 -0
- greenmining/analyzers/power_regression.py +212 -0
- greenmining/analyzers/version_power_analyzer.py +246 -0
- greenmining/config.py +46 -34
- greenmining/dashboard/__init__.py +5 -0
- greenmining/dashboard/app.py +200 -0
- greenmining/energy/__init__.py +8 -1
- greenmining/energy/base.py +45 -35
- greenmining/energy/carbon_reporter.py +242 -0
- greenmining/energy/codecarbon_meter.py +25 -24
- greenmining/energy/cpu_meter.py +144 -0
- greenmining/energy/rapl.py +30 -36
- greenmining/services/__init__.py +13 -3
- greenmining/services/commit_extractor.py +9 -5
- greenmining/services/github_fetcher.py +16 -18
- greenmining/services/github_graphql_fetcher.py +45 -55
- greenmining/services/local_repo_analyzer.py +325 -63
- greenmining/services/reports.py +5 -8
- {greenmining-1.0.4.dist-info → greenmining-1.0.6.dist-info}/METADATA +65 -54
- greenmining-1.0.6.dist-info/RECORD +44 -0
- greenmining-1.0.4.dist-info/RECORD +0 -37
- {greenmining-1.0.4.dist-info → greenmining-1.0.6.dist-info}/WHEEL +0 -0
- {greenmining-1.0.4.dist-info → greenmining-1.0.6.dist-info}/licenses/LICENSE +0 -0
- {greenmining-1.0.4.dist-info → greenmining-1.0.6.dist-info}/top_level.txt +0 -0
greenmining/energy/rapl.py
CHANGED
|
@@ -13,9 +13,9 @@ from .base import EnergyMeter, EnergyMetrics, EnergyBackend
|
|
|
13
13
|
|
|
14
14
|
class RAPLEnergyMeter(EnergyMeter):
|
|
15
15
|
# Energy measurement using Intel RAPL on Linux.
|
|
16
|
-
|
|
16
|
+
|
|
17
17
|
RAPL_PATH = Path("/sys/class/powercap/intel-rapl")
|
|
18
|
-
|
|
18
|
+
|
|
19
19
|
def __init__(self):
|
|
20
20
|
# Initialize RAPL energy meter.
|
|
21
21
|
super().__init__(EnergyBackend.RAPL)
|
|
@@ -24,12 +24,12 @@ class RAPLEnergyMeter(EnergyMeter):
|
|
|
24
24
|
self._start_time: Optional[float] = None
|
|
25
25
|
self._power_samples: List[float] = []
|
|
26
26
|
self._discover_domains()
|
|
27
|
-
|
|
27
|
+
|
|
28
28
|
def _discover_domains(self) -> None:
|
|
29
29
|
# Discover available RAPL domains.
|
|
30
30
|
if not self.RAPL_PATH.exists():
|
|
31
31
|
return
|
|
32
|
-
|
|
32
|
+
|
|
33
33
|
# Find all RAPL domains (intel-rapl:0, intel-rapl:0:0, etc.)
|
|
34
34
|
for domain_path in self.RAPL_PATH.glob("intel-rapl:*"):
|
|
35
35
|
if (domain_path / "energy_uj").exists():
|
|
@@ -39,9 +39,9 @@ class RAPLEnergyMeter(EnergyMeter):
|
|
|
39
39
|
domain_name = name_file.read_text().strip()
|
|
40
40
|
else:
|
|
41
41
|
domain_name = domain_path.name
|
|
42
|
-
|
|
42
|
+
|
|
43
43
|
self._domains[domain_name] = domain_path / "energy_uj"
|
|
44
|
-
|
|
44
|
+
|
|
45
45
|
# Check for sub-domains (core, uncore, dram, etc.)
|
|
46
46
|
for subdomain_path in domain_path.glob("intel-rapl:*:*"):
|
|
47
47
|
if (subdomain_path / "energy_uj").exists():
|
|
@@ -50,24 +50,24 @@ class RAPLEnergyMeter(EnergyMeter):
|
|
|
50
50
|
subdomain_name = name_file.read_text().strip()
|
|
51
51
|
else:
|
|
52
52
|
subdomain_name = subdomain_path.name
|
|
53
|
-
|
|
53
|
+
|
|
54
54
|
self._domains[subdomain_name] = subdomain_path / "energy_uj"
|
|
55
|
-
|
|
55
|
+
|
|
56
56
|
def _read_energy(self, path: Path) -> int:
|
|
57
57
|
# Read energy value in microjoules from a RAPL file.
|
|
58
58
|
try:
|
|
59
59
|
return int(path.read_text().strip())
|
|
60
60
|
except (PermissionError, FileNotFoundError, ValueError):
|
|
61
61
|
return 0
|
|
62
|
-
|
|
62
|
+
|
|
63
63
|
def is_available(self) -> bool:
|
|
64
64
|
# Check if RAPL is available on this system.
|
|
65
65
|
if not self.RAPL_PATH.exists():
|
|
66
66
|
return False
|
|
67
|
-
|
|
67
|
+
|
|
68
68
|
if not self._domains:
|
|
69
69
|
return False
|
|
70
|
-
|
|
70
|
+
|
|
71
71
|
# Try to read at least one domain
|
|
72
72
|
for path in self._domains.values():
|
|
73
73
|
try:
|
|
@@ -75,68 +75,62 @@ class RAPLEnergyMeter(EnergyMeter):
|
|
|
75
75
|
return True
|
|
76
76
|
except Exception:
|
|
77
77
|
continue
|
|
78
|
-
|
|
78
|
+
|
|
79
79
|
return False
|
|
80
|
-
|
|
80
|
+
|
|
81
81
|
def start(self) -> None:
|
|
82
82
|
# Start energy measurement.
|
|
83
83
|
if self._is_measuring:
|
|
84
84
|
raise RuntimeError("Already measuring energy")
|
|
85
|
-
|
|
85
|
+
|
|
86
86
|
self._is_measuring = True
|
|
87
87
|
self._start_time = time.time()
|
|
88
88
|
self._power_samples = []
|
|
89
|
-
|
|
89
|
+
|
|
90
90
|
# Read starting energy values for all domains
|
|
91
|
-
self._start_energy = {
|
|
92
|
-
|
|
93
|
-
for name, path in self._domains.items()
|
|
94
|
-
}
|
|
95
|
-
|
|
91
|
+
self._start_energy = {name: self._read_energy(path) for name, path in self._domains.items()}
|
|
92
|
+
|
|
96
93
|
def stop(self) -> EnergyMetrics:
|
|
97
94
|
# Stop energy measurement and return results.
|
|
98
95
|
if not self._is_measuring:
|
|
99
96
|
raise RuntimeError("Not currently measuring energy")
|
|
100
|
-
|
|
97
|
+
|
|
101
98
|
end_time = time.time()
|
|
102
99
|
self._is_measuring = False
|
|
103
|
-
|
|
100
|
+
|
|
104
101
|
# Read ending energy values
|
|
105
|
-
end_energy = {
|
|
106
|
-
|
|
107
|
-
for name, path in self._domains.items()
|
|
108
|
-
}
|
|
109
|
-
|
|
102
|
+
end_energy = {name: self._read_energy(path) for name, path in self._domains.items()}
|
|
103
|
+
|
|
110
104
|
# Calculate energy consumption per domain (in joules)
|
|
111
105
|
duration = end_time - self._start_time
|
|
112
|
-
|
|
106
|
+
|
|
113
107
|
# Handle counter wrap-around (assumes a 32-bit counter range; the actual per-domain limit is exposed by the kernel in max_energy_range_uj)
|
|
114
108
|
MAX_ENERGY_UJ = 2**32
|
|
115
|
-
|
|
109
|
+
|
|
116
110
|
domain_energy = {}
|
|
117
111
|
for name in self._domains:
|
|
118
112
|
start = self._start_energy.get(name, 0)
|
|
119
113
|
end = end_energy.get(name, 0)
|
|
120
|
-
|
|
114
|
+
|
|
121
115
|
if end >= start:
|
|
122
116
|
delta_uj = end - start
|
|
123
117
|
else:
|
|
124
118
|
# Counter wrapped around
|
|
125
119
|
delta_uj = (MAX_ENERGY_UJ - start) + end
|
|
126
|
-
|
|
120
|
+
|
|
127
121
|
domain_energy[name] = delta_uj / 1_000_000 # Convert to joules
|
|
128
|
-
|
|
122
|
+
|
|
129
123
|
# Aggregate metrics
|
|
130
124
|
total_joules = sum(domain_energy.values())
|
|
131
|
-
|
|
125
|
+
|
|
132
126
|
# Extract component-specific energy
|
|
133
127
|
cpu_energy = domain_energy.get("core", 0) or domain_energy.get("package-0", total_joules)
|
|
134
128
|
dram_energy = domain_energy.get("dram", 0)
|
|
135
129
|
gpu_energy = domain_energy.get("uncore", None) # Integrated GPU
|
|
136
|
-
|
|
130
|
+
|
|
137
131
|
# Calculate power
|
|
138
132
|
watts_avg = total_joules / duration if duration > 0 else 0
|
|
139
|
-
|
|
133
|
+
|
|
140
134
|
return EnergyMetrics(
|
|
141
135
|
joules=total_joules,
|
|
142
136
|
watts_avg=watts_avg,
|
|
@@ -151,7 +145,7 @@ class RAPLEnergyMeter(EnergyMeter):
|
|
|
151
145
|
start_time=datetime.fromtimestamp(self._start_time),
|
|
152
146
|
end_time=datetime.fromtimestamp(end_time),
|
|
153
147
|
)
|
|
154
|
-
|
|
148
|
+
|
|
155
149
|
def get_available_domains(self) -> List[str]:
|
|
156
150
|
# Get list of available RAPL domains.
|
|
157
151
|
return list(self._domains.keys())
|
greenmining/services/__init__.py
CHANGED
|
@@ -3,15 +3,25 @@
|
|
|
3
3
|
from .commit_extractor import CommitExtractor
|
|
4
4
|
from .data_aggregator import DataAggregator
|
|
5
5
|
from .data_analyzer import DataAnalyzer
|
|
6
|
-
from .
|
|
7
|
-
from .local_repo_analyzer import
|
|
6
|
+
from .github_graphql_fetcher import GitHubGraphQLFetcher
|
|
7
|
+
from .local_repo_analyzer import (
|
|
8
|
+
LocalRepoAnalyzer,
|
|
9
|
+
CommitAnalysis,
|
|
10
|
+
RepositoryAnalysis,
|
|
11
|
+
MethodMetrics,
|
|
12
|
+
SourceCodeChange,
|
|
13
|
+
)
|
|
8
14
|
from .reports import ReportGenerator
|
|
9
15
|
|
|
10
16
|
__all__ = [
|
|
11
|
-
"
|
|
17
|
+
"GitHubGraphQLFetcher",
|
|
12
18
|
"CommitExtractor",
|
|
13
19
|
"DataAnalyzer",
|
|
14
20
|
"DataAggregator",
|
|
15
21
|
"ReportGenerator",
|
|
16
22
|
"LocalRepoAnalyzer",
|
|
23
|
+
"CommitAnalysis",
|
|
24
|
+
"RepositoryAnalysis",
|
|
25
|
+
"MethodMetrics",
|
|
26
|
+
"SourceCodeChange",
|
|
17
27
|
]
|
|
@@ -41,7 +41,9 @@ class CommitExtractor:
|
|
|
41
41
|
self.github = Github(github_token) if github_token else None
|
|
42
42
|
self.timeout = timeout
|
|
43
43
|
|
|
44
|
-
def extract_from_repositories(
|
|
44
|
+
def extract_from_repositories(
|
|
45
|
+
self, repositories: list[dict[str, Any] | Repository]
|
|
46
|
+
) -> list[dict[str, Any]]:
|
|
45
47
|
# Extract commits from list of repositories.
|
|
46
48
|
all_commits = []
|
|
47
49
|
failed_repos = []
|
|
@@ -74,15 +76,17 @@ class CommitExtractor:
|
|
|
74
76
|
pbar.update(1)
|
|
75
77
|
except TimeoutError:
|
|
76
78
|
signal.alarm(0) # Cancel alarm
|
|
77
|
-
repo_name =
|
|
78
|
-
|
|
79
|
-
f"\nTimeout processing {repo_name} (>{self.timeout}s)", "yellow"
|
|
79
|
+
repo_name = (
|
|
80
|
+
repo.full_name if isinstance(repo, Repository) else repo["full_name"]
|
|
80
81
|
)
|
|
82
|
+
colored_print(f"\nTimeout processing {repo_name} (>{self.timeout}s)", "yellow")
|
|
81
83
|
failed_repos.append(repo_name)
|
|
82
84
|
pbar.update(1)
|
|
83
85
|
except Exception as e:
|
|
84
86
|
signal.alarm(0) # Cancel alarm
|
|
85
|
-
repo_name =
|
|
87
|
+
repo_name = (
|
|
88
|
+
repo.full_name if isinstance(repo, Repository) else repo["full_name"]
|
|
89
|
+
)
|
|
86
90
|
colored_print(f"\nError processing {repo_name}: {e}", "yellow")
|
|
87
91
|
failed_repos.append(repo_name)
|
|
88
92
|
pbar.update(1)
|
|
@@ -1,21 +1,19 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
================================================================================
|
|
18
|
-
"""
|
|
1
|
+
# ================================================================================
|
|
2
|
+
# DEADCODE - OLD REST API IMPLEMENTATION
|
|
3
|
+
# ================================================================================
|
|
4
|
+
#
|
|
5
|
+
# This file contains the OLD GitHub REST API implementation.
|
|
6
|
+
# It has been REPLACED by GitHubGraphQLFetcher for better performance.
|
|
7
|
+
#
|
|
8
|
+
# Performance comparison:
|
|
9
|
+
# REST API: 10+ requests for 100 repos, ~2 minutes
|
|
10
|
+
# GraphQL API: 1-2 requests for 100 repos, ~15 seconds (roughly 8x faster)
|
|
11
|
+
#
|
|
12
|
+
# USE INSTEAD: greenmining.services.github_graphql_fetcher.GitHubGraphQLFetcher
|
|
13
|
+
#
|
|
14
|
+
# This file is kept for reference only. Do not use in production.
|
|
15
|
+
#
|
|
16
|
+
# ================================================================================
|
|
19
17
|
|
|
20
18
|
# GitHub repository fetcher for green microservices mining.
|
|
21
19
|
|
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
reducing API calls and improving rate limit efficiency.
|
|
6
|
-
"""
|
|
1
|
+
# GitHub GraphQL API fetcher for faster and more efficient repository fetching.
|
|
2
|
+
#
|
|
3
|
+
# GraphQL allows fetching exactly the data you need in a single request,
|
|
4
|
+
# reducing API calls and improving rate limit efficiency.
|
|
7
5
|
|
|
8
6
|
import json
|
|
9
7
|
import time
|
|
@@ -15,25 +13,21 @@ from greenmining.models.repository import Repository
|
|
|
15
13
|
|
|
16
14
|
|
|
17
15
|
class GitHubGraphQLFetcher:
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
-
|
|
23
|
-
-
|
|
24
|
-
-
|
|
25
|
-
- More powerful search capabilities
|
|
26
|
-
"""
|
|
16
|
+
# Fetch GitHub repositories using GraphQL API v4.
|
|
17
|
+
#
|
|
18
|
+
# Benefits over REST API:
|
|
19
|
+
# - Fetch repos + commits in 1 request instead of 100+ REST calls
|
|
20
|
+
# - Get exactly the fields you need (no over-fetching)
|
|
21
|
+
# - Better rate limit efficiency (5000 points/hour vs 5000 requests/hour)
|
|
22
|
+
# - More powerful search capabilities
|
|
27
23
|
|
|
28
24
|
GRAPHQL_ENDPOINT = "https://api.github.com/graphql"
|
|
29
25
|
|
|
30
26
|
def __init__(self, token: str):
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
token: GitHub personal access token
|
|
36
|
-
"""
|
|
27
|
+
# Initialize GraphQL fetcher.
|
|
28
|
+
#
|
|
29
|
+
# Args:
|
|
30
|
+
# token: GitHub personal access token
|
|
37
31
|
self.token = token
|
|
38
32
|
self.headers = {
|
|
39
33
|
"Authorization": f"Bearer {token}",
|
|
@@ -51,22 +45,20 @@ class GitHubGraphQLFetcher:
|
|
|
51
45
|
pushed_after: Optional[str] = None,
|
|
52
46
|
pushed_before: Optional[str] = None,
|
|
53
47
|
) -> List[Repository]:
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
List of Repository objects
|
|
69
|
-
"""
|
|
48
|
+
# Search GitHub repositories using GraphQL.
|
|
49
|
+
#
|
|
50
|
+
# Args:
|
|
51
|
+
# keywords: Search keywords
|
|
52
|
+
# max_repos: Maximum number of repositories to fetch
|
|
53
|
+
# min_stars: Minimum star count
|
|
54
|
+
# languages: Programming languages to filter
|
|
55
|
+
# created_after: Created after date (YYYY-MM-DD)
|
|
56
|
+
# created_before: Created before date (YYYY-MM-DD)
|
|
57
|
+
# pushed_after: Pushed after date (YYYY-MM-DD)
|
|
58
|
+
# pushed_before: Pushed before date (YYYY-MM-DD)
|
|
59
|
+
#
|
|
60
|
+
# Returns:
|
|
61
|
+
# List of Repository objects
|
|
70
62
|
# Build search query
|
|
71
63
|
search_query = self._build_search_query(
|
|
72
64
|
keywords,
|
|
@@ -195,7 +187,7 @@ class GitHubGraphQLFetcher:
|
|
|
195
187
|
pushed_after: Optional[str],
|
|
196
188
|
pushed_before: Optional[str],
|
|
197
189
|
) -> str:
|
|
198
|
-
|
|
190
|
+
# Build GitHub search query string.
|
|
199
191
|
query_parts = [keywords]
|
|
200
192
|
|
|
201
193
|
# Star count
|
|
@@ -219,7 +211,7 @@ class GitHubGraphQLFetcher:
|
|
|
219
211
|
return " ".join(query_parts)
|
|
220
212
|
|
|
221
213
|
def _execute_query(self, query: str, variables: Dict[str, Any]) -> Dict[str, Any]:
|
|
222
|
-
|
|
214
|
+
# Execute GraphQL query.
|
|
223
215
|
payload = {"query": query, "variables": variables}
|
|
224
216
|
|
|
225
217
|
response = requests.post(
|
|
@@ -230,7 +222,7 @@ class GitHubGraphQLFetcher:
|
|
|
230
222
|
return response.json()
|
|
231
223
|
|
|
232
224
|
def _parse_repository(self, node: Dict[str, Any]) -> Repository:
|
|
233
|
-
|
|
225
|
+
# Parse GraphQL repository node to Repository object.
|
|
234
226
|
# Extract languages
|
|
235
227
|
languages = []
|
|
236
228
|
if node.get("languages") and node["languages"].get("nodes"):
|
|
@@ -265,20 +257,18 @@ class GitHubGraphQLFetcher:
|
|
|
265
257
|
def get_repository_commits(
|
|
266
258
|
self, owner: str, name: str, max_commits: int = 100
|
|
267
259
|
) -> List[Dict[str, Any]]:
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
List of commit dictionaries
|
|
281
|
-
"""
|
|
260
|
+
# Fetch commits for a specific repository using GraphQL.
|
|
261
|
+
#
|
|
262
|
+
# This is much faster than the REST API, as it gets all commits in 1-2 requests
|
|
263
|
+
# instead of paginating through 100 individual REST calls.
|
|
264
|
+
#
|
|
265
|
+
# Args:
|
|
266
|
+
# owner: Repository owner
|
|
267
|
+
# name: Repository name
|
|
268
|
+
# max_commits: Maximum commits to fetch
|
|
269
|
+
#
|
|
270
|
+
# Returns:
|
|
271
|
+
# List of commit dictionaries
|
|
282
272
|
query = """
|
|
283
273
|
query($owner: String!, $name: String!, $first: Int!) {
|
|
284
274
|
repository(owner: $owner, name: $name) {
|
|
@@ -359,7 +349,7 @@ class GitHubGraphQLFetcher:
|
|
|
359
349
|
return commits
|
|
360
350
|
|
|
361
351
|
def save_results(self, repositories: List[Repository], output_file: str):
|
|
362
|
-
|
|
352
|
+
# Save repositories to JSON file.
|
|
363
353
|
data = {
|
|
364
354
|
"total_repositories": len(repositories),
|
|
365
355
|
"repositories": [repo.to_dict() for repo in repositories],
|