greenmining-1.0.4-py3-none-any.whl → greenmining-1.0.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,9 +13,9 @@ from .base import EnergyMeter, EnergyMetrics, EnergyBackend
 
 class RAPLEnergyMeter(EnergyMeter):
     # Energy measurement using Intel RAPL on Linux.
-
+
     RAPL_PATH = Path("/sys/class/powercap/intel-rapl")
-
+
     def __init__(self):
         # Initialize RAPL energy meter.
         super().__init__(EnergyBackend.RAPL)
@@ -24,12 +24,12 @@ class RAPLEnergyMeter(EnergyMeter):
         self._start_time: Optional[float] = None
         self._power_samples: List[float] = []
         self._discover_domains()
-
+
     def _discover_domains(self) -> None:
         # Discover available RAPL domains.
         if not self.RAPL_PATH.exists():
             return
-
+
         # Find all RAPL domains (intel-rapl:0, intel-rapl:0:0, etc.)
         for domain_path in self.RAPL_PATH.glob("intel-rapl:*"):
             if (domain_path / "energy_uj").exists():
@@ -39,9 +39,9 @@ class RAPLEnergyMeter(EnergyMeter):
                     domain_name = name_file.read_text().strip()
                 else:
                     domain_name = domain_path.name
-
+
                 self._domains[domain_name] = domain_path / "energy_uj"
-
+
                 # Check for sub-domains (core, uncore, dram, etc.)
                 for subdomain_path in domain_path.glob("intel-rapl:*:*"):
                     if (subdomain_path / "energy_uj").exists():
@@ -50,24 +50,24 @@ class RAPLEnergyMeter(EnergyMeter):
                             subdomain_name = name_file.read_text().strip()
                         else:
                             subdomain_name = subdomain_path.name
-
+
                         self._domains[subdomain_name] = subdomain_path / "energy_uj"
-
+
     def _read_energy(self, path: Path) -> int:
         # Read energy value in microjoules from a RAPL file.
         try:
             return int(path.read_text().strip())
         except (PermissionError, FileNotFoundError, ValueError):
             return 0
-
+
     def is_available(self) -> bool:
         # Check if RAPL is available on this system.
         if not self.RAPL_PATH.exists():
             return False
-
+
         if not self._domains:
             return False
-
+
         # Try to read at least one domain
         for path in self._domains.values():
             try:
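
For orientation, the sysfs layout that `_discover_domains` and `_read_energy` walk can be explored standalone. A minimal sketch, assuming a Linux host with the `intel_rapl` powercap driver loaded (`energy_uj` is often readable only by root):

```python
from pathlib import Path

RAPL_PATH = Path("/sys/class/powercap/intel-rapl")

# Each top-level domain directory (intel-rapl:0, intel-rapl:1, ...) exposes a
# human-readable "name" (package-0, core, uncore, dram, ...) and a
# monotonically increasing "energy_uj" counter in microjoules.
for domain in sorted(RAPL_PATH.glob("intel-rapl:*")):
    energy_file = domain / "energy_uj"
    if not energy_file.exists():
        continue
    name_file = domain / "name"
    name = name_file.read_text().strip() if name_file.exists() else domain.name
    try:
        print(f"{name}: {int(energy_file.read_text().strip())} uJ")
    except PermissionError:
        print(f"{name}: not readable (try root)")
```
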
@@ -75,68 +75,62 @@ class RAPLEnergyMeter(EnergyMeter):
                 return True
             except Exception:
                 continue
-
+
         return False
-
+
     def start(self) -> None:
         # Start energy measurement.
         if self._is_measuring:
             raise RuntimeError("Already measuring energy")
-
+
         self._is_measuring = True
         self._start_time = time.time()
         self._power_samples = []
-
+
         # Read starting energy values for all domains
-        self._start_energy = {
-            name: self._read_energy(path)
-            for name, path in self._domains.items()
-        }
-
+        self._start_energy = {name: self._read_energy(path) for name, path in self._domains.items()}
+
     def stop(self) -> EnergyMetrics:
         # Stop energy measurement and return results.
         if not self._is_measuring:
             raise RuntimeError("Not currently measuring energy")
-
+
         end_time = time.time()
         self._is_measuring = False
-
+
         # Read ending energy values
-        end_energy = {
-            name: self._read_energy(path)
-            for name, path in self._domains.items()
-        }
-
+        end_energy = {name: self._read_energy(path) for name, path in self._domains.items()}
+
         # Calculate energy consumption per domain (in joules)
         duration = end_time - self._start_time
-
+
         # Handle counter wrap-around (RAPL counters are typically 32-bit)
         MAX_ENERGY_UJ = 2**32
-
+
         domain_energy = {}
         for name in self._domains:
             start = self._start_energy.get(name, 0)
             end = end_energy.get(name, 0)
-
+
             if end >= start:
                 delta_uj = end - start
             else:
                 # Counter wrapped around
                 delta_uj = (MAX_ENERGY_UJ - start) + end
-
+
             domain_energy[name] = delta_uj / 1_000_000  # Convert to joules
-
+
         # Aggregate metrics
         total_joules = sum(domain_energy.values())
-
+
         # Extract component-specific energy
         cpu_energy = domain_energy.get("core", 0) or domain_energy.get("package-0", total_joules)
         dram_energy = domain_energy.get("dram", 0)
         gpu_energy = domain_energy.get("uncore", None)  # Integrated GPU
-
+
         # Calculate power
         watts_avg = total_joules / duration if duration > 0 else 0
-
+
         return EnergyMetrics(
             joules=total_joules,
             watts_avg=watts_avg,
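
The wrap-around branch above assumes a 32-bit counter (`MAX_ENERGY_UJ = 2**32`); note that the powercap interface also publishes each domain's true wrap point in a `max_energy_range_uj` file, which this code does not consult. A minimal sketch of the delta logic as implemented:

```python
def energy_delta_uj(start: int, end: int, max_range: int = 2**32) -> int:
    # RAPL counters are monotonic but wrap; if the end reading is below the
    # start reading, assume exactly one wrap happened during the measurement.
    if end >= start:
        return end - start
    return (max_range - start) + end

# Counter wrapped: started 1_000_000 uJ below the wrap point, read 500_000 after.
assert energy_delta_uj(2**32 - 1_000_000, 500_000) == 1_500_000
```

Measurements longer than one full counter period would still undercount, since a single wrap is all this scheme can detect.
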
@@ -151,7 +145,7 @@ class RAPLEnergyMeter(EnergyMeter):
             start_time=datetime.fromtimestamp(self._start_time),
             end_time=datetime.fromtimestamp(end_time),
         )
-
+
     def get_available_domains(self) -> List[str]:
         # Get list of available RAPL domains.
         return list(self._domains.keys())
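
Taken together, these hunks describe the meter's full lifecycle. A usage sketch, assuming the module path (only `from .base import ...` is visible in the diff) and a placeholder workload:

```python
from greenmining.energy.rapl import RAPLEnergyMeter  # module path assumed

meter = RAPLEnergyMeter()
if meter.is_available():
    print("RAPL domains:", meter.get_available_domains())
    meter.start()
    run_workload()  # hypothetical function under measurement
    metrics = meter.stop()
    print(f"{metrics.joules:.2f} J over the run, {metrics.watts_avg:.2f} W average")
```
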
@@ -3,15 +3,25 @@
 from .commit_extractor import CommitExtractor
 from .data_aggregator import DataAggregator
 from .data_analyzer import DataAnalyzer
-from .github_fetcher import GitHubFetcher
-from .local_repo_analyzer import LocalRepoAnalyzer
+from .github_graphql_fetcher import GitHubGraphQLFetcher
+from .local_repo_analyzer import (
+    LocalRepoAnalyzer,
+    CommitAnalysis,
+    RepositoryAnalysis,
+    MethodMetrics,
+    SourceCodeChange,
+)
 from .reports import ReportGenerator
 
 __all__ = [
-    "GitHubFetcher",
+    "GitHubGraphQLFetcher",
     "CommitExtractor",
     "DataAnalyzer",
     "DataAggregator",
     "ReportGenerator",
     "LocalRepoAnalyzer",
+    "CommitAnalysis",
+    "RepositoryAnalysis",
+    "MethodMetrics",
+    "SourceCodeChange",
 ]
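
This re-export change is breaking for downstream imports. A sketch of the 1.0.6 import surface, using only names from the new `__all__`:

```python
# Works in 1.0.6:
from greenmining.services import (
    GitHubGraphQLFetcher,
    LocalRepoAnalyzer,
    CommitAnalysis,
    RepositoryAnalysis,
    MethodMetrics,
    SourceCodeChange,
)

# Worked in 1.0.4, raises ImportError in 1.0.6:
# from greenmining.services import GitHubFetcher
```
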
@@ -41,7 +41,9 @@ class CommitExtractor:
         self.github = Github(github_token) if github_token else None
         self.timeout = timeout
 
-    def extract_from_repositories(self, repositories: list[dict[str, Any] | Repository]) -> list[dict[str, Any]]:
+    def extract_from_repositories(
+        self, repositories: list[dict[str, Any] | Repository]
+    ) -> list[dict[str, Any]]:
         # Extract commits from list of repositories.
         all_commits = []
         failed_repos = []
@@ -74,15 +76,17 @@ class CommitExtractor:
                     pbar.update(1)
                 except TimeoutError:
                     signal.alarm(0)  # Cancel alarm
-                    repo_name = repo.full_name if isinstance(repo, Repository) else repo["full_name"]
-                    colored_print(
-                        f"\nTimeout processing {repo_name} (>{self.timeout}s)", "yellow"
+                    repo_name = (
+                        repo.full_name if isinstance(repo, Repository) else repo["full_name"]
                     )
+                    colored_print(f"\nTimeout processing {repo_name} (>{self.timeout}s)", "yellow")
                     failed_repos.append(repo_name)
                     pbar.update(1)
                 except Exception as e:
                     signal.alarm(0)  # Cancel alarm
-                    repo_name = repo.full_name if isinstance(repo, Repository) else repo["full_name"]
+                    repo_name = (
+                        repo.full_name if isinstance(repo, Repository) else repo["full_name"]
+                    )
                     colored_print(f"\nError processing {repo_name}: {e}", "yellow")
                     failed_repos.append(repo_name)
                     pbar.update(1)
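
The `signal.alarm(0)` calls in both handlers cancel a pending SIGALRM set before each repository is processed. A minimal sketch of that timeout pattern, assuming a handler is installed elsewhere (the installation is not part of this hunk) and noting that SIGALRM is Unix-only:

```python
import signal

def _on_alarm(signum, frame):
    raise TimeoutError  # converts the signal into the exception caught above

signal.signal(signal.SIGALRM, _on_alarm)

timeout = 60
try:
    signal.alarm(timeout)   # schedule SIGALRM in `timeout` seconds
    process_repository()    # hypothetical per-repository work
    signal.alarm(0)         # success: cancel the pending alarm
except TimeoutError:
    signal.alarm(0)         # cancel so a stale alarm cannot fire later
```
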
@@ -1,21 +1,19 @@
-"""
-================================================================================
-DEADCODE - OLD REST API IMPLEMENTATION
-================================================================================
-
-This file contains the OLD GitHub REST API implementation.
-It has been REPLACED by GitHubGraphQLFetcher for better performance.
-
-Performance comparison:
-    REST API: 10+ requests for 100 repos, ~2 minutes
-    GraphQL API: 1-2 requests for 100 repos, ~15 seconds (10x faster!)
-
-USE INSTEAD: greenmining.services.github_graphql_fetcher.GitHubGraphQLFetcher
-
-This file is kept for reference only. Do not use in production.
-
-================================================================================
-"""
+# ================================================================================
+# DEADCODE - OLD REST API IMPLEMENTATION
+# ================================================================================
+#
+# This file contains the OLD GitHub REST API implementation.
+# It has been REPLACED by GitHubGraphQLFetcher for better performance.
+#
+# Performance comparison:
+#     REST API: 10+ requests for 100 repos, ~2 minutes
+#     GraphQL API: 1-2 requests for 100 repos, ~15 seconds (10x faster!)
+#
+# USE INSTEAD: greenmining.services.github_graphql_fetcher.GitHubGraphQLFetcher
+#
+# This file is kept for reference only. Do not use in production.
+#
+# ================================================================================
 
 # GitHub repository fetcher for green microservices mining.
 
@@ -1,9 +1,7 @@
-"""
-GitHub GraphQL API fetcher for faster and more efficient repository fetching.
-
-GraphQL allows fetching exactly the data you need in a single request,
-reducing API calls and improving rate limit efficiency.
-"""
+# GitHub GraphQL API fetcher for faster and more efficient repository fetching.
+#
+# GraphQL allows fetching exactly the data you need in a single request,
+# reducing API calls and improving rate limit efficiency.
 
 import json
 import time
@@ -15,25 +13,21 @@ from greenmining.models.repository import Repository
 
 
 class GitHubGraphQLFetcher:
-    """
-    Fetch GitHub repositories using GraphQL API v4.
-
-    Benefits over REST API:
-    - Fetch repos + commits in 1 request instead of 100+ REST calls
-    - Get exactly the fields you need (no over-fetching)
-    - Better rate limit efficiency (5000 points/hour vs 5000 requests/hour)
-    - More powerful search capabilities
-    """
+    # Fetch GitHub repositories using GraphQL API v4.
+    #
+    # Benefits over REST API:
+    # - Fetch repos + commits in 1 request instead of 100+ REST calls
+    # - Get exactly the fields you need (no over-fetching)
+    # - Better rate limit efficiency (5000 points/hour vs 5000 requests/hour)
+    # - More powerful search capabilities
 
     GRAPHQL_ENDPOINT = "https://api.github.com/graphql"
 
     def __init__(self, token: str):
-        """
-        Initialize GraphQL fetcher.
-
-        Args:
-            token: GitHub personal access token
-        """
+        # Initialize GraphQL fetcher.
+        #
+        # Args:
+        #     token: GitHub personal access token
         self.token = token
         self.headers = {
             "Authorization": f"Bearer {token}",
@@ -51,22 +45,20 @@ class GitHubGraphQLFetcher:
         pushed_after: Optional[str] = None,
         pushed_before: Optional[str] = None,
     ) -> List[Repository]:
-        """
-        Search GitHub repositories using GraphQL.
-
-        Args:
-            keywords: Search keywords
-            max_repos: Maximum number of repositories to fetch
-            min_stars: Minimum star count
-            languages: Programming languages to filter
-            created_after: Created after date (YYYY-MM-DD)
-            created_before: Created before date (YYYY-MM-DD)
-            pushed_after: Pushed after date (YYYY-MM-DD)
-            pushed_before: Pushed before date (YYYY-MM-DD)
-
-        Returns:
-            List of Repository objects
-        """
+        # Search GitHub repositories using GraphQL.
+        #
+        # Args:
+        #     keywords: Search keywords
+        #     max_repos: Maximum number of repositories to fetch
+        #     min_stars: Minimum star count
+        #     languages: Programming languages to filter
+        #     created_after: Created after date (YYYY-MM-DD)
+        #     created_before: Created before date (YYYY-MM-DD)
+        #     pushed_after: Pushed after date (YYYY-MM-DD)
+        #     pushed_before: Pushed before date (YYYY-MM-DD)
+        #
+        # Returns:
+        #     List of Repository objects
         # Build search query
         search_query = self._build_search_query(
             keywords,
@@ -195,7 +187,7 @@
         pushed_after: Optional[str],
         pushed_before: Optional[str],
     ) -> str:
-        """Build GitHub search query string."""
+        # Build GitHub search query string.
         query_parts = [keywords]
 
         # Star count
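
`_build_search_query` assembles a standard GitHub search string from these parts. A hypothetical sketch of the output format, using GitHub's documented search qualifiers (not all qualifiers the method emits are visible in the diff):

```python
def build_search_query(keywords, min_stars=None, languages=None, created_after=None):
    parts = [keywords]
    if min_stars:
        parts.append(f"stars:>={min_stars}")
    for lang in languages or []:
        parts.append(f"language:{lang}")
    if created_after:
        parts.append(f"created:>={created_after}")
    return " ".join(parts)

print(build_search_query("microservices", min_stars=100, languages=["Python"]))
# microservices stars:>=100 language:Python
```
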
@@ -219,7 +211,7 @@
         return " ".join(query_parts)
 
     def _execute_query(self, query: str, variables: Dict[str, Any]) -> Dict[str, Any]:
-        """Execute GraphQL query."""
+        # Execute GraphQL query.
         payload = {"query": query, "variables": variables}
 
         response = requests.post(
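
The hunk shows only the start of the `requests.post` call; the pattern is a standard authenticated GraphQL POST. A self-contained sketch against the endpoint and headers defined earlier in the class (the `viewer` query and the timeout value are illustrative):

```python
import requests

GRAPHQL_ENDPOINT = "https://api.github.com/graphql"
token = "ghp_..."  # placeholder personal access token

payload = {"query": "query { viewer { login } }", "variables": {}}
response = requests.post(
    GRAPHQL_ENDPOINT,
    json=payload,
    headers={"Authorization": f"Bearer {token}"},
    timeout=30,
)
response.raise_for_status()
print(response.json())  # {"data": {"viewer": {"login": "..."}}}
```
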
@@ -230,7 +222,7 @@
         return response.json()
 
     def _parse_repository(self, node: Dict[str, Any]) -> Repository:
-        """Parse GraphQL repository node to Repository object."""
+        # Parse GraphQL repository node to Repository object.
         # Extract languages
         languages = []
         if node.get("languages") and node["languages"].get("nodes"):
@@ -265,20 +257,18 @@
     def get_repository_commits(
         self, owner: str, name: str, max_commits: int = 100
     ) -> List[Dict[str, Any]]:
-        """
-        Fetch commits for a specific repository using GraphQL.
-
-        This is much faster than REST API as it gets all commits in 1-2 requests
-        instead of paginating through 100 individual REST calls.
-
-        Args:
-            owner: Repository owner
-            name: Repository name
-            max_commits: Maximum commits to fetch
-
-        Returns:
-            List of commit dictionaries
-        """
+        # Fetch commits for a specific repository using GraphQL.
+        #
+        # This is much faster than REST API as it gets all commits in 1-2 requests
+        # instead of paginating through 100 individual REST calls.
+        #
+        # Args:
+        #     owner: Repository owner
+        #     name: Repository name
+        #     max_commits: Maximum commits to fetch
+        #
+        # Returns:
+        #     List of commit dictionaries
         query = """
         query($owner: String!, $name: String!, $first: Int!) {
           repository(owner: $owner, name: $name) {
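
The diff truncates the GraphQL query after `repository(...)`. For orientation, a commit-history query of this shape against GitHub's public schema would look roughly like the following; the exact field selection the package requests is not shown, so this one is illustrative:

```python
COMMITS_QUERY = """
query($owner: String!, $name: String!, $first: Int!) {
  repository(owner: $owner, name: $name) {
    defaultBranchRef {
      target {
        ... on Commit {
          history(first: $first) {
            nodes {
              oid
              messageHeadline
              committedDate
              author { name email }
            }
          }
        }
      }
    }
  }
}
"""
variables = {"owner": "octocat", "name": "Hello-World", "first": 100}
```
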
@@ -359,7 +349,7 @@
         return commits
 
     def save_results(self, repositories: List[Repository], output_file: str):
-        """Save repositories to JSON file."""
+        # Save repositories to JSON file.
         data = {
             "total_repositories": len(repositories),
             "repositories": [repo.to_dict() for repo in repositories],