greenmining-1.0.5-py3-none-any.whl → greenmining-1.0.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- greenmining/__init__.py +54 -2
- greenmining/analyzers/__init__.py +9 -0
- greenmining/analyzers/metrics_power_correlator.py +165 -0
- greenmining/analyzers/power_regression.py +212 -0
- greenmining/analyzers/version_power_analyzer.py +246 -0
- greenmining/config.py +46 -34
- greenmining/dashboard/__init__.py +5 -0
- greenmining/dashboard/app.py +200 -0
- greenmining/energy/__init__.py +8 -1
- greenmining/energy/base.py +45 -35
- greenmining/energy/carbon_reporter.py +242 -0
- greenmining/energy/codecarbon_meter.py +25 -24
- greenmining/energy/cpu_meter.py +144 -0
- greenmining/energy/rapl.py +30 -36
- greenmining/services/__init__.py +13 -3
- greenmining/services/commit_extractor.py +9 -5
- greenmining/services/local_repo_analyzer.py +325 -63
- greenmining/services/reports.py +5 -8
- {greenmining-1.0.5.dist-info → greenmining-1.0.7.dist-info}/METADATA +212 -43
- {greenmining-1.0.5.dist-info → greenmining-1.0.7.dist-info}/RECORD +23 -16
- {greenmining-1.0.5.dist-info → greenmining-1.0.7.dist-info}/WHEEL +0 -0
- {greenmining-1.0.5.dist-info → greenmining-1.0.7.dist-info}/licenses/LICENSE +0 -0
- {greenmining-1.0.5.dist-info → greenmining-1.0.7.dist-info}/top_level.txt +0 -0
greenmining/services/local_repo_analyzer.py
CHANGED
@@ -5,7 +5,9 @@ from __future__ import annotations
 import os
 import re
 import shutil
+import subprocess
 import tempfile
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass, field
 from datetime import datetime, timedelta
 from pathlib import Path
@@ -25,10 +27,62 @@ from greenmining.gsf_patterns import get_pattern_by_keywords, is_green_aware, GS
 from greenmining.utils import colored_print


+@dataclass
+class MethodMetrics:
+    # Per-method analysis metrics from Lizard integration.
+
+    name: str
+    long_name: str
+    filename: str
+    nloc: int = 0
+    complexity: int = 0
+    token_count: int = 0
+    parameters: int = 0
+    start_line: int = 0
+    end_line: int = 0
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "name": self.name,
+            "long_name": self.long_name,
+            "filename": self.filename,
+            "nloc": self.nloc,
+            "complexity": self.complexity,
+            "token_count": self.token_count,
+            "parameters": self.parameters,
+            "start_line": self.start_line,
+            "end_line": self.end_line,
+        }
+
+
+@dataclass
+class SourceCodeChange:
+    # Source code before/after a commit for refactoring detection.
+
+    filename: str
+    source_code_before: Optional[str] = None
+    source_code_after: Optional[str] = None
+    diff: Optional[str] = None
+    added_lines: int = 0
+    deleted_lines: int = 0
+    change_type: str = ""  # ADD, DELETE, MODIFY, RENAME
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "filename": self.filename,
+            "source_code_before": self.source_code_before,
+            "source_code_after": self.source_code_after,
+            "diff": self.diff,
+            "added_lines": self.added_lines,
+            "deleted_lines": self.deleted_lines,
+            "change_type": self.change_type,
+        }
+
+
 @dataclass
 class CommitAnalysis:
     # Analysis result for a single commit.
-
+
     hash: str
     message: str
     author: str
@@ -42,21 +96,31 @@ class CommitAnalysis:
     files_modified: List[str]
     insertions: int
     deletions: int
-
+
     # PyDriller DMM metrics
     dmm_unit_size: Optional[float] = None
     dmm_unit_complexity: Optional[float] = None
     dmm_unit_interfacing: Optional[float] = None
-
+
     # Structural metrics (Lizard)
     total_nloc: int = 0
     total_complexity: int = 0
     max_complexity: int = 0
     methods_count: int = 0
-
+
+    # Method-level analysis (Phase 3.2)
+    methods: List[MethodMetrics] = field(default_factory=list)
+
+    # Source code access (Phase 3.3)
+    source_changes: List[SourceCodeChange] = field(default_factory=list)
+
+    # Energy metrics (Phase 2.2 - populated when energy_tracking=True)
+    energy_joules: Optional[float] = None
+    energy_watts_avg: Optional[float] = None
+
     def to_dict(self) -> Dict[str, Any]:
         # Convert to dictionary.
-        return {
+        result = {
             "commit_hash": self.hash,
             "message": self.message,
             "author": self.author,
@@ -79,11 +143,23 @@ class CommitAnalysis:
             "methods_count": self.methods_count,
         }

+        if self.methods:
+            result["methods"] = [m.to_dict() for m in self.methods]
+
+        if self.source_changes:
+            result["source_changes"] = [s.to_dict() for s in self.source_changes]
+
+        if self.energy_joules is not None:
+            result["energy_joules"] = self.energy_joules
+            result["energy_watts_avg"] = self.energy_watts_avg
+
+        return result
+

 @dataclass
 class RepositoryAnalysis:
     # Complete analysis result for a repository.
-
+
     url: str
     name: str
     total_commits: int
@@ -91,10 +167,11 @@ class RepositoryAnalysis:
     green_commit_rate: float
     commits: List[CommitAnalysis] = field(default_factory=list)
     process_metrics: Dict[str, Any] = field(default_factory=dict)
-
+    energy_metrics: Optional[Dict[str, Any]] = None
+
     def to_dict(self) -> Dict[str, Any]:
         # Convert to dictionary.
-        return {
+        result = {
             "url": self.url,
             "name": self.name,
             "total_commits": self.total_commits,
@@ -103,11 +180,15 @@ class RepositoryAnalysis:
             "commits": [c.to_dict() for c in self.commits],
             "process_metrics": self.process_metrics,
         }
+        if self.energy_metrics:
+            result["energy_metrics"] = self.energy_metrics
+        return result


 class LocalRepoAnalyzer:
     # Analyze repositories directly from GitHub URLs using PyDriller.
-
+    # Supports HTTPS URLs, SSH URLs, and private repositories.
+
     def __init__(
         self,
         clone_path: Optional[Path] = None,
@@ -116,8 +197,29 @@ class LocalRepoAnalyzer:
         skip_merges: bool = True,
         compute_process_metrics: bool = True,
         cleanup_after: bool = True,
+        ssh_key_path: Optional[str] = None,
+        github_token: Optional[str] = None,
+        energy_tracking: bool = False,
+        energy_backend: str = "rapl",
+        method_level_analysis: bool = False,
+        include_source_code: bool = False,
+        process_metrics: str = "standard",
     ):
         # Initialize the local repository analyzer.
+        # Args:
+        #     clone_path: Directory to clone repos into
+        #     max_commits: Maximum commits to analyze per repo
+        #     days_back: How far back to analyze
+        #     skip_merges: Skip merge commits
+        #     compute_process_metrics: Compute PyDriller process metrics
+        #     cleanup_after: Remove cloned repos after analysis
+        #     ssh_key_path: Path to SSH private key for private repos
+        #     github_token: GitHub token for private HTTPS repos
+        #     energy_tracking: Enable automatic energy measurement
+        #     energy_backend: Energy measurement backend (rapl, codecarbon)
+        #     method_level_analysis: Extract per-method metrics via Lizard
+        #     include_source_code: Include source code before/after in results
+        #     process_metrics: "standard" or "full" PyDriller process metrics
         self.clone_path = clone_path or Path(tempfile.gettempdir()) / "greenmining_repos"
         self.clone_path.mkdir(parents=True, exist_ok=True)
         self.max_commits = max_commits
@@ -126,7 +228,53 @@ class LocalRepoAnalyzer:
         self.compute_process_metrics = compute_process_metrics
         self.cleanup_after = cleanup_after
         self.gsf_patterns = GSF_PATTERNS
-
+
+        # Phase 1.3: Private repository support
+        self.ssh_key_path = ssh_key_path
+        self.github_token = github_token
+
+        # Phase 2.2: Integrated energy tracking
+        self.energy_tracking = energy_tracking
+        self.energy_backend = energy_backend
+        self._energy_meter = None
+        if energy_tracking:
+            self._init_energy_meter()
+
+        # Phase 3.2: Method-level analysis
+        self.method_level_analysis = method_level_analysis
+
+        # Phase 3.3: Source code access
+        self.include_source_code = include_source_code
+
+        # Phase 3.1: Full process metrics mode
+        self.process_metrics_mode = process_metrics
+
+    def _init_energy_meter(self):
+        # Initialize the energy measurement backend.
+        try:
+            from greenmining.energy.base import get_energy_meter
+
+            self._energy_meter = get_energy_meter(self.energy_backend)
+        except Exception as e:
+            colored_print(f" Warning: Energy tracking unavailable: {e}", "yellow")
+            self.energy_tracking = False
+
+    def _prepare_auth_url(self, url: str) -> str:
+        # Prepare authenticated URL for private repositories.
+        if self.github_token and url.startswith("https://"):
+            # Inject token into HTTPS URL for private repo access
+            return url.replace("https://", f"https://x-access-token:{self.github_token}@")
+        return url
+
+    def _setup_ssh_env(self) -> Dict[str, str]:
+        # Set up SSH environment for private repository cloning.
+        env = os.environ.copy()
+        if self.ssh_key_path:
+            ssh_key = os.path.expanduser(self.ssh_key_path)
+            if os.path.exists(ssh_key):
+                env["GIT_SSH_COMMAND"] = f"ssh -i {ssh_key} -o StrictHostKeyChecking=no"
+        return env
+
     def _parse_repo_url(self, url: str) -> tuple[str, str]:
         # Parse repository URL to extract owner and name.
         # Handle HTTPS URLs
@@ -134,66 +282,111 @@ class LocalRepoAnalyzer:
         match = re.search(https_pattern, url)
         if match:
             return match.group(1), match.group(2).replace(".git", "")
-
+
         # Handle SSH URLs
         ssh_pattern = r"git@github\.com:([^/]+)/([^/\.]+)"
         match = re.search(ssh_pattern, url)
         if match:
             return match.group(1), match.group(2).replace(".git", "")
-
+
         raise ValueError(f"Could not parse GitHub URL: {url}")
-
+
     def _get_pattern_details(self, matched_patterns: List[str]) -> List[Dict[str, Any]]:
         # Get detailed pattern information.
         details = []
         for pattern_id, pattern in self.gsf_patterns.items():
             if pattern["name"] in matched_patterns:
-                details.append({
-                    "name": pattern["name"],
-                    "category": pattern["category"],
-                    "description": pattern["description"],
-                    "sci_impact": pattern["sci_impact"],
-                })
+                details.append(
+                    {
+                        "name": pattern["name"],
+                        "category": pattern["category"],
+                        "description": pattern["description"],
+                        "sci_impact": pattern["sci_impact"],
+                    }
+                )
         return details
-
+
+    def _extract_method_metrics(self, commit) -> List[MethodMetrics]:
+        # Extract per-method metrics from modified files using Lizard (via PyDriller).
+        methods = []
+        try:
+            for mod in commit.modified_files:
+                if mod.methods:
+                    for method in mod.methods:
+                        methods.append(
+                            MethodMetrics(
+                                name=method.name,
+                                long_name=method.long_name,
+                                filename=mod.filename,
+                                nloc=method.nloc,
+                                complexity=method.complexity,
+                                token_count=method.token_count,
+                                parameters=len(method.parameters),
+                                start_line=method.start_line,
+                                end_line=method.end_line,
+                            )
+                        )
+        except Exception:
+            pass
+        return methods
+
+    def _extract_source_changes(self, commit) -> List[SourceCodeChange]:
+        # Extract source code before/after for each modified file.
+        changes = []
+        try:
+            for mod in commit.modified_files:
+                change = SourceCodeChange(
+                    filename=mod.filename,
+                    source_code_before=mod.source_code_before if mod.source_code_before else None,
+                    source_code_after=mod.source_code if mod.source_code else None,
+                    diff=mod.diff if mod.diff else None,
+                    added_lines=mod.added_lines,
+                    deleted_lines=mod.deleted_lines,
+                    change_type=mod.change_type.name if mod.change_type else "",
+                )
+                changes.append(change)
+        except Exception:
+            pass
+        return changes
+
     def analyze_commit(self, commit) -> CommitAnalysis:
         # Analyze a single PyDriller commit object.
         message = commit.msg or ""
-
+
         # Green awareness check
         green_aware = is_green_aware(message)
-
+
         # GSF pattern matching
         matched_patterns = get_pattern_by_keywords(message)
         pattern_details = self._get_pattern_details(matched_patterns)
-
+
         # Confidence calculation
         pattern_count = len(matched_patterns)
         confidence = "high" if pattern_count >= 2 else "medium" if pattern_count == 1 else "low"
-
+
         # File modifications
         files_modified = [mod.filename for mod in commit.modified_files]
         insertions = sum(mod.added_lines for mod in commit.modified_files)
         deletions = sum(mod.deleted_lines for mod in commit.modified_files)
-
+
         # Delta Maintainability Model (if available)
         dmm_unit_size = None
         dmm_unit_complexity = None
         dmm_unit_interfacing = None
-
+
         try:
             dmm_unit_size = commit.dmm_unit_size
             dmm_unit_complexity = commit.dmm_unit_complexity
             dmm_unit_interfacing = commit.dmm_unit_interfacing
         except Exception:
             pass  # DMM may not be available for all commits
-
+
         # Structural metrics from Lizard (via PyDriller)
         total_nloc = 0
         total_complexity = 0
         max_complexity = 0
         methods_count = 0
-
+
         try:
             for mod in commit.modified_files:
                 if mod.nloc:
@@ -206,7 +399,17 @@ class LocalRepoAnalyzer:
                 methods_count += len(mod.methods)
         except Exception:
             pass  # Structural metrics may fail for some files
-
+
+        # Phase 3.2: Method-level analysis
+        methods = []
+        if self.method_level_analysis:
+            methods = self._extract_method_metrics(commit)
+
+        # Phase 3.3: Source code access
+        source_changes = []
+        if self.include_source_code:
+            source_changes = self._extract_source_changes(commit)
+
         return CommitAnalysis(
             hash=commit.hash,
             message=message,
@@ -228,66 +431,93 @@ class LocalRepoAnalyzer:
             total_complexity=total_complexity,
             max_complexity=max_complexity,
             methods_count=methods_count,
+            methods=methods,
+            source_changes=source_changes,
         )
-
+
     def analyze_repository(self, url: str) -> RepositoryAnalysis:
         # Analyze a repository from its URL.
         owner, repo_name = self._parse_repo_url(url)
         full_name = f"{owner}/{repo_name}"
-
+
         colored_print(f"\n Analyzing repository: {full_name}", "cyan")
-
+
+        # Phase 1.3: Prepare authenticated URL for private repos
+        auth_url = self._prepare_auth_url(url)
+
         # Calculate date range
         since_date = datetime.now() - timedelta(days=self.days_back)
-
+
         # Configure PyDriller Repository
         repo_config = {
-            "path_to_repo": url,
+            "path_to_repo": auth_url,
             "since": since_date,
             "only_no_merge": self.skip_merges,
         }
-
+
         # Clone to specific path if needed
         local_path = self.clone_path / repo_name
         if local_path.exists():
             shutil.rmtree(local_path)
-
+
         repo_config["clone_repo_to"] = str(self.clone_path)
-
+
         colored_print(f" Cloning to: {local_path}", "cyan")
-
+
+        # Phase 2.2: Start energy measurement if enabled
+        energy_result = None
+        if self.energy_tracking and self._energy_meter:
+            try:
+                self._energy_meter.start()
+            except Exception as e:
+                colored_print(f" Warning: Energy measurement start failed: {e}", "yellow")
+
         commits_analyzed = []
         commit_count = 0
-
+
         try:
             for commit in Repository(**repo_config).traverse_commits():
                 if commit_count >= self.max_commits:
                     break
-
+
                 try:
                     analysis = self.analyze_commit(commit)
                     commits_analyzed.append(analysis)
                     commit_count += 1
-
+
                     if commit_count % 50 == 0:
                         colored_print(f" Processed {commit_count} commits...", "cyan")
-
+
                 except Exception as e:
-                    colored_print(f" Warning: Error analyzing commit {commit.hash[:8]}: {e}", "yellow")
+                    colored_print(
+                        f" Warning: Error analyzing commit {commit.hash[:8]}: {e}", "yellow"
+                    )
                     continue
-
+
             colored_print(f" Analyzed {len(commits_analyzed)} commits", "green")
-
+
+            # Phase 2.2: Stop energy measurement
+            if self.energy_tracking and self._energy_meter:
+                try:
+                    energy_result = self._energy_meter.stop()
+                except Exception as e:
+                    colored_print(f" Warning: Energy measurement stop failed: {e}", "yellow")
+
             # Compute process metrics if enabled
            process_metrics = {}
            if self.compute_process_metrics and local_path.exists():
                colored_print(" Computing process metrics...", "cyan")
                process_metrics = self._compute_process_metrics(str(local_path))
-
+
            # Calculate summary
            green_commits = sum(1 for c in commits_analyzed if c.green_aware)
            green_rate = green_commits / len(commits_analyzed) if commits_analyzed else 0
-
+
+            # Build energy metrics dict
+            energy_dict = None
+            if energy_result:
+                energy_dict = energy_result.to_dict()
+
             result = RepositoryAnalysis(
                 url=url,
                 name=full_name,
@@ -296,22 +526,23 @@ class LocalRepoAnalyzer:
                 green_commit_rate=green_rate,
                 commits=commits_analyzed,
                 process_metrics=process_metrics,
+                energy_metrics=energy_dict,
             )
-
+
             return result
-
+
         finally:
             # Cleanup if requested
             if self.cleanup_after and local_path.exists():
                 colored_print(f" Cleaning up: {local_path}", "cyan")
                 shutil.rmtree(local_path, ignore_errors=True)
-
+
     def _compute_process_metrics(self, repo_path: str) -> Dict[str, Any]:
         # Compute PyDriller process metrics for the repository.
         metrics = {}
         since_date = datetime.now() - timedelta(days=self.days_back)
         to_date = datetime.now()
-
+
         try:
             # ChangeSet metrics
             cs = ChangeSet(repo_path, since=since_date, to=to_date)
@@ -319,62 +550,76 @@ class LocalRepoAnalyzer:
             metrics["change_set_avg"] = cs.avg()
         except Exception as e:
             colored_print(f" Warning: ChangeSet metrics failed: {e}", "yellow")
-
+
         try:
             # CodeChurn metrics
             churn = CodeChurn(repo_path, since=since_date, to=to_date)
             metrics["code_churn"] = churn.count()
         except Exception as e:
             colored_print(f" Warning: CodeChurn metrics failed: {e}", "yellow")
-
+
         try:
             # CommitsCount metrics
             cc = CommitsCount(repo_path, since=since_date, to=to_date)
             metrics["commits_per_file"] = cc.count()
         except Exception as e:
             colored_print(f" Warning: CommitsCount metrics failed: {e}", "yellow")
-
+
         try:
             # ContributorsCount metrics
             contrib = ContributorsCount(repo_path, since=since_date, to=to_date)
             metrics["contributors_per_file"] = contrib.count()
         except Exception as e:
             colored_print(f" Warning: ContributorsCount metrics failed: {e}", "yellow")
-
+
         try:
             # ContributorsExperience metrics
             exp = ContributorsExperience(repo_path, since=since_date, to=to_date)
             metrics["contributors_experience"] = exp.count()
         except Exception as e:
             colored_print(f" Warning: ContributorsExperience metrics failed: {e}", "yellow")
-
+
         try:
             # HistoryComplexity metrics
             hc = HistoryComplexity(repo_path, since=since_date, to=to_date)
             metrics["history_complexity"] = hc.count()
         except Exception as e:
             colored_print(f" Warning: HistoryComplexity metrics failed: {e}", "yellow")
-
+
         try:
             # HunksCount metrics
             hunks = HunksCount(repo_path, since=since_date, to=to_date)
             metrics["hunks_count"] = hunks.count()
         except Exception as e:
             colored_print(f" Warning: HunksCount metrics failed: {e}", "yellow")
-
+
         try:
             # LinesCount metrics
             lines = LinesCount(repo_path, since=since_date, to=to_date)
             metrics["lines_count"] = lines.count()
         except Exception as e:
             colored_print(f" Warning: LinesCount metrics failed: {e}", "yellow")
-
+
         return metrics
-
-    def analyze_repositories(self, urls: List[str]) -> List[RepositoryAnalysis]:
+
+    def analyze_repositories(
+        self,
+        urls: List[str],
+        parallel_workers: int = 1,
+        output_format: str = "dict",
+    ) -> List[RepositoryAnalysis]:
         # Analyze multiple repositories from URLs.
+        # Args:
+        #     urls: List of repository URLs to analyze
+        #     parallel_workers: Number of concurrent workers (1 = sequential)
+        #     output_format: Output format (dict, json, csv)
+        if parallel_workers <= 1:
+            return self._analyze_sequential(urls)
+        return self._analyze_parallel(urls, parallel_workers)
+
+    def _analyze_sequential(self, urls: List[str]) -> List[RepositoryAnalysis]:
+        # Analyze repositories sequentially.
         results = []
-
         for i, url in enumerate(urls, 1):
             colored_print(f"\n[{i}/{len(urls)}] Processing repository...", "cyan")
             try:
@@ -383,5 +628,22 @@ class LocalRepoAnalyzer:
             except Exception as e:
                 colored_print(f" Error analyzing {url}: {e}", "red")
                 continue
-
+        return results
+
+    def _analyze_parallel(self, urls: List[str], max_workers: int) -> List[RepositoryAnalysis]:
+        # Analyze repositories in parallel using thread pool.
+        results = []
+        colored_print(f"\n Analyzing {len(urls)} repositories with {max_workers} workers", "cyan")
+
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            future_to_url = {executor.submit(self.analyze_repository, url): url for url in urls}
+            for future in as_completed(future_to_url):
+                url = future_to_url[future]
+                try:
+                    result = future.result()
+                    results.append(result)
+                    colored_print(f" Completed: {result.name}", "green")
+                except Exception as e:
+                    colored_print(f" Error analyzing {url}: {e}", "red")
+
         return results
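For orientation, here is a minimal usage sketch of the options this diff adds to LocalRepoAnalyzer. It is illustrative only: the repository URL and parameter values are placeholders, and energy_tracking assumes a working backend (RAPL or CodeCarbon) on the host.

    # Sketch, not from the package docs: exercises the new 1.0.7 parameters.
    from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer

    analyzer = LocalRepoAnalyzer(
        max_commits=200,              # placeholder limit
        energy_tracking=True,         # Phase 2.2: start/stop an energy meter per repo
        energy_backend="rapl",        # or "codecarbon"
        method_level_analysis=True,   # Phase 3.2: per-method Lizard metrics
        include_source_code=False,    # Phase 3.3: before/after source in results
        github_token=None,            # Phase 1.3: set for private HTTPS repos
    )

    # parallel_workers > 1 routes through the new ThreadPoolExecutor path.
    results = analyzer.analyze_repositories(
        ["https://github.com/example/repo"],  # placeholder URL
        parallel_workers=4,
    )

    for repo in results:
        data = repo.to_dict()
        # "energy_metrics" appears only when measurement succeeded.
        print(data["name"], data.get("energy_metrics"))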
greenmining/services/reports.py
CHANGED
@@ -1,4 +1,5 @@
 # Report generation for green mining analysis.
+"""Report generation module for GreenMining analysis results."""

 from __future__ import annotations

@@ -228,12 +229,10 @@ Commits were analyzed using a keyword and heuristic-based classification framewo
     # Pattern descriptions
     pattern_details = []
     for i, pattern in enumerate(patterns[:10], 1):
-        pattern_details.append(
-            f"""**{i}. {pattern['pattern_name']}**
+        pattern_details.append(f"""**{i}. {pattern['pattern_name']}**
 - Frequency: {format_number(pattern['count'])} commits ({format_percentage(pattern['percentage'])})
 - Confidence Distribution: HIGH={conf['HIGH']}, MEDIUM={conf['MEDIUM']}, LOW={conf['LOW']}
-- Example Commits: {', '.join([c[:8] for c in pattern['example_commits'][:3]])}"""
-        )
+- Example Commits: {', '.join([c[:8] for c in pattern['example_commits'][:3]])}""")

     return f"""#### 2.2 Known Green Patterns & Tactics Applied

@@ -258,12 +257,10 @@ No novel microservice-specific green practices were automatically detected. Manu

     pattern_list = []
     for pattern in emergent:
-        pattern_list.append(
-            f"""**Pattern:** {pattern['pattern_name']}
+        pattern_list.append(f"""**Pattern:** {pattern['pattern_name']}
 - Occurrences: {pattern['count']}
 - Description: {pattern['description']}
-- Example Commits: {', '.join([c[:8] for c in pattern['example_commits'][:3]])}"""
-        )
+- Example Commits: {', '.join([c[:8] for c in pattern['example_commits'][:3]])}""")

     return f"""#### 2.3 Emerging Practices Discovered

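To make the inlined-f-string change in reports.py concrete, here is a small sketch of what one pattern_details entry renders to. The values are made up, and plain interpolation stands in for the package's format_number/format_percentage helpers:

    # Sketch with placeholder data; real entries come from aggregated pattern dicts.
    pattern = {
        "pattern_name": "Cache Static Data",
        "count": 12,
        "percentage": 4.8,
        "example_commits": ["a1b2c3d4e5f6", "0f9e8d7c6b5a"],
    }
    conf = {"HIGH": 3, "MEDIUM": 7, "LOW": 2}

    entry = f"""**1. {pattern['pattern_name']}**
    - Frequency: {pattern['count']} commits ({pattern['percentage']}%)
    - Confidence Distribution: HIGH={conf['HIGH']}, MEDIUM={conf['MEDIUM']}, LOW={conf['LOW']}
    - Example Commits: {', '.join([c[:8] for c in pattern['example_commits'][:3]])}"""
    print(entry)  # one Markdown block per detected pattern, as appended in the loop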