greenmining 1.2.4__py3-none-any.whl → 1.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- greenmining/__init__.py +7 -1
- greenmining/analyzers/__init__.py +1 -1
- greenmining/analyzers/code_diff_analyzer.py +6 -6
- greenmining/analyzers/metrics_power_correlator.py +17 -17
- greenmining/analyzers/statistical_analyzer.py +5 -5
- greenmining/analyzers/temporal_analyzer.py +16 -17
- greenmining/controllers/repository_controller.py +1 -3
- greenmining/energy/__init__.py +3 -3
- greenmining/energy/base.py +15 -16
- greenmining/energy/carbon_reporter.py +10 -10
- greenmining/energy/codecarbon_meter.py +4 -6
- greenmining/energy/cpu_meter.py +6 -7
- greenmining/energy/rapl.py +6 -8
- greenmining/models/aggregated_stats.py +2 -3
- greenmining/models/commit.py +2 -2
- greenmining/models/repository.py +5 -6
- greenmining/services/__init__.py +2 -2
- greenmining/services/github_graphql_fetcher.py +8 -8
- greenmining/services/local_repo_analyzer.py +79 -43
- greenmining/services/reports.py +22 -22
- {greenmining-1.2.4.dist-info → greenmining-1.2.6.dist-info}/METADATA +1 -1
- greenmining-1.2.6.dist-info/RECORD +34 -0
- greenmining-1.2.4.dist-info/RECORD +0 -34
- {greenmining-1.2.4.dist-info → greenmining-1.2.6.dist-info}/WHEEL +0 -0
- {greenmining-1.2.4.dist-info → greenmining-1.2.6.dist-info}/licenses/LICENSE +0 -0
- {greenmining-1.2.4.dist-info → greenmining-1.2.6.dist-info}/top_level.txt +0 -0
greenmining/models/commit.py
CHANGED
|
@@ -44,12 +44,12 @@ class Commit:
|
|
|
44
44
|
}
|
|
45
45
|
|
|
46
46
|
@classmethod
|
|
47
|
-
def from_dict(cls, data: dict) ->
|
|
47
|
+
def from_dict(cls, data: dict) -> Commit:
|
|
48
48
|
# Create from dictionary.
|
|
49
49
|
return cls(**{k: v for k, v in data.items() if k in cls.__annotations__})
|
|
50
50
|
|
|
51
51
|
@classmethod
|
|
52
|
-
def from_pydriller_commit(cls, commit, repo_name: str) ->
|
|
52
|
+
def from_pydriller_commit(cls, commit, repo_name: str) -> Commit:
|
|
53
53
|
# Create from PyDriller commit object.
|
|
54
54
|
return cls(
|
|
55
55
|
commit_id=commit.hash,
|
greenmining/models/repository.py
CHANGED
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
from dataclasses import dataclass, field
|
|
6
|
-
from typing import Optional
|
|
7
6
|
|
|
8
7
|
|
|
9
8
|
@dataclass
|
|
@@ -16,21 +15,21 @@ class Repository:
|
|
|
16
15
|
full_name: str
|
|
17
16
|
url: str
|
|
18
17
|
clone_url: str
|
|
19
|
-
language:
|
|
18
|
+
language: str | None
|
|
20
19
|
stars: int
|
|
21
20
|
forks: int
|
|
22
21
|
watchers: int
|
|
23
22
|
open_issues: int
|
|
24
23
|
last_updated: str
|
|
25
24
|
created_at: str
|
|
26
|
-
description:
|
|
25
|
+
description: str | None
|
|
27
26
|
main_branch: str
|
|
28
27
|
topics: list[str] = field(default_factory=list)
|
|
29
28
|
size: int = 0
|
|
30
29
|
has_issues: bool = True
|
|
31
30
|
has_wiki: bool = True
|
|
32
31
|
archived: bool = False
|
|
33
|
-
license:
|
|
32
|
+
license: str | None = None
|
|
34
33
|
|
|
35
34
|
def to_dict(self) -> dict:
|
|
36
35
|
# Convert to dictionary.
|
|
@@ -59,12 +58,12 @@ class Repository:
|
|
|
59
58
|
}
|
|
60
59
|
|
|
61
60
|
@classmethod
|
|
62
|
-
def from_dict(cls, data: dict) ->
|
|
61
|
+
def from_dict(cls, data: dict) -> Repository:
|
|
63
62
|
# Create from dictionary.
|
|
64
63
|
return cls(**{k: v for k, v in data.items() if k in cls.__annotations__})
|
|
65
64
|
|
|
66
65
|
@classmethod
|
|
67
|
-
def from_github_repo(cls, repo, repo_id: int) ->
|
|
66
|
+
def from_github_repo(cls, repo, repo_id: int) -> Repository:
|
|
68
67
|
# Create from PyGithub repository object.
|
|
69
68
|
return cls(
|
|
70
69
|
repo_id=repo_id,
|
greenmining/services/__init__.py
CHANGED
|
@@ -5,10 +5,10 @@ from .data_aggregator import DataAggregator
|
|
|
5
5
|
from .data_analyzer import DataAnalyzer
|
|
6
6
|
from .github_graphql_fetcher import GitHubGraphQLFetcher
|
|
7
7
|
from .local_repo_analyzer import (
|
|
8
|
-
LocalRepoAnalyzer,
|
|
9
8
|
CommitAnalysis,
|
|
10
|
-
|
|
9
|
+
LocalRepoAnalyzer,
|
|
11
10
|
MethodMetrics,
|
|
11
|
+
RepositoryAnalysis,
|
|
12
12
|
SourceCodeChange,
|
|
13
13
|
)
|
|
14
14
|
from .reports import ReportGenerator
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
4
|
import time
|
|
5
|
-
from typing import Any,
|
|
5
|
+
from typing import Any, Optional
|
|
6
6
|
|
|
7
7
|
import requests
|
|
8
8
|
|
|
@@ -30,12 +30,12 @@ class GitHubGraphQLFetcher:
|
|
|
30
30
|
keywords: str = "microservices",
|
|
31
31
|
max_repos: int = 100,
|
|
32
32
|
min_stars: int = 100,
|
|
33
|
-
languages: Optional[
|
|
33
|
+
languages: Optional[list[str]] = None,
|
|
34
34
|
created_after: Optional[str] = None,
|
|
35
35
|
created_before: Optional[str] = None,
|
|
36
36
|
pushed_after: Optional[str] = None,
|
|
37
37
|
pushed_before: Optional[str] = None,
|
|
38
|
-
) ->
|
|
38
|
+
) -> list[Repository]:
|
|
39
39
|
# Search GitHub repositories using GraphQL.
|
|
40
40
|
#
|
|
41
41
|
# Args:
|
|
@@ -172,7 +172,7 @@ class GitHubGraphQLFetcher:
|
|
|
172
172
|
self,
|
|
173
173
|
keywords: str,
|
|
174
174
|
min_stars: int,
|
|
175
|
-
languages: Optional[
|
|
175
|
+
languages: Optional[list[str]],
|
|
176
176
|
created_after: Optional[str],
|
|
177
177
|
created_before: Optional[str],
|
|
178
178
|
pushed_after: Optional[str],
|
|
@@ -201,7 +201,7 @@ class GitHubGraphQLFetcher:
|
|
|
201
201
|
|
|
202
202
|
return " ".join(query_parts)
|
|
203
203
|
|
|
204
|
-
def _execute_query(self, query: str, variables:
|
|
204
|
+
def _execute_query(self, query: str, variables: dict[str, Any]) -> dict[str, Any]:
|
|
205
205
|
# Execute GraphQL query.
|
|
206
206
|
payload = {"query": query, "variables": variables}
|
|
207
207
|
|
|
@@ -212,7 +212,7 @@ class GitHubGraphQLFetcher:
|
|
|
212
212
|
response.raise_for_status()
|
|
213
213
|
return response.json()
|
|
214
214
|
|
|
215
|
-
def _parse_repository(self, node:
|
|
215
|
+
def _parse_repository(self, node: dict[str, Any], repo_id: int = 0) -> Repository:
|
|
216
216
|
# Parse GraphQL repository node to Repository object.
|
|
217
217
|
full_name = node.get("nameWithOwner", "")
|
|
218
218
|
owner = full_name.split("/")[0] if "/" in full_name else ""
|
|
@@ -252,7 +252,7 @@ class GitHubGraphQLFetcher:
|
|
|
252
252
|
|
|
253
253
|
def get_repository_commits(
|
|
254
254
|
self, owner: str, name: str, max_commits: int = 100
|
|
255
|
-
) ->
|
|
255
|
+
) -> list[dict[str, Any]]:
|
|
256
256
|
# Fetch commits for a specific repository using GraphQL.
|
|
257
257
|
#
|
|
258
258
|
# Args:
|
|
@@ -341,7 +341,7 @@ class GitHubGraphQLFetcher:
|
|
|
341
341
|
|
|
342
342
|
return commits
|
|
343
343
|
|
|
344
|
-
def save_results(self, repositories:
|
|
344
|
+
def save_results(self, repositories: list[Repository], output_file: str):
|
|
345
345
|
# Save repositories to JSON file.
|
|
346
346
|
data = {
|
|
347
347
|
"total_repositories": len(repositories),
|
|
@@ -5,12 +5,11 @@ from __future__ import annotations
|
|
|
5
5
|
import os
|
|
6
6
|
import re
|
|
7
7
|
import shutil
|
|
8
|
-
import tempfile
|
|
9
8
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
10
9
|
from dataclasses import dataclass, field
|
|
11
10
|
from datetime import datetime, timedelta
|
|
12
11
|
from pathlib import Path
|
|
13
|
-
from typing import Any
|
|
12
|
+
from typing import Any
|
|
14
13
|
|
|
15
14
|
from pydriller import Repository
|
|
16
15
|
from pydriller.metrics.process.change_set import ChangeSet
|
|
@@ -22,7 +21,7 @@ from pydriller.metrics.process.history_complexity import HistoryComplexity
|
|
|
22
21
|
from pydriller.metrics.process.hunks_count import HunksCount
|
|
23
22
|
from pydriller.metrics.process.lines_count import LinesCount
|
|
24
23
|
|
|
25
|
-
from greenmining.gsf_patterns import get_pattern_by_keywords, is_green_aware
|
|
24
|
+
from greenmining.gsf_patterns import GSF_PATTERNS, get_pattern_by_keywords, is_green_aware
|
|
26
25
|
from greenmining.utils import colored_print
|
|
27
26
|
|
|
28
27
|
|
|
@@ -40,7 +39,7 @@ class MethodMetrics:
|
|
|
40
39
|
start_line: int = 0
|
|
41
40
|
end_line: int = 0
|
|
42
41
|
|
|
43
|
-
def to_dict(self) ->
|
|
42
|
+
def to_dict(self) -> dict[str, Any]:
|
|
44
43
|
return {
|
|
45
44
|
"name": self.name,
|
|
46
45
|
"long_name": self.long_name,
|
|
@@ -59,14 +58,14 @@ class SourceCodeChange:
|
|
|
59
58
|
# Source code before/after a commit for refactoring detection.
|
|
60
59
|
|
|
61
60
|
filename: str
|
|
62
|
-
source_code_before:
|
|
63
|
-
source_code_after:
|
|
64
|
-
diff:
|
|
61
|
+
source_code_before: str | None = None
|
|
62
|
+
source_code_after: str | None = None
|
|
63
|
+
diff: str | None = None
|
|
65
64
|
added_lines: int = 0
|
|
66
65
|
deleted_lines: int = 0
|
|
67
66
|
change_type: str = "" # ADD, DELETE, MODIFY, RENAME
|
|
68
67
|
|
|
69
|
-
def to_dict(self) ->
|
|
68
|
+
def to_dict(self) -> dict[str, Any]:
|
|
70
69
|
return {
|
|
71
70
|
"filename": self.filename,
|
|
72
71
|
"source_code_before": self.source_code_before,
|
|
@@ -88,18 +87,18 @@ class CommitAnalysis:
|
|
|
88
87
|
author_email: str
|
|
89
88
|
date: datetime
|
|
90
89
|
green_aware: bool
|
|
91
|
-
gsf_patterns_matched:
|
|
90
|
+
gsf_patterns_matched: list[str]
|
|
92
91
|
pattern_count: int
|
|
93
|
-
pattern_details:
|
|
92
|
+
pattern_details: list[dict[str, Any]]
|
|
94
93
|
confidence: str
|
|
95
|
-
files_modified:
|
|
94
|
+
files_modified: list[str]
|
|
96
95
|
insertions: int
|
|
97
96
|
deletions: int
|
|
98
97
|
|
|
99
98
|
# PyDriller DMM metrics
|
|
100
|
-
dmm_unit_size:
|
|
101
|
-
dmm_unit_complexity:
|
|
102
|
-
dmm_unit_interfacing:
|
|
99
|
+
dmm_unit_size: float | None = None
|
|
100
|
+
dmm_unit_complexity: float | None = None
|
|
101
|
+
dmm_unit_interfacing: float | None = None
|
|
103
102
|
|
|
104
103
|
# Structural metrics (Lizard)
|
|
105
104
|
total_nloc: int = 0
|
|
@@ -108,16 +107,16 @@ class CommitAnalysis:
|
|
|
108
107
|
methods_count: int = 0
|
|
109
108
|
|
|
110
109
|
# Method-level analysis (Phase 3.2)
|
|
111
|
-
methods:
|
|
110
|
+
methods: list[MethodMetrics] = field(default_factory=list)
|
|
112
111
|
|
|
113
112
|
# Source code access (Phase 3.3)
|
|
114
|
-
source_changes:
|
|
113
|
+
source_changes: list[SourceCodeChange] = field(default_factory=list)
|
|
115
114
|
|
|
116
115
|
# Energy metrics (Phase 2.2 - populated when energy_tracking=True)
|
|
117
|
-
energy_joules:
|
|
118
|
-
energy_watts_avg:
|
|
116
|
+
energy_joules: float | None = None
|
|
117
|
+
energy_watts_avg: float | None = None
|
|
119
118
|
|
|
120
|
-
def to_dict(self) ->
|
|
119
|
+
def to_dict(self) -> dict[str, Any]:
|
|
121
120
|
# Convert to dictionary.
|
|
122
121
|
result = {
|
|
123
122
|
"commit_hash": self.hash,
|
|
@@ -164,11 +163,11 @@ class RepositoryAnalysis:
|
|
|
164
163
|
total_commits: int
|
|
165
164
|
green_commits: int
|
|
166
165
|
green_commit_rate: float
|
|
167
|
-
commits:
|
|
168
|
-
process_metrics:
|
|
169
|
-
energy_metrics:
|
|
166
|
+
commits: list[CommitAnalysis] = field(default_factory=list)
|
|
167
|
+
process_metrics: dict[str, Any] = field(default_factory=dict)
|
|
168
|
+
energy_metrics: dict[str, Any] | None = None
|
|
170
169
|
|
|
171
|
-
def to_dict(self) ->
|
|
170
|
+
def to_dict(self) -> dict[str, Any]:
|
|
172
171
|
# Convert to dictionary.
|
|
173
172
|
result = {
|
|
174
173
|
"url": self.url,
|
|
@@ -190,22 +189,24 @@ class LocalRepoAnalyzer:
|
|
|
190
189
|
|
|
191
190
|
def __init__(
|
|
192
191
|
self,
|
|
193
|
-
clone_path:
|
|
192
|
+
clone_path: Path | None = None,
|
|
194
193
|
max_commits: int = 500,
|
|
195
194
|
days_back: int = 730,
|
|
196
195
|
skip_merges: bool = True,
|
|
197
196
|
compute_process_metrics: bool = True,
|
|
198
197
|
cleanup_after: bool = True,
|
|
199
|
-
ssh_key_path:
|
|
200
|
-
github_token:
|
|
198
|
+
ssh_key_path: str | None = None,
|
|
199
|
+
github_token: str | None = None,
|
|
201
200
|
energy_tracking: bool = False,
|
|
202
201
|
energy_backend: str = "rapl",
|
|
203
202
|
method_level_analysis: bool = False,
|
|
204
203
|
include_source_code: bool = False,
|
|
205
204
|
process_metrics: str = "standard",
|
|
206
|
-
since_date:
|
|
207
|
-
to_date:
|
|
205
|
+
since_date: datetime | None = None,
|
|
206
|
+
to_date: datetime | None = None,
|
|
208
207
|
commit_order: str = "newest_first",
|
|
208
|
+
shallow_clone: bool = True,
|
|
209
|
+
clone_depth: int | None = None,
|
|
209
210
|
):
|
|
210
211
|
# Initialize the local repository analyzer.
|
|
211
212
|
# Args:
|
|
@@ -223,6 +224,8 @@ class LocalRepoAnalyzer:
|
|
|
223
224
|
# include_source_code: Include source code before/after in results
|
|
224
225
|
# process_metrics: "standard" or "full" PyDriller process metrics
|
|
225
226
|
# commit_order: "newest_first" (default) or "oldest_first"
|
|
227
|
+
# shallow_clone: Use shallow cloning to reduce download size (default True)
|
|
228
|
+
# clone_depth: Git clone depth (auto-calculated from max_commits if None)
|
|
226
229
|
self.clone_path = clone_path or Path.cwd() / "greenmining_repos"
|
|
227
230
|
self.clone_path.mkdir(parents=True, exist_ok=True)
|
|
228
231
|
self.max_commits = max_commits
|
|
@@ -233,6 +236,9 @@ class LocalRepoAnalyzer:
|
|
|
233
236
|
self.compute_process_metrics = compute_process_metrics
|
|
234
237
|
self.cleanup_after = cleanup_after
|
|
235
238
|
self.commit_order = commit_order
|
|
239
|
+
self.shallow_clone = shallow_clone
|
|
240
|
+
# Auto-calculate clone depth: max_commits * 3 to account for merges/skipped commits
|
|
241
|
+
self.clone_depth = clone_depth if clone_depth else max(50, max_commits * 3)
|
|
236
242
|
self.gsf_patterns = GSF_PATTERNS
|
|
237
243
|
|
|
238
244
|
# Phase 1.3: Private repository support
|
|
@@ -272,7 +278,7 @@ class LocalRepoAnalyzer:
|
|
|
272
278
|
return url.replace("https://", f"https://x-access-token:{self.github_token}@")
|
|
273
279
|
return url
|
|
274
280
|
|
|
275
|
-
def _setup_ssh_env(self) ->
|
|
281
|
+
def _setup_ssh_env(self) -> dict[str, str]:
|
|
276
282
|
# Set up SSH environment for private repository cloning.
|
|
277
283
|
env = os.environ.copy()
|
|
278
284
|
if self.ssh_key_path:
|
|
@@ -297,10 +303,10 @@ class LocalRepoAnalyzer:
|
|
|
297
303
|
|
|
298
304
|
raise ValueError(f"Could not parse GitHub URL: {url}")
|
|
299
305
|
|
|
300
|
-
def _get_pattern_details(self, matched_patterns:
|
|
306
|
+
def _get_pattern_details(self, matched_patterns: list[str]) -> list[dict[str, Any]]:
|
|
301
307
|
# Get detailed pattern information.
|
|
302
308
|
details = []
|
|
303
|
-
for
|
|
309
|
+
for _pattern_id, pattern in self.gsf_patterns.items():
|
|
304
310
|
if pattern["name"] in matched_patterns:
|
|
305
311
|
details.append(
|
|
306
312
|
{
|
|
@@ -312,7 +318,7 @@ class LocalRepoAnalyzer:
|
|
|
312
318
|
)
|
|
313
319
|
return details
|
|
314
320
|
|
|
315
|
-
def _extract_method_metrics(self, commit) ->
|
|
321
|
+
def _extract_method_metrics(self, commit) -> list[MethodMetrics]:
|
|
316
322
|
# Extract per-method metrics from modified files using Lizard (via PyDriller).
|
|
317
323
|
methods = []
|
|
318
324
|
try:
|
|
@@ -336,7 +342,7 @@ class LocalRepoAnalyzer:
|
|
|
336
342
|
pass
|
|
337
343
|
return methods
|
|
338
344
|
|
|
339
|
-
def _extract_source_changes(self, commit) ->
|
|
345
|
+
def _extract_source_changes(self, commit) -> list[SourceCodeChange]:
|
|
340
346
|
# Extract source code before/after for each modified file.
|
|
341
347
|
changes = []
|
|
342
348
|
try:
|
|
@@ -473,12 +479,39 @@ class LocalRepoAnalyzer:
|
|
|
473
479
|
clone_parent.mkdir(parents=True, exist_ok=True)
|
|
474
480
|
local_path = clone_parent / repo_name
|
|
475
481
|
|
|
476
|
-
|
|
477
|
-
|
|
482
|
+
# Perform shallow clone manually before PyDriller (much faster!)
|
|
483
|
+
if not local_path.exists():
|
|
484
|
+
import subprocess
|
|
478
485
|
|
|
479
|
-
|
|
486
|
+
clone_cmd = ["git", "clone"]
|
|
487
|
+
if self.shallow_clone:
|
|
488
|
+
clone_cmd.extend(["--depth", str(self.clone_depth)])
|
|
489
|
+
clone_cmd.extend([auth_url, str(local_path)])
|
|
480
490
|
|
|
481
|
-
|
|
491
|
+
colored_print(
|
|
492
|
+
f" Cloning to: {local_path} (depth={self.clone_depth if self.shallow_clone else 'full'})",
|
|
493
|
+
"cyan",
|
|
494
|
+
)
|
|
495
|
+
|
|
496
|
+
try:
|
|
497
|
+
subprocess.run(
|
|
498
|
+
clone_cmd,
|
|
499
|
+
capture_output=True,
|
|
500
|
+
text=True,
|
|
501
|
+
check=True,
|
|
502
|
+
timeout=180,
|
|
503
|
+
)
|
|
504
|
+
except subprocess.TimeoutExpired:
|
|
505
|
+
colored_print(" Clone timeout after 180s", "yellow")
|
|
506
|
+
raise
|
|
507
|
+
except subprocess.CalledProcessError as e:
|
|
508
|
+
colored_print(f" Clone failed: {e.stderr}", "red")
|
|
509
|
+
raise
|
|
510
|
+
else:
|
|
511
|
+
colored_print(f" Using existing clone: {local_path}", "cyan")
|
|
512
|
+
|
|
513
|
+
# PyDriller will analyze the already-cloned repo
|
|
514
|
+
repo_config["path_to_repo"] = str(local_path)
|
|
482
515
|
|
|
483
516
|
# Phase 2.2: Start energy measurement if enabled (fresh meter per repo)
|
|
484
517
|
energy_result = None
|
|
@@ -486,6 +519,7 @@ class LocalRepoAnalyzer:
|
|
|
486
519
|
if self.energy_tracking:
|
|
487
520
|
try:
|
|
488
521
|
from greenmining.energy.base import get_energy_meter
|
|
522
|
+
|
|
489
523
|
energy_meter = get_energy_meter(self.energy_backend)
|
|
490
524
|
energy_meter.start()
|
|
491
525
|
except Exception:
|
|
@@ -557,7 +591,7 @@ class LocalRepoAnalyzer:
|
|
|
557
591
|
colored_print(f" Cleaning up: {clone_parent}", "cyan")
|
|
558
592
|
shutil.rmtree(clone_parent, ignore_errors=True)
|
|
559
593
|
|
|
560
|
-
def _compute_process_metrics(self, repo_path: str) ->
|
|
594
|
+
def _compute_process_metrics(self, repo_path: str) -> dict[str, Any]:
|
|
561
595
|
# Compute PyDriller process metrics for the repository.
|
|
562
596
|
metrics = {}
|
|
563
597
|
since_date = datetime.now() - timedelta(days=self.days_back)
|
|
@@ -624,10 +658,10 @@ class LocalRepoAnalyzer:
|
|
|
624
658
|
|
|
625
659
|
def analyze_repositories(
|
|
626
660
|
self,
|
|
627
|
-
urls:
|
|
661
|
+
urls: list[str],
|
|
628
662
|
parallel_workers: int = 1,
|
|
629
663
|
output_format: str = "dict",
|
|
630
|
-
) ->
|
|
664
|
+
) -> list[RepositoryAnalysis]:
|
|
631
665
|
# Analyze multiple repositories from URLs.
|
|
632
666
|
# Args:
|
|
633
667
|
# urls: List of repository URLs to analyze
|
|
@@ -637,7 +671,7 @@ class LocalRepoAnalyzer:
|
|
|
637
671
|
return self._analyze_sequential(urls)
|
|
638
672
|
return self._analyze_parallel(urls, parallel_workers)
|
|
639
673
|
|
|
640
|
-
def _analyze_sequential(self, urls:
|
|
674
|
+
def _analyze_sequential(self, urls: list[str]) -> list[RepositoryAnalysis]:
|
|
641
675
|
# Analyze repositories sequentially.
|
|
642
676
|
results = []
|
|
643
677
|
for i, url in enumerate(urls, 1):
|
|
@@ -653,7 +687,7 @@ class LocalRepoAnalyzer:
|
|
|
653
687
|
continue
|
|
654
688
|
return results
|
|
655
689
|
|
|
656
|
-
def _analyze_parallel(self, urls:
|
|
690
|
+
def _analyze_parallel(self, urls: list[str], max_workers: int) -> list[RepositoryAnalysis]:
|
|
657
691
|
# Analyze repositories in parallel using thread pool.
|
|
658
692
|
results = []
|
|
659
693
|
colored_print(f"\n Analyzing {len(urls)} repositories with {max_workers} workers", "cyan")
|
|
@@ -665,7 +699,9 @@ class LocalRepoAnalyzer:
|
|
|
665
699
|
try:
|
|
666
700
|
result = future.result()
|
|
667
701
|
if result.total_commits == 0:
|
|
668
|
-
colored_print(
|
|
702
|
+
colored_print(
|
|
703
|
+
f" Skipping {result.name}: no commits in date range", "yellow"
|
|
704
|
+
)
|
|
669
705
|
continue
|
|
670
706
|
results.append(result)
|
|
671
707
|
colored_print(f" Completed: {result.name}", "green")
|
greenmining/services/reports.py
CHANGED
|
@@ -76,12 +76,12 @@ class ReportGenerator:
|
|
|
76
76
|
|
|
77
77
|
return f"""### Executive Summary
|
|
78
78
|
|
|
79
|
-
This report presents findings from analyzing **{format_number(summary[
|
|
79
|
+
This report presents findings from analyzing **{format_number(summary["total_commits"])} commits** across **{format_number(summary["total_repos"])} microservice-based repositories** to identify green software engineering practices.
|
|
80
80
|
|
|
81
81
|
**Key Findings:**
|
|
82
82
|
|
|
83
|
-
- **{format_percentage(summary[
|
|
84
|
-
- **{format_number(summary[
|
|
83
|
+
- **{format_percentage(summary["green_aware_percentage"])}** of commits ({format_number(summary["green_aware_count"])}) explicitly mention energy efficiency, performance optimization, or sustainability concerns
|
|
84
|
+
- **{format_number(summary["repos_with_green_commits"])}** out of {format_number(summary["total_repos"])} repositories contain at least one green-aware commit
|
|
85
85
|
- {pattern_text if pattern_text else "Various green software patterns were detected across the analyzed commits."}
|
|
86
86
|
|
|
87
87
|
**Implications:**
|
|
@@ -106,15 +106,15 @@ Repositories were selected from GitHub based on the following criteria:
|
|
|
106
106
|
|
|
107
107
|
- **Keywords:** {search_keywords}
|
|
108
108
|
- **Programming Languages:** {languages}
|
|
109
|
-
- **Minimum Stars:** {metadata.get(
|
|
109
|
+
- **Minimum Stars:** {metadata.get("min_stars", 100)} (to ensure established projects)
|
|
110
110
|
- **Sort Order:** Stars (descending)
|
|
111
|
-
- **Total Repositories:** {metadata.get(
|
|
111
|
+
- **Total Repositories:** {metadata.get("total_repos", 0)}
|
|
112
112
|
|
|
113
113
|
#### 1.2 Data Extraction Approach
|
|
114
114
|
|
|
115
115
|
Commit data was extracted using PyDriller library:
|
|
116
116
|
|
|
117
|
-
- **Commits Analyzed:** {analysis_metadata.get(
|
|
117
|
+
- **Commits Analyzed:** {analysis_metadata.get("total_commits_analyzed", 0)}
|
|
118
118
|
- **Time Window:** Last 2 years (730 days)
|
|
119
119
|
- **Merge Commits:** Excluded
|
|
120
120
|
- **Minimum Commit Message Length:** 10 characters
|
|
@@ -192,8 +192,8 @@ Commits were analyzed using a keyword and heuristic-based classification framewo
|
|
|
192
192
|
|
|
193
193
|
return f"""#### 2.1 Green Awareness in Commits
|
|
194
194
|
|
|
195
|
-
**Total commits analyzed:** {format_number(summary[
|
|
196
|
-
**Commits with explicit green mention:** {format_number(summary[
|
|
195
|
+
**Total commits analyzed:** {format_number(summary["total_commits"])}
|
|
196
|
+
**Commits with explicit green mention:** {format_number(summary["green_aware_count"])} ({format_percentage(summary["green_aware_percentage"])})
|
|
197
197
|
|
|
198
198
|
**Table: Top 10 Repositories with Highest Green Awareness**
|
|
199
199
|
|
|
@@ -224,10 +224,10 @@ Commits were analyzed using a keyword and heuristic-based classification framewo
|
|
|
224
224
|
# Pattern descriptions
|
|
225
225
|
pattern_details = []
|
|
226
226
|
for i, pattern in enumerate(patterns[:10], 1):
|
|
227
|
-
pattern_details.append(f"""**{i}. {pattern[
|
|
228
|
-
- Frequency: {format_number(pattern[
|
|
229
|
-
- Confidence Distribution: HIGH={conf[
|
|
230
|
-
- Example Commits: {
|
|
227
|
+
pattern_details.append(f"""**{i}. {pattern["pattern_name"]}**
|
|
228
|
+
- Frequency: {format_number(pattern["count"])} commits ({format_percentage(pattern["percentage"])})
|
|
229
|
+
- Confidence Distribution: HIGH={conf["HIGH"]}, MEDIUM={conf["MEDIUM"]}, LOW={conf["LOW"]}
|
|
230
|
+
- Example Commits: {", ".join([c[:8] for c in pattern["example_commits"][:3]])}""")
|
|
231
231
|
|
|
232
232
|
return f"""#### 2.2 Known Green Patterns & Tactics Applied
|
|
233
233
|
|
|
@@ -252,10 +252,10 @@ No novel microservice-specific green practices were automatically detected. Manu
|
|
|
252
252
|
|
|
253
253
|
pattern_list = []
|
|
254
254
|
for pattern in emergent:
|
|
255
|
-
pattern_list.append(f"""**Pattern:** {pattern[
|
|
256
|
-
- Occurrences: {pattern[
|
|
257
|
-
- Description: {pattern[
|
|
258
|
-
- Example Commits: {
|
|
255
|
+
pattern_list.append(f"""**Pattern:** {pattern["pattern_name"]}
|
|
256
|
+
- Occurrences: {pattern["count"]}
|
|
257
|
+
- Description: {pattern["description"]}
|
|
258
|
+
- Example Commits: {", ".join([c[:8] for c in pattern["example_commits"][:3]])}""")
|
|
259
259
|
|
|
260
260
|
return f"""#### 2.3 Emerging Practices Discovered
|
|
261
261
|
|
|
@@ -299,7 +299,7 @@ No novel microservice-specific green practices were automatically detected. Manu
|
|
|
299
299
|
if "error" in stats:
|
|
300
300
|
return f"""#### 2.5 Statistical Analysis
|
|
301
301
|
|
|
302
|
-
**Note:** Statistical analysis encountered an error: {stats[
|
|
302
|
+
**Note:** Statistical analysis encountered an error: {stats["error"]}
|
|
303
303
|
"""
|
|
304
304
|
|
|
305
305
|
sections = []
|
|
@@ -366,7 +366,7 @@ No novel microservice-specific green practices were automatically detected. Manu
|
|
|
366
366
|
if green_vs_nongreen:
|
|
367
367
|
cohens_d = green_vs_nongreen.get("cohens_d", 0)
|
|
368
368
|
magnitude = green_vs_nongreen.get("magnitude", "negligible")
|
|
369
|
-
sections.append(
|
|
369
|
+
sections.append("**Green vs Non-Green Pattern Usage:**")
|
|
370
370
|
sections.append(f"- Cohen's d: {cohens_d:.3f}")
|
|
371
371
|
sections.append(f"- Effect magnitude: {magnitude.capitalize()}")
|
|
372
372
|
sections.append("")
|
|
@@ -495,16 +495,16 @@ Based on the detected patterns, microservice developers primarily focus on:
|
|
|
495
495
|
|
|
496
496
|
#### 5.1 Summary of Key Findings
|
|
497
497
|
|
|
498
|
-
This study analyzed {format_number(summary[
|
|
498
|
+
This study analyzed {format_number(summary["total_commits"])} commits from {format_number(summary["total_repos"])} microservice repositories and found:
|
|
499
499
|
|
|
500
|
-
1. **{format_percentage(summary[
|
|
501
|
-
2. **{format_number(summary[
|
|
500
|
+
1. **{format_percentage(summary["green_aware_percentage"])}** of commits explicitly address energy/sustainability concerns
|
|
501
|
+
2. **{format_number(summary["repos_with_green_commits"])}** repositories demonstrate some level of green awareness
|
|
502
502
|
3. Common green patterns include: {patterns_text}
|
|
503
503
|
|
|
504
504
|
#### 5.2 Answers to Research Questions
|
|
505
505
|
|
|
506
506
|
**RQ1: What percentage of microservice commits explicitly mention energy efficiency?**
|
|
507
|
-
Answer: {format_percentage(summary[
|
|
507
|
+
Answer: {format_percentage(summary["green_aware_percentage"])} of analyzed commits contain explicit mentions.
|
|
508
508
|
|
|
509
509
|
**RQ2: Which green software tactics are developers applying in practice?**
|
|
510
510
|
Answer: Developers primarily apply caching strategies, resource pooling, database optimization, and asynchronous processing patterns.
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
greenmining/__init__.py,sha256=FvURFMzA2M-JtfP92RiAaCOVvJjN2qNUuEAQr0SPD4o,4789
|
|
2
|
+
greenmining/__main__.py,sha256=NYOVS7D4w2XDLn6SyXHXPKE5GrNGOeoWSTb_KazgK5c,590
|
|
3
|
+
greenmining/gsf_patterns.py,sha256=UvNJPY3HlAx1SicwUqci40TlLg8lCL0tszSOH4haxQs,55921
|
|
4
|
+
greenmining/utils.py,sha256=-dnLUw9taCzvQ2dk6uc66GAohOFiXJFKs9TLSEPk5kM,2893
|
|
5
|
+
greenmining/analyzers/__init__.py,sha256=FExlzEE2c2TZ82wqTh1il5qcZFhUDKBtB_HB-aC4ynA,416
|
|
6
|
+
greenmining/analyzers/code_diff_analyzer.py,sha256=KVvIYMmTvrjrH0n1EjyXuSfqwWPmlU8mAZ0F4Q1nYaQ,10939
|
|
7
|
+
greenmining/analyzers/metrics_power_correlator.py,sha256=4p2E_JRFjDSRLn10V-slZKvgNFR3H-8OXF6waBN-7DU,5928
|
|
8
|
+
greenmining/analyzers/statistical_analyzer.py,sha256=hMUNC8IjvN30f365CBtUqhrExNKGMPMVb1uKVEdBvwU,5930
|
|
9
|
+
greenmining/analyzers/temporal_analyzer.py,sha256=OoT1lUTimocag8TGgpTMHVdfiyEqnM2yiYA7QeHWZ3g,14203
|
|
10
|
+
greenmining/controllers/__init__.py,sha256=UiAT6zBvC1z_9cJWfzq1cLA0I4r9b2vURHipj8oDczI,180
|
|
11
|
+
greenmining/controllers/repository_controller.py,sha256=8XzeFIpaYzPznlQRaftHxkpBdNmyzchxU40yolZcodw,6011
|
|
12
|
+
greenmining/energy/__init__.py,sha256=WR_BvnHrUmEyDWaOPVpYap_kpat13K-mgtmvMAtXPZQ,558
|
|
13
|
+
greenmining/energy/base.py,sha256=G_II_7tgITVJtXZTLFvB7oMmhK5nJDem1hHkOEcifF0,5850
|
|
14
|
+
greenmining/energy/carbon_reporter.py,sha256=k41M6vcDuEYVK4KnsG2DrT9jV5oO82nJzAfIWz1r6Z4,8261
|
|
15
|
+
greenmining/energy/codecarbon_meter.py,sha256=zhMsZXdk2WRLZf3mU6p7FF2uAn5YlNogqhuMOCm6Xbs,4193
|
|
16
|
+
greenmining/energy/cpu_meter.py,sha256=g7oJzcpbEW-qp-9uUGhHduCj2-RBgry9VpsPIiYOKYE,4975
|
|
17
|
+
greenmining/energy/rapl.py,sha256=CpFa_j_g6UKf4f82CH8DIBZRRdRSqg_4Og51D6kMYVU,5239
|
|
18
|
+
greenmining/models/__init__.py,sha256=2hkB0quhMePvvA1AkYfj5uiF_HyGtXVxn0BU-5m_oSg,302
|
|
19
|
+
greenmining/models/aggregated_stats.py,sha256=il5c0pHF2PAYywbVwbod-SNcdz0q80XJ0AFKo2Gmits,971
|
|
20
|
+
greenmining/models/analysis_result.py,sha256=YICTCEcrJxZ1R8Xaio3AZOjCGwMzC_62BMAL0J_XY1w,1509
|
|
21
|
+
greenmining/models/commit.py,sha256=tkjMXXoMEAPxVR7M9Bf95gUSLqQF4GLeCs6_bBlK1go,2411
|
|
22
|
+
greenmining/models/repository.py,sha256=qsFoBmtmDn71g8WjHel5Zu9Ny4ij98E7n-cgcFNkJWI,2809
|
|
23
|
+
greenmining/services/__init__.py,sha256=QBsyLE5vNcQpyVaT1DD_ThS4pJ7pb4Obl-zUpp2GAnM,690
|
|
24
|
+
greenmining/services/commit_extractor.py,sha256=qBM9QpGzPZRmGMFufJ6gP8eWIuufTowLX8mQxqZwyEU,6996
|
|
25
|
+
greenmining/services/data_aggregator.py,sha256=BU_HUb-8c0n0sa_7VZRB8jIVnaVhRLf-E6KA4ASh-08,19427
|
|
26
|
+
greenmining/services/data_analyzer.py,sha256=0XqW-slrnt7RotrHDweOqKtoN8XIA7y6p7s2Jau6cMg,7431
|
|
27
|
+
greenmining/services/github_graphql_fetcher.py,sha256=WhSbQGMdkb0D4uLcMKW6xZK77c5AkW-nZf718issap4,11527
|
|
28
|
+
greenmining/services/local_repo_analyzer.py,sha256=Ju3UA9LQc2LpIatvpDiKg9aLhQp5HsldsrdWJDU5Rwo,27406
|
|
29
|
+
greenmining/services/reports.py,sha256=QCJZhET3hRkH83htxLkbEP58dE3-7jIZh82Pp60hQcc,23218
|
|
30
|
+
greenmining-1.2.6.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
|
|
31
|
+
greenmining-1.2.6.dist-info/METADATA,sha256=M7yBMY3OIVcZnq3JxIvRe6ncQnnK78_U90ezvN1WyVA,10522
|
|
32
|
+
greenmining-1.2.6.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
33
|
+
greenmining-1.2.6.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
|
|
34
|
+
greenmining-1.2.6.dist-info/RECORD,,
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
greenmining/__init__.py,sha256=erfTupwdZ-PlNwj2YygUyEbLT7aNd2zTfTJJ-NMG1wg,4496
|
|
2
|
-
greenmining/__main__.py,sha256=NYOVS7D4w2XDLn6SyXHXPKE5GrNGOeoWSTb_KazgK5c,590
|
|
3
|
-
greenmining/gsf_patterns.py,sha256=UvNJPY3HlAx1SicwUqci40TlLg8lCL0tszSOH4haxQs,55921
|
|
4
|
-
greenmining/utils.py,sha256=-dnLUw9taCzvQ2dk6uc66GAohOFiXJFKs9TLSEPk5kM,2893
|
|
5
|
-
greenmining/analyzers/__init__.py,sha256=wnBrn8EyAHG_qnesOPAYkZyc-XigXWy2pI3bMeIoLH4,416
|
|
6
|
-
greenmining/analyzers/code_diff_analyzer.py,sha256=1dk68R3O0RZG8gx1cm9B_UlZ1Uwyb_Q3oScRbCVx4tM,10950
|
|
7
|
-
greenmining/analyzers/metrics_power_correlator.py,sha256=MgKXAIYjNihzzyilCd88_AMjZP9sdC6NkCAVbrvvOus,5957
|
|
8
|
-
greenmining/analyzers/statistical_analyzer.py,sha256=PA0w0sytRmMO6N1a2iH7VdA6Icg4DcyBLFXOGq7PepY,5942
|
|
9
|
-
greenmining/analyzers/temporal_analyzer.py,sha256=JfTcAoI20oCFMehGrSRnDqhJTXI-RUbdCTMwDOTW9-g,14259
|
|
10
|
-
greenmining/controllers/__init__.py,sha256=UiAT6zBvC1z_9cJWfzq1cLA0I4r9b2vURHipj8oDczI,180
|
|
11
|
-
greenmining/controllers/repository_controller.py,sha256=sjfbDhyRY59MsKLw0dkxzpe1QZKtm9ScO4E8VFYZy9A,6041
|
|
12
|
-
greenmining/energy/__init__.py,sha256=GoCYh7hitWBoPMtan1HF1yezCHi7o4sa_YUJgGkeJc8,558
|
|
13
|
-
greenmining/energy/base.py,sha256=3hIPgc4B0Nz9V7DTh2Xd6trDRtmozUBBpa5UWRuWzcw,5918
|
|
14
|
-
greenmining/energy/carbon_reporter.py,sha256=bKIFlLhHfYzI4DBu_ff4GW1Psz4oSCAF4NmzQb-EShA,8298
|
|
15
|
-
greenmining/energy/codecarbon_meter.py,sha256=8obsfiJi0V3R_2BMHjTQCZSN52YPvFn5d9q_MKOZVb4,4214
|
|
16
|
-
greenmining/energy/cpu_meter.py,sha256=GmUZsOIzWnAWcuSW4RndDdgszDHzqnBjAIeLBgelZ0w,5001
|
|
17
|
-
greenmining/energy/rapl.py,sha256=b63M1mS7uF9Uo0vFi0z7Qwdo56U1TqxIYQXINhYp9Jo,5292
|
|
18
|
-
greenmining/models/__init__.py,sha256=2hkB0quhMePvvA1AkYfj5uiF_HyGtXVxn0BU-5m_oSg,302
|
|
19
|
-
greenmining/models/aggregated_stats.py,sha256=CZxjwXswvtmYPwpcbodLUsZpsbsNKBDIqvU9DpFO_t0,1004
|
|
20
|
-
greenmining/models/analysis_result.py,sha256=YICTCEcrJxZ1R8Xaio3AZOjCGwMzC_62BMAL0J_XY1w,1509
|
|
21
|
-
greenmining/models/commit.py,sha256=LCwDcRu4-BeCJQdk590oQNZZZM9t8W9FlaHlo9DCVmc,2415
|
|
22
|
-
greenmining/models/repository.py,sha256=MUeCOtVMOsU4Oa_BBoB163Ij5BKytTKwbzoGORJx4rU,2850
|
|
23
|
-
greenmining/services/__init__.py,sha256=ZEMOVut0KRdume_vz58beSNps3YgeoGBXmUjEqNgIhc,690
|
|
24
|
-
greenmining/services/commit_extractor.py,sha256=qBM9QpGzPZRmGMFufJ6gP8eWIuufTowLX8mQxqZwyEU,6996
|
|
25
|
-
greenmining/services/data_aggregator.py,sha256=BU_HUb-8c0n0sa_7VZRB8jIVnaVhRLf-E6KA4ASh-08,19427
|
|
26
|
-
greenmining/services/data_analyzer.py,sha256=0XqW-slrnt7RotrHDweOqKtoN8XIA7y6p7s2Jau6cMg,7431
|
|
27
|
-
greenmining/services/github_graphql_fetcher.py,sha256=ZklXdEAc60KeFL83zRYMwW_-2OwMKpfPY7Wrifl0D50,11539
|
|
28
|
-
greenmining/services/local_repo_analyzer.py,sha256=kmNs6KzW8_hgRdzArqBq2TZ-3Rflh-9Ody0lqYa4Vl4,25915
|
|
29
|
-
greenmining/services/reports.py,sha256=nhJuYiA5tPD_9AjtgSLEnrpW3x15sZXrwIxpxQEBbh0,23219
|
|
30
|
-
greenmining-1.2.4.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
|
|
31
|
-
greenmining-1.2.4.dist-info/METADATA,sha256=2-7qoQ9C6nbcQxXKYG2Dv0BvEMtbX6GecWlSXGPCdOo,10522
|
|
32
|
-
greenmining-1.2.4.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
33
|
-
greenmining-1.2.4.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
|
|
34
|
-
greenmining-1.2.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|