greenmining 1.1.6__tar.gz → 1.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {greenmining-1.1.6 → greenmining-1.1.8}/CHANGELOG.md +11 -1
- {greenmining-1.1.6/greenmining.egg-info → greenmining-1.1.8}/PKG-INFO +3 -17
- {greenmining-1.1.6 → greenmining-1.1.8}/README.md +2 -14
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/__init__.py +1 -1
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/analyzers/metrics_power_correlator.py +1 -1
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/analyzers/power_regression.py +0 -1
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/analyzers/qualitative_analyzer.py +1 -1
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/analyzers/statistical_analyzer.py +0 -32
- greenmining-1.1.8/greenmining/config.py +91 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/controllers/repository_controller.py +0 -2
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/energy/codecarbon_meter.py +0 -21
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/energy/cpu_meter.py +1 -1
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/gsf_patterns.py +41 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/models/aggregated_stats.py +1 -1
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/models/commit.py +0 -1
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/models/repository.py +1 -1
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/services/commit_extractor.py +2 -41
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/services/data_aggregator.py +1 -6
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/services/data_analyzer.py +1 -57
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/services/local_repo_analyzer.py +1 -2
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/services/reports.py +1 -6
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/utils.py +0 -87
- {greenmining-1.1.6 → greenmining-1.1.8/greenmining.egg-info}/PKG-INFO +3 -17
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining.egg-info/SOURCES.txt +0 -8
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining.egg-info/requires.txt +0 -3
- {greenmining-1.1.6 → greenmining-1.1.8}/pyproject.toml +1 -4
- greenmining-1.1.6/greenmining/__version__.py +0 -3
- greenmining-1.1.6/greenmining/config.py +0 -200
- greenmining-1.1.6/greenmining/dashboard/__init__.py +0 -5
- greenmining-1.1.6/greenmining/dashboard/app.py +0 -200
- greenmining-1.1.6/greenmining/services/github_fetcher.py +0 -2
- {greenmining-1.1.6 → greenmining-1.1.8}/LICENSE +0 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/MANIFEST.in +0 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/__main__.py +0 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/analyzers/__init__.py +0 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/analyzers/code_diff_analyzer.py +0 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/analyzers/temporal_analyzer.py +0 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/analyzers/version_power_analyzer.py +0 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/controllers/__init__.py +0 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/energy/__init__.py +0 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/energy/base.py +0 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/energy/carbon_reporter.py +0 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/energy/rapl.py +0 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/models/__init__.py +0 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/models/analysis_result.py +0 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/presenters/__init__.py +0 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/presenters/console_presenter.py +0 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/services/__init__.py +0 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/services/github_graphql_fetcher.py +0 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining.egg-info/dependency_links.txt +0 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/greenmining.egg-info/top_level.txt +0 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/setup.cfg +0 -0
- {greenmining-1.1.6 → greenmining-1.1.8}/setup.py +0 -0
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [1.1.8] - 2026-01-31
|
|
4
|
+
|
|
5
|
+
### Removed
|
|
6
|
+
- Web dashboard module (`greenmining/dashboard/`) and Flask dependency
|
|
7
|
+
- Dashboard documentation page and all dashboard references
|
|
8
|
+
|
|
9
|
+
### Fixed
|
|
10
|
+
- ReadTheDocs experiment page not rendering (trailing whitespace in mkdocs nav)
|
|
11
|
+
- Plotly rendering in notebook (nbformat dependency)
|
|
12
|
+
|
|
3
13
|
## [1.1.6] - 2026-01-31
|
|
4
14
|
|
|
5
15
|
### Fixed
|
|
@@ -11,7 +21,7 @@
|
|
|
11
21
|
### Added
|
|
12
22
|
- `since_date` / `to_date` parameters for date-bounded commit analysis
|
|
13
23
|
- `created_before` / `pushed_after` search filters
|
|
14
|
-
-
|
|
24
|
+
- GraphQL API and experiment documentation pages
|
|
15
25
|
- Full process metrics and method-level metrics documentation
|
|
16
26
|
|
|
17
27
|
### Changed
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: greenmining
|
|
3
|
-
Version: 1.1.6
|
|
3
|
+
Version: 1.1.8
|
|
4
4
|
Summary: An empirical Python library for Mining Software Repositories (MSR) in Green IT research
|
|
5
5
|
Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
|
|
6
6
|
License: MIT
|
|
@@ -49,8 +49,6 @@ Requires-Dist: twine; extra == "dev"
|
|
|
49
49
|
Provides-Extra: energy
|
|
50
50
|
Requires-Dist: psutil; extra == "energy"
|
|
51
51
|
Requires-Dist: codecarbon; extra == "energy"
|
|
52
|
-
Provides-Extra: dashboard
|
|
53
|
-
Requires-Dist: flask; extra == "dashboard"
|
|
54
52
|
Provides-Extra: docs
|
|
55
53
|
Requires-Dist: sphinx; extra == "docs"
|
|
56
54
|
Requires-Dist: sphinx-rtd-theme; extra == "docs"
|
|
@@ -70,8 +68,8 @@ An empirical Python library for Mining Software Repositories (MSR) in Green IT r
|
|
|
70
68
|
|
|
71
69
|
`greenmining` is a research-grade Python library designed for **empirical Mining Software Repositories (MSR)** studies in **Green IT**. It enables researchers and practitioners to:
|
|
72
70
|
|
|
73
|
-
- **Mine repositories at scale** - Fetch and analyze GitHub repositories via GraphQL API with configurable filters
|
|
74
|
-
|
|
71
|
+
- **Mine repositories at scale** - Search, Fetch and analyze GitHub repositories via GraphQL API with configurable filters
|
|
72
|
+
|
|
75
73
|
- **Classify green commits** - Detect 122 sustainability patterns from the Green Software Foundation (GSF) catalog
|
|
76
74
|
- **Analyze any repository by URL** - Direct Git-based analysis with support for private repositories
|
|
77
75
|
- **Measure energy consumption** - RAPL, CodeCarbon, and CPU Energy Meter backends for power profiling
|
|
@@ -80,7 +78,6 @@ An empirical Python library for Mining Software Repositories (MSR) in Green IT r
|
|
|
80
78
|
- **Method-level analysis** - Per-method complexity and metrics via Lizard integration
|
|
81
79
|
- **Version power comparison** - Compare power consumption across software versions
|
|
82
80
|
- **Generate research datasets** - Statistical analysis, temporal trends, and publication-ready reports
|
|
83
|
-
- **Web dashboard** - Flask-based interactive visualization of analysis results
|
|
84
81
|
|
|
85
82
|
Whether you're conducting MSR research, analyzing green software adoption, or measuring the energy footprint of codebases, GreenMining provides the empirical toolkit you need.
|
|
86
83
|
|
|
@@ -452,15 +449,6 @@ print(f"Spearman: {correlator.spearman}")
|
|
|
452
449
|
print(f"Feature importance: {correlator.feature_importance}")
|
|
453
450
|
```
|
|
454
451
|
|
|
455
|
-
#### Web Dashboard
|
|
456
|
-
|
|
457
|
-
```python
|
|
458
|
-
from greenmining.dashboard import run_dashboard
|
|
459
|
-
|
|
460
|
-
# Launch interactive dashboard (requires pip install greenmining[dashboard])
|
|
461
|
-
run_dashboard(data_dir="./data", host="127.0.0.1", port=5000)
|
|
462
|
-
```
|
|
463
|
-
|
|
464
452
|
#### Pipeline Batch Analysis
|
|
465
453
|
|
|
466
454
|
```python
|
|
@@ -698,7 +686,6 @@ config = Config(
|
|
|
698
686
|
- **Full Process Metrics**: All 8 process metrics (ChangeSet, CodeChurn, CommitsCount, ContributorsCount, ContributorsExperience, HistoryComplexity, HunksCount, LinesCount)
|
|
699
687
|
- **Statistical Analysis**: Correlations, effect sizes, and temporal trends
|
|
700
688
|
- **Multi-format Output**: Markdown reports, CSV exports, JSON data
|
|
701
|
-
- **Web Dashboard**: Flask-based interactive visualization (`pip install greenmining[dashboard]`)
|
|
702
689
|
- **Docker Support**: Pre-built images for containerized analysis
|
|
703
690
|
|
|
704
691
|
### Energy Measurement
|
|
@@ -857,7 +844,6 @@ ruff check greenmining/ tests/
|
|
|
857
844
|
|
|
858
845
|
```bash
|
|
859
846
|
pip install greenmining[energy] # psutil, codecarbon (energy measurement)
|
|
860
|
-
pip install greenmining[dashboard] # flask (web dashboard)
|
|
861
847
|
pip install greenmining[dev] # pytest, black, ruff, mypy (development)
|
|
862
848
|
```
|
|
863
849
|
|
|
@@ -11,8 +11,8 @@ An empirical Python library for Mining Software Repositories (MSR) in Green IT r
|
|
|
11
11
|
|
|
12
12
|
`greenmining` is a research-grade Python library designed for **empirical Mining Software Repositories (MSR)** studies in **Green IT**. It enables researchers and practitioners to:
|
|
13
13
|
|
|
14
|
-
- **Mine repositories at scale** - Fetch and analyze GitHub repositories via GraphQL API with configurable filters
|
|
15
|
-
|
|
14
|
+
- **Mine repositories at scale** - Search, Fetch and analyze GitHub repositories via GraphQL API with configurable filters
|
|
15
|
+
|
|
16
16
|
- **Classify green commits** - Detect 122 sustainability patterns from the Green Software Foundation (GSF) catalog
|
|
17
17
|
- **Analyze any repository by URL** - Direct Git-based analysis with support for private repositories
|
|
18
18
|
- **Measure energy consumption** - RAPL, CodeCarbon, and CPU Energy Meter backends for power profiling
|
|
@@ -21,7 +21,6 @@ An empirical Python library for Mining Software Repositories (MSR) in Green IT r
|
|
|
21
21
|
- **Method-level analysis** - Per-method complexity and metrics via Lizard integration
|
|
22
22
|
- **Version power comparison** - Compare power consumption across software versions
|
|
23
23
|
- **Generate research datasets** - Statistical analysis, temporal trends, and publication-ready reports
|
|
24
|
-
- **Web dashboard** - Flask-based interactive visualization of analysis results
|
|
25
24
|
|
|
26
25
|
Whether you're conducting MSR research, analyzing green software adoption, or measuring the energy footprint of codebases, GreenMining provides the empirical toolkit you need.
|
|
27
26
|
|
|
@@ -393,15 +392,6 @@ print(f"Spearman: {correlator.spearman}")
|
|
|
393
392
|
print(f"Feature importance: {correlator.feature_importance}")
|
|
394
393
|
```
|
|
395
394
|
|
|
396
|
-
#### Web Dashboard
|
|
397
|
-
|
|
398
|
-
```python
|
|
399
|
-
from greenmining.dashboard import run_dashboard
|
|
400
|
-
|
|
401
|
-
# Launch interactive dashboard (requires pip install greenmining[dashboard])
|
|
402
|
-
run_dashboard(data_dir="./data", host="127.0.0.1", port=5000)
|
|
403
|
-
```
|
|
404
|
-
|
|
405
395
|
#### Pipeline Batch Analysis
|
|
406
396
|
|
|
407
397
|
```python
|
|
@@ -639,7 +629,6 @@ config = Config(
|
|
|
639
629
|
- **Full Process Metrics**: All 8 process metrics (ChangeSet, CodeChurn, CommitsCount, ContributorsCount, ContributorsExperience, HistoryComplexity, HunksCount, LinesCount)
|
|
640
630
|
- **Statistical Analysis**: Correlations, effect sizes, and temporal trends
|
|
641
631
|
- **Multi-format Output**: Markdown reports, CSV exports, JSON data
|
|
642
|
-
- **Web Dashboard**: Flask-based interactive visualization (`pip install greenmining[dashboard]`)
|
|
643
632
|
- **Docker Support**: Pre-built images for containerized analysis
|
|
644
633
|
|
|
645
634
|
### Energy Measurement
|
|
@@ -798,7 +787,6 @@ ruff check greenmining/ tests/
|
|
|
798
787
|
|
|
799
788
|
```bash
|
|
800
789
|
pip install greenmining[energy] # psutil, codecarbon (energy measurement)
|
|
801
|
-
pip install greenmining[dashboard] # flask (web dashboard)
|
|
802
790
|
pip install greenmining[dev] # pytest, black, ruff, mypy (development)
|
|
803
791
|
```
|
|
804
792
|
|
|
@@ -135,38 +135,6 @@ class StatisticalAnalyzer:
|
|
|
135
135
|
"significant": bool(p_value < 0.05),
|
|
136
136
|
}
|
|
137
137
|
|
|
138
|
-
def pattern_adoption_rate_analysis(self, commits_df: pd.DataFrame) -> Dict[str, Any]:
|
|
139
|
-
# Analyze pattern adoption rates over repository lifetime.
|
|
140
|
-
results = {}
|
|
141
|
-
|
|
142
|
-
for pattern in commits_df["pattern"].unique():
|
|
143
|
-
pattern_commits = commits_df[commits_df["pattern"] == pattern].sort_values("date")
|
|
144
|
-
|
|
145
|
-
if len(pattern_commits) == 0:
|
|
146
|
-
continue
|
|
147
|
-
|
|
148
|
-
# Time to first adoption
|
|
149
|
-
first_adoption = pattern_commits.iloc[0]["date"]
|
|
150
|
-
repo_start = commits_df["date"].min()
|
|
151
|
-
ttfa_days = (first_adoption - repo_start).days
|
|
152
|
-
|
|
153
|
-
# Adoption frequency over time
|
|
154
|
-
monthly_adoption = pattern_commits.set_index("date").resample("ME").size()
|
|
155
|
-
|
|
156
|
-
# Pattern stickiness (months with at least one adoption)
|
|
157
|
-
total_months = len(commits_df.set_index("date").resample("ME").size())
|
|
158
|
-
active_months = len(monthly_adoption[monthly_adoption > 0])
|
|
159
|
-
stickiness = active_months / total_months if total_months > 0 else 0
|
|
160
|
-
|
|
161
|
-
results[pattern] = {
|
|
162
|
-
"ttfa_days": ttfa_days,
|
|
163
|
-
"total_adoptions": len(pattern_commits),
|
|
164
|
-
"stickiness": stickiness,
|
|
165
|
-
"monthly_adoption_rate": monthly_adoption.mean(),
|
|
166
|
-
}
|
|
167
|
-
|
|
168
|
-
return results
|
|
169
|
-
|
|
170
138
|
def _interpret_correlations(self, significant_pairs: List[Dict[str, Any]]) -> str:
|
|
171
139
|
# Generate interpretation of correlation results.
|
|
172
140
|
if not significant_pairs:
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Any, Dict, List
|
|
4
|
+
|
|
5
|
+
from dotenv import load_dotenv
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _load_yaml_config(yaml_path: Path) -> Dict[str, Any]:
|
|
9
|
+
# Load configuration from YAML file if it exists.
|
|
10
|
+
if not yaml_path.exists():
|
|
11
|
+
return {}
|
|
12
|
+
try:
|
|
13
|
+
import yaml
|
|
14
|
+
|
|
15
|
+
with open(yaml_path, "r") as f:
|
|
16
|
+
return yaml.safe_load(f) or {}
|
|
17
|
+
except ImportError:
|
|
18
|
+
return {}
|
|
19
|
+
except Exception:
|
|
20
|
+
return {}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Config:
|
|
24
|
+
# Configuration class for loading from env vars and YAML.
|
|
25
|
+
|
|
26
|
+
def __init__(self, env_file: str = ".env", yaml_file: str = "greenmining.yaml"):
|
|
27
|
+
# Initialize configuration from environment and YAML file.
|
|
28
|
+
env_path = Path(env_file)
|
|
29
|
+
if env_path.exists():
|
|
30
|
+
load_dotenv(env_path)
|
|
31
|
+
else:
|
|
32
|
+
load_dotenv()
|
|
33
|
+
|
|
34
|
+
# Load YAML config
|
|
35
|
+
yaml_path = Path(yaml_file)
|
|
36
|
+
self._yaml_config = _load_yaml_config(yaml_path)
|
|
37
|
+
|
|
38
|
+
# GitHub API Configuration
|
|
39
|
+
self.GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
|
|
40
|
+
if not self.GITHUB_TOKEN or self.GITHUB_TOKEN == "your_github_pat_here":
|
|
41
|
+
raise ValueError("GITHUB_TOKEN not set. Please set it in .env file or environment.")
|
|
42
|
+
|
|
43
|
+
# Search Configuration (YAML: sources.search.*)
|
|
44
|
+
yaml_search = self._yaml_config.get("sources", {}).get("search", {})
|
|
45
|
+
|
|
46
|
+
self.SUPPORTED_LANGUAGES: List[str] = yaml_search.get(
|
|
47
|
+
"languages",
|
|
48
|
+
[
|
|
49
|
+
"Python",
|
|
50
|
+
"JavaScript",
|
|
51
|
+
"TypeScript",
|
|
52
|
+
"Java",
|
|
53
|
+
"C++",
|
|
54
|
+
"C#",
|
|
55
|
+
"Go",
|
|
56
|
+
"Rust",
|
|
57
|
+
"PHP",
|
|
58
|
+
"Ruby",
|
|
59
|
+
"Swift",
|
|
60
|
+
"Kotlin",
|
|
61
|
+
"Scala",
|
|
62
|
+
"R",
|
|
63
|
+
"MATLAB",
|
|
64
|
+
"Dart",
|
|
65
|
+
"Lua",
|
|
66
|
+
"Perl",
|
|
67
|
+
"Haskell",
|
|
68
|
+
"Elixir",
|
|
69
|
+
],
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
# Repository Limits
|
|
73
|
+
self.MIN_STARS = yaml_search.get("min_stars", int(os.getenv("MIN_STARS", "100")))
|
|
74
|
+
self.MAX_REPOS = int(os.getenv("MAX_REPOS", "100"))
|
|
75
|
+
|
|
76
|
+
# Output Configuration (YAML: output.directory)
|
|
77
|
+
yaml_output = self._yaml_config.get("output", {})
|
|
78
|
+
self.OUTPUT_DIR = Path(yaml_output.get("directory", os.getenv("OUTPUT_DIR", "./data")))
|
|
79
|
+
self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
|
80
|
+
|
|
81
|
+
# File Paths
|
|
82
|
+
self.REPOS_FILE = self.OUTPUT_DIR / "repositories.json"
|
|
83
|
+
|
|
84
|
+
def __repr__(self) -> str:
|
|
85
|
+
# String representation of configuration (hiding sensitive data).
|
|
86
|
+
return (
|
|
87
|
+
f"Config("
|
|
88
|
+
f"MAX_REPOS={self.MAX_REPOS}, "
|
|
89
|
+
f"OUTPUT_DIR={self.OUTPUT_DIR}"
|
|
90
|
+
f")"
|
|
91
|
+
)
|
|
@@ -124,24 +124,3 @@ class CodeCarbonMeter(EnergyMeter):
|
|
|
124
124
|
end_time=datetime.fromtimestamp(end_time),
|
|
125
125
|
)
|
|
126
126
|
|
|
127
|
-
def get_carbon_intensity(self) -> Optional[float]:
|
|
128
|
-
# Get current carbon intensity for the configured region.
|
|
129
|
-
if not self._codecarbon_available:
|
|
130
|
-
return None
|
|
131
|
-
|
|
132
|
-
try:
|
|
133
|
-
from codecarbon import EmissionsTracker
|
|
134
|
-
|
|
135
|
-
# Create temporary tracker to get carbon intensity
|
|
136
|
-
tracker = EmissionsTracker(
|
|
137
|
-
project_name="carbon_check",
|
|
138
|
-
country_iso_code=self.country_iso_code,
|
|
139
|
-
save_to_file=False,
|
|
140
|
-
log_level="error",
|
|
141
|
-
)
|
|
142
|
-
tracker.start()
|
|
143
|
-
tracker.stop()
|
|
144
|
-
|
|
145
|
-
return getattr(tracker, "_carbon_intensity", None)
|
|
146
|
-
except Exception:
|
|
147
|
-
return None
|
|
@@ -254,6 +254,35 @@ GSF_PATTERNS = {
|
|
|
254
254
|
"description": "Choose hardware optimized for energy efficiency",
|
|
255
255
|
"sci_impact": "Direct reduction in energy consumption",
|
|
256
256
|
},
|
|
257
|
+
"match_preconfigured_server": {
|
|
258
|
+
"name": "Match Utilization Requirements with Pre-configured Servers",
|
|
259
|
+
"category": "cloud",
|
|
260
|
+
"keywords": [
|
|
261
|
+
"pre-configured server",
|
|
262
|
+
"energy proportionality",
|
|
263
|
+
"server utilization",
|
|
264
|
+
"oversized server",
|
|
265
|
+
"underutilized server",
|
|
266
|
+
"server consolidation",
|
|
267
|
+
],
|
|
268
|
+
"description": "Select pre-configured servers that match utilization needs; one highly utilized server is more energy-efficient than two underutilized ones",
|
|
269
|
+
"sci_impact": "Higher utilization improves energy proportionality; fewer servers reduces embodied carbon",
|
|
270
|
+
},
|
|
271
|
+
"optimize_customer_device_impact": {
|
|
272
|
+
"name": "Optimize Impact on Customer Devices and Equipment",
|
|
273
|
+
"category": "cloud",
|
|
274
|
+
"keywords": [
|
|
275
|
+
"customer device",
|
|
276
|
+
"backward compatible",
|
|
277
|
+
"backwards compatible",
|
|
278
|
+
"older hardware",
|
|
279
|
+
"device lifetime",
|
|
280
|
+
"older browser",
|
|
281
|
+
"end-of-life hardware",
|
|
282
|
+
],
|
|
283
|
+
"description": "Design software to extend customer hardware lifetimes through backward compatibility with older devices, browsers, and operating systems",
|
|
284
|
+
"sci_impact": "Extending device lifetimes reduces embodied carbon; optimizing for older hardware may also reduce energy intensity",
|
|
285
|
+
},
|
|
257
286
|
# ==================== WEB PATTERNS (15+) ====================
|
|
258
287
|
"avoid_chaining_requests": {
|
|
259
288
|
"name": "Avoid Chaining Critical Requests",
|
|
@@ -1555,6 +1584,18 @@ GREEN_KEYWORDS = [
|
|
|
1555
1584
|
"workload",
|
|
1556
1585
|
"overhead",
|
|
1557
1586
|
"footprint",
|
|
1587
|
+
# Server utilization & customer device patterns
|
|
1588
|
+
"pre-configured server",
|
|
1589
|
+
"energy proportionality",
|
|
1590
|
+
"server consolidation",
|
|
1591
|
+
"underutilized server",
|
|
1592
|
+
"oversized server",
|
|
1593
|
+
"backward compatible",
|
|
1594
|
+
"backwards compatible",
|
|
1595
|
+
"customer device",
|
|
1596
|
+
"device lifetime",
|
|
1597
|
+
"older browser",
|
|
1598
|
+
"end-of-life hardware",
|
|
1558
1599
|
]
|
|
1559
1600
|
|
|
1560
1601
|
|
|
@@ -2,21 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
import json
|
|
6
5
|
from datetime import datetime, timedelta
|
|
7
6
|
from pathlib import Path
|
|
8
|
-
from typing import Any
|
|
7
|
+
from typing import Any
|
|
9
8
|
|
|
10
9
|
from github import Github
|
|
11
10
|
from tqdm import tqdm
|
|
12
11
|
|
|
13
|
-
from greenmining.config import get_config
|
|
14
12
|
from greenmining.models.repository import Repository
|
|
15
13
|
from greenmining.utils import (
|
|
16
14
|
colored_print,
|
|
17
15
|
format_timestamp,
|
|
18
|
-
load_json_file,
|
|
19
|
-
print_banner,
|
|
20
16
|
retry_on_exception,
|
|
21
17
|
save_json_file,
|
|
22
18
|
)
|
|
@@ -110,8 +106,7 @@ class CommitExtractor:
|
|
|
110
106
|
try:
|
|
111
107
|
# Get repository from GitHub API
|
|
112
108
|
if not self.github:
|
|
113
|
-
|
|
114
|
-
self.github = Github(config.GITHUB_TOKEN)
|
|
109
|
+
raise ValueError("github_token is required for commit extraction")
|
|
115
110
|
|
|
116
111
|
gh_repo = self.github.get_repo(repo_name)
|
|
117
112
|
|
|
@@ -143,40 +138,6 @@ class CommitExtractor:
|
|
|
143
138
|
|
|
144
139
|
return commits
|
|
145
140
|
|
|
146
|
-
def _extract_commit_metadata(self, commit, repo_name: str) -> dict[str, Any]:
|
|
147
|
-
# Extract metadata from commit object.
|
|
148
|
-
# Get modified files
|
|
149
|
-
files_changed = []
|
|
150
|
-
lines_added = 0
|
|
151
|
-
lines_deleted = 0
|
|
152
|
-
|
|
153
|
-
try:
|
|
154
|
-
for modified_file in commit.modified_files:
|
|
155
|
-
files_changed.append(modified_file.filename)
|
|
156
|
-
lines_added += modified_file.added_lines
|
|
157
|
-
lines_deleted += modified_file.deleted_lines
|
|
158
|
-
except Exception:
|
|
159
|
-
pass
|
|
160
|
-
|
|
161
|
-
return {
|
|
162
|
-
"commit_id": commit.hash,
|
|
163
|
-
"repo_name": repo_name,
|
|
164
|
-
"date": commit.committer_date.isoformat(),
|
|
165
|
-
"author": commit.author.name,
|
|
166
|
-
"author_email": commit.author.email,
|
|
167
|
-
"message": commit.msg.strip(),
|
|
168
|
-
"files_changed": files_changed[:20], # Limit to 20 files
|
|
169
|
-
"lines_added": lines_added,
|
|
170
|
-
"lines_deleted": lines_deleted,
|
|
171
|
-
"insertions": lines_added,
|
|
172
|
-
"deletions": lines_deleted,
|
|
173
|
-
"is_merge": commit.merge,
|
|
174
|
-
"branches": (
|
|
175
|
-
list(commit.branches) if hasattr(commit, "branches") and commit.branches else []
|
|
176
|
-
),
|
|
177
|
-
"in_main_branch": commit.in_main_branch if hasattr(commit, "in_main_branch") else True,
|
|
178
|
-
}
|
|
179
|
-
|
|
180
141
|
def _extract_commit_metadata_from_github(self, commit, repo_name: str) -> dict[str, Any]:
|
|
181
142
|
# Extract metadata from GitHub API commit object.
|
|
182
143
|
# Get modified files and stats
|
|
@@ -2,26 +2,21 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
import json
|
|
6
5
|
from collections import defaultdict
|
|
7
6
|
from pathlib import Path
|
|
8
|
-
from typing import Any
|
|
7
|
+
from typing import Any
|
|
9
8
|
|
|
10
9
|
import pandas as pd
|
|
11
10
|
|
|
12
11
|
from greenmining.analyzers import (
|
|
13
12
|
StatisticalAnalyzer,
|
|
14
13
|
TemporalAnalyzer,
|
|
15
|
-
QualitativeAnalyzer,
|
|
16
14
|
)
|
|
17
|
-
from greenmining.config import get_config
|
|
18
15
|
from greenmining.models.repository import Repository
|
|
19
16
|
from greenmining.utils import (
|
|
20
17
|
colored_print,
|
|
21
18
|
format_number,
|
|
22
19
|
format_percentage,
|
|
23
|
-
load_json_file,
|
|
24
|
-
print_banner,
|
|
25
20
|
save_csv_file,
|
|
26
21
|
save_json_file,
|
|
27
22
|
)
|
|
@@ -2,18 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
import json
|
|
6
|
-
import re
|
|
7
5
|
from collections import Counter
|
|
8
6
|
from pathlib import Path
|
|
9
|
-
from typing import Any
|
|
7
|
+
from typing import Any
|
|
10
8
|
|
|
11
9
|
from tqdm import tqdm
|
|
12
10
|
|
|
13
11
|
from greenmining.analyzers import (
|
|
14
12
|
CodeDiffAnalyzer,
|
|
15
13
|
)
|
|
16
|
-
from greenmining.config import get_config
|
|
17
14
|
from greenmining.gsf_patterns import (
|
|
18
15
|
GREEN_KEYWORDS,
|
|
19
16
|
GSF_PATTERNS,
|
|
@@ -22,11 +19,7 @@ from greenmining.gsf_patterns import (
|
|
|
22
19
|
)
|
|
23
20
|
from greenmining.utils import (
|
|
24
21
|
colored_print,
|
|
25
|
-
create_checkpoint,
|
|
26
22
|
format_timestamp,
|
|
27
|
-
load_checkpoint,
|
|
28
|
-
load_json_file,
|
|
29
|
-
print_banner,
|
|
30
23
|
save_json_file,
|
|
31
24
|
)
|
|
32
25
|
|
|
@@ -156,55 +149,6 @@ class DataAnalyzer:
|
|
|
156
149
|
|
|
157
150
|
return result
|
|
158
151
|
|
|
159
|
-
def _check_green_awareness(self, message: str, files: list[str]) -> tuple[bool, Optional[str]]:
|
|
160
|
-
# Check if commit explicitly mentions green/energy concerns.
|
|
161
|
-
# Check message for green keywords
|
|
162
|
-
for keyword in self.GREEN_KEYWORDS:
|
|
163
|
-
if keyword in message:
|
|
164
|
-
# Extract context around keyword
|
|
165
|
-
pattern = rf".{{0,30}}{re.escape(keyword)}.{{0,30}}"
|
|
166
|
-
match = re.search(pattern, message, re.IGNORECASE)
|
|
167
|
-
if match:
|
|
168
|
-
evidence = match.group(0).strip()
|
|
169
|
-
return True, f"Keyword '{keyword}': {evidence}"
|
|
170
|
-
|
|
171
|
-
# Check file names for patterns
|
|
172
|
-
cache_files = [f for f in files if "cache" in f or "redis" in f]
|
|
173
|
-
if cache_files:
|
|
174
|
-
return True, f"Modified cache-related file: {cache_files[0]}"
|
|
175
|
-
|
|
176
|
-
perf_files = [f for f in files if "performance" in f or "optimization" in f]
|
|
177
|
-
if perf_files:
|
|
178
|
-
return True, f"Modified performance file: {perf_files[0]}"
|
|
179
|
-
|
|
180
|
-
return False, None
|
|
181
|
-
|
|
182
|
-
def _detect_known_pattern(self, message: str, files: list[str]) -> tuple[Optional[str], str]:
|
|
183
|
-
# Detect known green software pattern.
|
|
184
|
-
matches = []
|
|
185
|
-
|
|
186
|
-
# Check each pattern
|
|
187
|
-
for pattern_name, keywords in self.GREEN_PATTERNS.items():
|
|
188
|
-
for keyword in keywords:
|
|
189
|
-
if keyword in message:
|
|
190
|
-
# Calculate confidence based on specificity
|
|
191
|
-
confidence = "HIGH" if len(keyword) > 10 else "MEDIUM"
|
|
192
|
-
matches.append((pattern_name, confidence, len(keyword)))
|
|
193
|
-
|
|
194
|
-
# Check file names for pattern hints
|
|
195
|
-
all_files = " ".join(files)
|
|
196
|
-
for pattern_name, keywords in self.GREEN_PATTERNS.items():
|
|
197
|
-
for keyword in keywords:
|
|
198
|
-
if keyword in all_files:
|
|
199
|
-
matches.append((pattern_name, "MEDIUM", len(keyword)))
|
|
200
|
-
|
|
201
|
-
if not matches:
|
|
202
|
-
return "NONE DETECTED", "NONE"
|
|
203
|
-
|
|
204
|
-
# Return most specific match (longest keyword)
|
|
205
|
-
matches.sort(key=lambda x: x[2], reverse=True)
|
|
206
|
-
return matches[0][0], matches[0][1]
|
|
207
|
-
|
|
208
152
|
def save_results(self, results: list[dict[str, Any]], output_file: Path):
|
|
209
153
|
# Save analysis results to JSON file.
|
|
210
154
|
# Calculate summary statistics
|
|
@@ -5,13 +5,12 @@ from __future__ import annotations
|
|
|
5
5
|
import os
|
|
6
6
|
import re
|
|
7
7
|
import shutil
|
|
8
|
-
import subprocess
|
|
9
8
|
import tempfile
|
|
10
9
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
11
10
|
from dataclasses import dataclass, field
|
|
12
11
|
from datetime import datetime, timedelta
|
|
13
12
|
from pathlib import Path
|
|
14
|
-
from typing import Any, Dict, List, Optional
|
|
13
|
+
from typing import Any, Dict, List, Optional
|
|
15
14
|
|
|
16
15
|
from pydriller import Repository
|
|
17
16
|
from pydriller.metrics.process.change_set import ChangeSet
|
|
@@ -1,20 +1,15 @@
|
|
|
1
1
|
# Report generation for green mining analysis.
|
|
2
|
-
"""Report generation module for GreenMining analysis results."""
|
|
3
2
|
|
|
4
3
|
from __future__ import annotations
|
|
5
4
|
|
|
6
|
-
import json
|
|
7
5
|
from datetime import datetime
|
|
8
6
|
from pathlib import Path
|
|
9
|
-
from typing import Any
|
|
7
|
+
from typing import Any
|
|
10
8
|
|
|
11
|
-
from greenmining.config import get_config
|
|
12
9
|
from greenmining.utils import (
|
|
13
10
|
colored_print,
|
|
14
11
|
format_number,
|
|
15
12
|
format_percentage,
|
|
16
|
-
load_json_file,
|
|
17
|
-
print_banner,
|
|
18
13
|
)
|
|
19
14
|
|
|
20
15
|
|