greenmining-1.1.9-py3-none-any.whl → greenmining-1.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
greenmining/__init__.py CHANGED
@@ -1,6 +1,5 @@
-# Green Microservices Mining - GSF Pattern Analysis Tool.
+# GreenMining - MSR library for Green IT research.
 
-from greenmining.config import Config
 from greenmining.controllers.repository_controller import RepositoryController
 from greenmining.gsf_patterns import (
     GREEN_KEYWORDS,
@@ -9,24 +8,23 @@ from greenmining.gsf_patterns import (
     is_green_aware,
 )
 
-__version__ = "1.1.9"
+__version__ = "1.2.0"
 
 
 def fetch_repositories(
     github_token: str,
-    max_repos: int = None,
-    min_stars: int = None,
+    max_repos: int = 100,
+    min_stars: int = 100,
     languages: list = None,
     keywords: str = None,
     created_after: str = None,
     created_before: str = None,
     pushed_after: str = None,
     pushed_before: str = None,
+    output_dir: str = "./data",
 ):
-    # Fetch repositories from GitHub with custom search keywords.
-    config = Config()
-    config.GITHUB_TOKEN = github_token
-    controller = RepositoryController(config)
+    # Fetch repositories from GitHub via GraphQL search.
+    controller = RepositoryController(github_token, output_dir=output_dir)
 
     return controller.fetch_repositories(
         max_repos=max_repos,
@@ -40,6 +38,27 @@ def fetch_repositories(
     )
 
 
+def clone_repositories(
+    repositories: list,
+    github_token: str = None,
+    output_dir: str = "./data",
+    cleanup_existing: bool = False,
+):
+    # Clone repositories into ./greenmining_repos with sanitized directory names.
+    # Args:
+    #     repositories: List of Repository objects (from fetch_repositories)
+    #     github_token: GitHub token (required for controller init)
+    #     output_dir: Output directory for metadata files
+    #     cleanup_existing: Remove existing greenmining_repos/ before cloning
+    token = github_token or "unused"
+    controller = RepositoryController(token, output_dir=output_dir)
+
+    return controller.clone_repositories(
+        repositories=repositories,
+        cleanup_existing=cleanup_existing,
+    )
+
+
 def analyze_repositories(
     urls: list,
     max_commits: int = 500,
@@ -99,12 +118,12 @@ def analyze_repositories(
 
 
 __all__ = [
-    "Config",
     "GSF_PATTERNS",
     "GREEN_KEYWORDS",
     "is_green_aware",
    "get_pattern_by_keywords",
     "fetch_repositories",
+    "clone_repositories",
     "analyze_repositories",
     "__version__",
 ]
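
For orientation, the new top-level workflow in 1.2.0 can be exercised end to end. The sketch below is based only on the signatures visible in the diff above; the token string and keyword filter are illustrative placeholders, and `clone_repositories` falls back to a dummy token for public repositories (note the `github_token or "unused"` line).

```python
# Minimal sketch of the 1.2.0 top-level API from the diff above.
# The token value and keywords are placeholders, not real credentials.
from greenmining import fetch_repositories, clone_repositories

repos = fetch_repositories(
    github_token="ghp_your_token",   # placeholder
    max_repos=100,                   # new default in 1.2.0
    min_stars=100,                   # new default in 1.2.0
    keywords="energy efficiency",    # illustrative filter
    output_dir="./data",             # new parameter in 1.2.0
)
paths = clone_repositories(repos)    # clones into ./greenmining_repos/
print(f"Cloned {len(paths)} repositories")
```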
greenmining/analyzers/__init__.py CHANGED
@@ -3,20 +3,12 @@
 from .code_diff_analyzer import CodeDiffAnalyzer
 from .statistical_analyzer import StatisticalAnalyzer
 from .temporal_analyzer import TemporalAnalyzer
-from .qualitative_analyzer import QualitativeAnalyzer
-from .power_regression import PowerRegressionDetector, PowerRegression
 from .metrics_power_correlator import MetricsPowerCorrelator, CorrelationResult
-from .version_power_analyzer import VersionPowerAnalyzer, VersionPowerReport
 
 __all__ = [
     "CodeDiffAnalyzer",
     "StatisticalAnalyzer",
     "TemporalAnalyzer",
-    "QualitativeAnalyzer",
-    "PowerRegressionDetector",
-    "PowerRegression",
     "MetricsPowerCorrelator",
     "CorrelationResult",
-    "VersionPowerAnalyzer",
-    "VersionPowerReport",
 ]
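
A practical consequence of the removals above: names dropped from `greenmining.analyzers` can no longer be imported from the package. A sketch of the breaking change for downstream code (the surviving imports are taken from the new `__all__`):

```python
# Worked in 1.1.9; raises ImportError in 1.2.0 because the re-export
# was removed from greenmining/analyzers/__init__.py:
# from greenmining.analyzers import QualitativeAnalyzer

# Still exported in 1.2.0:
from greenmining.analyzers import (
    CodeDiffAnalyzer,
    StatisticalAnalyzer,
    TemporalAnalyzer,
    MetricsPowerCorrelator,
)
```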
greenmining/controllers/repository_controller.py CHANGED
@@ -1,11 +1,9 @@
 # Repository Controller - Handles repository fetching + cloning operations.
-import os
+
 import re
 import shutil
 from pathlib import Path
-from typing import List, Dict
 
-from greenmining.config import Config
 from greenmining.models.repository import Repository
 from greenmining.services.github_graphql_fetcher import GitHubGraphQLFetcher
 from greenmining.utils import colored_print, load_json_file, save_json_file
@@ -14,86 +12,26 @@ from greenmining.utils import colored_print, load_json_file, save_json_file
 class RepositoryController:
     # Controller for GitHub repository operations using GraphQL API.
 
-    def __init__(self, config: Config):
-        # Initialize controller with configuration.
-        self.config = config
-        self.graphql_fetcher = GitHubGraphQLFetcher(config.GITHUB_TOKEN)
+    def __init__(self, github_token: str, output_dir: str = "./data"):
+        # Initialize controller with GitHub token and output directory.
+        self.graphql_fetcher = GitHubGraphQLFetcher(github_token)
+        self.output_dir = Path(output_dir)
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+        self.repos_file = self.output_dir / "repositories.json"
         self.repos_dir = Path.cwd() / "greenmining_repos"
 
-    def _sanitize_repo_name(self, repo: Repository, index: int = 0) -> str:
-        """Safe unique dir name: owner_repo[_index]. Handles case collisions."""
-        base = re.sub(r'[^a-z0-9-]', '_', repo.full_name.replace('/', '_').lower())
-        name = f"{base}_{index}" if index else base
-        path = self.repos_dir / name
-        counter = 1
-        while path.exists():
-            name = f"{base}_{counter}"
-            path = self.repos_dir / name
-            counter += 1
-        return name
-
-    def clone_repositories(
-        self,
-        repositories: List[Repository],
-        github_token: str = None,
-        cleanup: bool = True,
-        depth: int = 1  # Shallow clone
-    ) -> List[Dict]:
-        """Clone repos to ./greenmining_repos/ with unique sanitized names."""
-        self.repos_dir.mkdir(exist_ok=True)
-        if cleanup:
-            shutil.rmtree(self.repos_dir, ignore_errors=True)
-            self.repos_dir.mkdir(exist_ok=True)
-            colored_print(f"Cleaned {self.repos_dir}", "yellow")
-
-        results = []
-        for i, repo in enumerate(repositories, 1):
-            safe_name = self._sanitize_repo_name(repo, i)
-            clone_path = self.repos_dir / safe_name
-
-            colored_print(f"[{i}/{len(repositories)}] Cloning {repo.full_name} → {safe_name}", "cyan")
-
-            url = f"https://{github_token}@github.com/{repo.full_name}.git" if github_token else repo.url
-            cmd = ["git", "clone", f"--depth={depth}", "-v", url, str(clone_path)]
-
-            import subprocess
-            try:
-                subprocess.check_call(cmd, cwd=self.repos_dir.parent)
-                colored_print(f"{safe_name}", "green")
-                results.append({
-                    "full_name": repo.full_name,
-                    "local_path": str(clone_path),
-                    "success": True
-                })
-            except subprocess.CalledProcessError as e:
-                colored_print(f"{safe_name}: {e}", "red")
-                results.append({
-                    "full_name": repo.full_name,
-                    "local_path": str(clone_path),
-                    "success": False,
-                    "error": str(e)
-                })
-
-        # Save map for analyze_repositories
-        save_json_file(results, self.repos_dir / "clone_results.json")
-        success_rate = sum(1 for r in results if r["success"]) / len(results) * 100
-        colored_print(f"Cloned: {success_rate:.1f}% ({self.repos_dir}/clone_results.json)", "green")
-        return results
-
-
-
-
-
-    def fetch_repositories(self, max_repos: int = None, min_stars: int = None,
-                           languages: list[str] = None, keywords: str = None,
-                           created_after: str = None, created_before: str = None,
-                           pushed_after: str = None, pushed_before: str = None) -> list[Repository]:
+    def fetch_repositories(
+        self,
+        max_repos: int = 100,
+        min_stars: int = 100,
+        languages: list[str] = None,
+        keywords: str = None,
+        created_after: str = None,
+        created_before: str = None,
+        pushed_after: str = None,
+        pushed_before: str = None,
+    ) -> list[Repository]:
         # Fetch repositories from GitHub using GraphQL API.
-        max_repos = max_repos or self.config.MAX_REPOS
-        min_stars = min_stars or self.config.MIN_STARS
-        languages = languages or self.config.SUPPORTED_LANGUAGES
-        keywords = keywords
-
         colored_print(f"Fetching up to {max_repos} repositories...", "cyan")
         colored_print(f" Keywords: {keywords}", "cyan")
         colored_print(f" Filters: min_stars={min_stars}", "cyan")
@@ -103,10 +41,11 @@ class RepositoryController:
             f" Created: {created_after or 'any'} to {created_before or 'any'}", "cyan"
         )
         if pushed_after or pushed_before:
-            colored_print(f" Pushed: {pushed_after or 'any'} to {pushed_before or 'any'}", "cyan")
+            colored_print(
+                f" Pushed: {pushed_after or 'any'} to {pushed_before or 'any'}", "cyan"
+            )
 
         try:
-            # Execute GraphQL search
             repositories = self.graphql_fetcher.search_repositories(
                 keywords=keywords,
                 max_repos=max_repos,
@@ -118,12 +57,11 @@ class RepositoryController:
                 pushed_before=pushed_before,
             )
 
-            # Save to file
             repo_dicts = [r.to_dict() for r in repositories]
-            save_json_file(repo_dicts, self.config.REPOS_FILE)
+            save_json_file(repo_dicts, self.repos_file)
 
             colored_print(f"Fetched {len(repositories)} repositories", "green")
-            colored_print(f" Saved to: {self.config.REPOS_FILE}", "cyan")
+            colored_print(f" Saved to: {self.repos_file}", "cyan")
 
             return repositories
 
@@ -131,12 +69,69 @@ class RepositoryController:
             colored_print(f"Error fetching repositories: {e}", "red")
             raise
 
+    def clone_repositories(
+        self,
+        repositories: list[Repository],
+        cleanup_existing: bool = False,
+    ) -> list[Path]:
+        # Clone repositories into ./greenmining_repos with sanitized directory names.
+        self.repos_dir.mkdir(parents=True, exist_ok=True)
+
+        if cleanup_existing and self.repos_dir.exists():
+            shutil.rmtree(self.repos_dir)
+            self.repos_dir.mkdir(parents=True, exist_ok=True)
+
+        cloned_paths = []
+        colored_print(f"\nCloning {len(repositories)} repositories into {self.repos_dir}", "cyan")
+
+        for repo in repositories:
+            safe_name = self._sanitize_repo_name(repo)
+            local_path = self.repos_dir / safe_name
+
+            if local_path.exists():
+                colored_print(f" Already exists: {safe_name}", "yellow")
+                cloned_paths.append(local_path)
+                continue
+
+            try:
+                url = repo.url if hasattr(repo, "url") else f"https://github.com/{repo.full_name}"
+                colored_print(f" Cloning {repo.full_name} -> {safe_name}", "cyan")
+
+                import subprocess
+
+                subprocess.run(
+                    ["git", "clone", "--depth", "1", url, str(local_path)],
+                    capture_output=True,
+                    text=True,
+                    check=True,
+                    timeout=120,
+                )
+                cloned_paths.append(local_path)
+                colored_print(f" Cloned: {safe_name}", "green")
+
+            except Exception as e:
+                colored_print(f" Failed to clone {repo.full_name}: {e}", "yellow")
+
+        colored_print(f"Cloned {len(cloned_paths)}/{len(repositories)} repositories", "green")
+        return cloned_paths
+
+    def _sanitize_repo_name(self, repo: Repository) -> str:
+        # Safe unique directory name: owner_repo. Handles case collisions.
+        base = re.sub(r"[^a-z0-9-]", "_", repo.full_name.replace("/", "_").lower())
+        path = self.repos_dir / base
+        if not path.exists():
+            return base
+        counter = 1
+        while (self.repos_dir / f"{base}_{counter}").exists():
+            counter += 1
+        return f"{base}_{counter}"
+
     def load_repositories(self) -> list[Repository]:
         # Load repositories from file.
-        if not self.config.REPOS_FILE.exists():
-            raise FileNotFoundError(f"No repositories file found at {self.config.REPOS_FILE}")
+        if not self.repos_file.exists():
+            raise FileNotFoundError(f"No repositories file found at {self.repos_file}")
 
-        repo_dicts = load_json_file(self.config.REPOS_FILE)
+        repo_dicts = load_json_file(self.repos_file)
         return [Repository.from_dict(r) for r in repo_dicts]
 
     def get_repository_stats(self, repositories: list[Repository]) -> dict:
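
The directory-name sanitization used by `clone_repositories` can be illustrated in isolation. This sketch mirrors the regex in `_sanitize_repo_name` above (the collision-suffix loop is omitted); the example repository names are hypothetical:

```python
import re

def sanitize(full_name: str) -> str:
    # Mirror of _sanitize_repo_name: "/" -> "_", lowercase, then any
    # character outside [a-z0-9-] becomes "_".
    return re.sub(r"[^a-z0-9-]", "_", full_name.replace("/", "_").lower())

print(sanitize("octocat/Hello-World"))  # octocat_hello-world
print(sanitize("My.Org/Repo+Name"))     # my_org_repo_name
```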
@@ -221,7 +221,7 @@ class LocalRepoAnalyzer:
         # method_level_analysis: Extract per-method metrics via Lizard
         # include_source_code: Include source code before/after in results
         # process_metrics: "standard" or "full" PyDriller process metrics
-        self.clone_path = clone_path or Path(tempfile.gettempdir()) / "greenmining_repos"
+        self.clone_path = clone_path or Path.cwd() / "greenmining_repos"
         self.clone_path.mkdir(parents=True, exist_ok=True)
         self.max_commits = max_commits
         self.days_back = days_back
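
The one-line change above moves the default clone location for `LocalRepoAnalyzer` from the system temp directory to the current working directory, matching `RepositoryController.repos_dir`. A standard-library sketch of the before/after defaults:

```python
import tempfile
from pathlib import Path

# 1.1.9 default: clones landed under the system temp directory.
old_default = Path(tempfile.gettempdir()) / "greenmining_repos"
# 1.2.0 default: clones land in the current working directory.
new_default = Path.cwd() / "greenmining_repos"
print(old_default)
print(new_default)
```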
greenmining-1.2.0.dist-info/METADATA ADDED
@@ -0,0 +1,311 @@
+Metadata-Version: 2.4
+Name: greenmining
+Version: 1.2.0
+Summary: An empirical Python library for Mining Software Repositories (MSR) in Green IT research
+Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
+License: MIT
+Project-URL: Homepage, https://github.com/adam-bouafia/greenmining
+Project-URL: Documentation, https://github.com/adam-bouafia/greenmining#readme
+Project-URL: Linkedin, https://www.linkedin.com/in/adam-bouafia/
+Project-URL: Repository, https://github.com/adam-bouafia/greenmining
+Project-URL: Issues, https://github.com/adam-bouafia/greenmining/issues
+Project-URL: Changelog, https://github.com/adam-bouafia/greenmining/blob/main/CHANGELOG.md
+Keywords: green-software,gsf,msr,mining-software-repositories,green-it,sustainability,carbon-footprint,energy-efficiency,repository-analysis,github-analysis,pydriller,empirical-software-engineering
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Science/Research
+Classifier: Topic :: Software Development :: Quality Assurance
+Classifier: Topic :: Scientific/Engineering :: Information Analysis
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: PyGithub
+Requires-Dist: PyDriller
+Requires-Dist: pandas
+Requires-Dist: colorama
+Requires-Dist: tabulate
+Requires-Dist: tqdm
+Requires-Dist: matplotlib
+Requires-Dist: plotly
+Requires-Dist: python-dotenv
+Requires-Dist: requests
+Provides-Extra: dev
+Requires-Dist: pytest; extra == "dev"
+Requires-Dist: pytest-cov; extra == "dev"
+Requires-Dist: pytest-mock; extra == "dev"
+Requires-Dist: black; extra == "dev"
+Requires-Dist: ruff; extra == "dev"
+Requires-Dist: mypy; extra == "dev"
+Requires-Dist: build; extra == "dev"
+Requires-Dist: twine; extra == "dev"
+Provides-Extra: energy
+Requires-Dist: psutil; extra == "energy"
+Requires-Dist: codecarbon; extra == "energy"
+Provides-Extra: docs
+Requires-Dist: sphinx; extra == "docs"
+Requires-Dist: sphinx-rtd-theme; extra == "docs"
+Requires-Dist: myst-parser; extra == "docs"
+Dynamic: license-file
+
+# greenmining
+
+An empirical Python library for Mining Software Repositories (MSR) in Green IT research.
+
+[![PyPI](https://img.shields.io/pypi/v/greenmining)](https://pypi.org/project/greenmining/)
+[![Python](https://img.shields.io/pypi/pyversions/greenmining)](https://pypi.org/project/greenmining/)
+[![License](https://img.shields.io/github/license/adam-bouafia/greenmining)](LICENSE)
+[![Documentation](https://img.shields.io/badge/docs-readthedocs-blue)](https://greenmining.readthedocs.io/)
+
+## Overview
+
+`greenmining` is a research-grade Python library designed for **empirical Mining Software Repositories (MSR)** studies in **Green IT**. It enables researchers and practitioners to:
+
+- **Mine repositories at scale** - Search, fetch, and clone GitHub repositories via GraphQL API with configurable filters
+- **Classify green commits** - Detect 124 sustainability patterns from the Green Software Foundation (GSF) catalog using 332 keywords
+- **Analyze any repository by URL** - Direct Git-based analysis with support for private repositories
+- **Measure energy consumption** - RAPL, CodeCarbon, and CPU Energy Meter backends for power profiling
+- **Carbon footprint reporting** - CO2 emissions calculation with 20+ country profiles and cloud region support
+- **Method-level analysis** - Per-method complexity and metrics via Lizard integration
+- **Generate research datasets** - Statistical analysis, temporal trends, and publication-ready reports
+
+## Installation
+
+### Via pip
+
+```bash
+pip install greenmining
+```
+
+### With energy measurement
+
+```bash
+pip install greenmining[energy]
+```
+
+### From source
+
+```bash
+git clone https://github.com/adam-bouafia/greenmining.git
+cd greenmining
+pip install -e .
+```
+
+## Quick Start
+
+### Pattern Detection
+
+```python
+from greenmining import GSF_PATTERNS, is_green_aware, get_pattern_by_keywords
+
+print(f"Total patterns: {len(GSF_PATTERNS)}") # 124 patterns across 15 categories
+
+commit_msg = "Optimize Redis caching to reduce energy consumption"
+if is_green_aware(commit_msg):
+    patterns = get_pattern_by_keywords(commit_msg)
+    print(f"Matched patterns: {patterns}")
+```
+
+### Fetch Repositories
+
+```python
+from greenmining import fetch_repositories
+
+repos = fetch_repositories(
+    github_token="your_token",
+    max_repos=50,
+    min_stars=500,
+    keywords="kubernetes cloud-native",
+    languages=["Python", "Go"],
+    created_after="2020-01-01",
+    pushed_after="2023-01-01",
+)
+
+for repo in repos[:5]:
+    print(f"- {repo.full_name} ({repo.stars} stars)")
+```
+
+### Clone Repositories
+
+```python
+from greenmining import fetch_repositories, clone_repositories
+
+repos = fetch_repositories(github_token="your_token", max_repos=10, keywords="android")
+
+# Clone into ./greenmining_repos/ with sanitized directory names
+paths = clone_repositories(repos)
+print(f"Cloned {len(paths)} repositories")
+```
+
+### Analyze Repositories by URL
+
+```python
+from greenmining import analyze_repositories
+
+results = analyze_repositories(
+    urls=[
+        "https://github.com/kubernetes/kubernetes",
+        "https://github.com/istio/istio",
+    ],
+    max_commits=100,
+    parallel_workers=2,
+    energy_tracking=True,
+    energy_backend="auto",
+    method_level_analysis=True,
+    include_source_code=True,
+    github_token="your_token",
+    since_date="2020-01-01",
+    to_date="2025-12-31",
+)
+
+for result in results:
+    print(f"{result.name}: {result.green_commit_rate:.1%} green")
+```
+
+### Access Pattern Data
+
+```python
+from greenmining import GSF_PATTERNS
+
+# Get patterns by category
+cloud = {k: v for k, v in GSF_PATTERNS.items() if v['category'] == 'cloud'}
+print(f"Cloud patterns: {len(cloud)}")
+
+# All categories
+categories = set(p['category'] for p in GSF_PATTERNS.values())
+print(f"Categories: {sorted(categories)}")
+```
+
+### Energy Measurement
+
+```python
+from greenmining.energy import get_energy_meter, CPUEnergyMeter
+
+# Auto-detect best backend
+meter = get_energy_meter("auto")
+meter.start()
+# ... your workload ...
+result = meter.stop()
+print(f"Energy: {result.joules:.2f} J, Power: {result.watts_avg:.2f} W")
+```
+
+### Statistical Analysis
+
+```python
+from greenmining.analyzers import StatisticalAnalyzer, TemporalAnalyzer
+
+stat = StatisticalAnalyzer()
+temporal = TemporalAnalyzer(granularity="quarter")
+
+# Pattern correlations, effect sizes, temporal trends
+# See experiment notebook for full usage
+```
+
+### Metrics-to-Power Correlation
+
+```python
+from greenmining.analyzers import MetricsPowerCorrelator
+
+correlator = MetricsPowerCorrelator()
+correlator.fit(
+    metrics=["complexity", "nloc", "code_churn"],
+    metrics_values={
+        "complexity": [10, 20, 30, 40],
+        "nloc": [100, 200, 300, 400],
+        "code_churn": [50, 100, 150, 200],
+    },
+    power_measurements=[5.0, 8.0, 12.0, 15.0],
+)
+print(f"Feature importance: {correlator.feature_importance}")
+```
+
+## Features
+
+### Core Capabilities
+
+- **Pattern Detection**: 124 sustainability patterns across 15 categories from the GSF catalog
+- **Keyword Analysis**: 332 green software detection keywords
+- **Repository Fetching**: GraphQL API with date, star, and language filters
+- **Repository Cloning**: Sanitized directory names in `./greenmining_repos/`
+- **URL-Based Analysis**: Direct Git-based analysis from GitHub URLs (HTTPS and SSH)
+- **Batch Processing**: Parallel analysis of multiple repositories
+- **Private Repository Support**: Authentication via SSH keys or GitHub tokens
+
+### Analysis & Measurement
+
+- **Energy Measurement**: RAPL, CodeCarbon, and CPU Energy Meter backends
+- **Carbon Footprint Reporting**: CO2 emissions with 20+ country profiles (AWS, GCP, Azure)
+- **Metrics-to-Power Correlation**: Pearson and Spearman analysis between code metrics and power
+- **Method-Level Analysis**: Per-method complexity metrics via Lizard integration
+- **Source Code Access**: Before/after source code for refactoring detection
+- **Process Metrics**: DMM size, complexity, interfacing via PyDriller
+- **Statistical Analysis**: Correlations, effect sizes, and temporal trends
+- **Multi-format Output**: JSON, CSV, pandas DataFrame
+
+### Energy Backends
+
+| Backend | Platform | Metrics | Requirements |
+|---------|----------|---------|--------------|
+| **RAPL** | Linux (Intel/AMD) | CPU/RAM energy (Joules) | `/sys/class/powercap/` access |
+| **CodeCarbon** | Cross-platform | Energy + Carbon emissions (gCO2) | `pip install codecarbon` |
+| **CPU Meter** | All platforms | Estimated CPU energy (Joules) | Optional: `pip install psutil` |
+| **Auto** | All platforms | Best available backend | Automatic detection |
+
+### GSF Pattern Categories
+
+**124 patterns across 15 categories:**
+
+| Category | Patterns | Examples |
+|----------|----------|----------|
+| Cloud | 42 | Auto-scaling, serverless, right-sizing, region selection |
+| Web | 17 | CDN, caching, lazy loading, compression |
+| AI/ML | 19 | Model pruning, quantization, edge inference |
+| Database | 5 | Indexing, query optimization, connection pooling |
+| Networking | 8 | Protocol optimization, HTTP/2, gRPC |
+| Network | 6 | Request batching, GraphQL, circuit breakers |
+| Microservices | 4 | Service decomposition, graceful shutdown |
+| Infrastructure | 4 | Alpine containers, IaC, renewable regions |
+| General | 8 | Feature flags, precomputation, background jobs |
+| Others | 11 | Caching, resource, data, async, code, monitoring |
+
+## Development
+
+```bash
+git clone https://github.com/adam-bouafia/greenmining.git
+cd greenmining
+pip install -e ".[dev]"
+
+pytest tests/
+black greenmining/ tests/
+ruff check greenmining/ tests/
+```
+
+## Requirements
+
+- Python 3.9+
+- PyGithub, PyDriller, pandas, colorama, tqdm
+
+**Optional:**
+
+```bash
+pip install greenmining[energy] # psutil, codecarbon
+pip install greenmining[dev] # pytest, black, ruff, mypy
+```
+
+## License
+
+MIT License - See [LICENSE](LICENSE) for details.
+
+## Links
+
+- **GitHub**: https://github.com/adam-bouafia/greenmining
+- **PyPI**: https://pypi.org/project/greenmining/
+- **Documentation**: https://greenmining.readthedocs.io/
+- **Docker Hub**: https://hub.docker.com/r/adambouafia/greenmining