greenmining 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- greenmining/__init__.py +20 -0
- greenmining/__main__.py +6 -0
- greenmining/__version__.py +3 -0
- greenmining/cli.py +370 -0
- greenmining/config.py +120 -0
- greenmining/controllers/__init__.py +11 -0
- greenmining/controllers/repository_controller.py +117 -0
- greenmining/gsf_patterns.py +802 -0
- greenmining/main.py +37 -0
- greenmining/models/__init__.py +12 -0
- greenmining/models/aggregated_stats.py +30 -0
- greenmining/models/analysis_result.py +48 -0
- greenmining/models/commit.py +71 -0
- greenmining/models/repository.py +89 -0
- greenmining/presenters/__init__.py +11 -0
- greenmining/presenters/console_presenter.py +141 -0
- greenmining/services/__init__.py +13 -0
- greenmining/services/commit_extractor.py +282 -0
- greenmining/services/data_aggregator.py +442 -0
- greenmining/services/data_analyzer.py +333 -0
- greenmining/services/github_fetcher.py +266 -0
- greenmining/services/reports.py +531 -0
- greenmining/utils.py +320 -0
- greenmining-0.1.4.dist-info/METADATA +335 -0
- greenmining-0.1.4.dist-info/RECORD +29 -0
- greenmining-0.1.4.dist-info/WHEEL +5 -0
- greenmining-0.1.4.dist-info/entry_points.txt +2 -0
- greenmining-0.1.4.dist-info/licenses/LICENSE +21 -0
- greenmining-0.1.4.dist-info/top_level.txt +1 -0
greenmining/utils.py
ADDED
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
"""Utility functions for green microservices mining CLI."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import time
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from functools import wraps
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, Callable, Optional
|
|
9
|
+
|
|
10
|
+
import pandas as pd
|
|
11
|
+
from colorama import Fore, Style, init
|
|
12
|
+
|
|
13
|
+
# Initialize colorama
|
|
14
|
+
init(autoreset=True)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def format_timestamp(dt: Optional[datetime] = None) -> str:
    """Format a timestamp in ISO 8601 format with a trailing 'Z' (UTC).

    Args:
        dt: Datetime object to format; defaults to the current UTC time.

    Returns:
        ISO formatted timestamp string, e.g. "2024-12-01T12:34:56Z".
    """
    if dt is None:
        # datetime.utcnow() is deprecated since Python 3.12; use an
        # aware UTC datetime instead. The strftime output is identical.
        from datetime import timezone

        dt = datetime.now(timezone.utc)
    return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def load_json_file(path: Path) -> dict[str, Any]:
    """Read and parse a JSON document from *path*.

    Args:
        path: Path to the JSON file.

    Returns:
        The parsed JSON data.

    Raises:
        FileNotFoundError: If the file does not exist.
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")

    return json.loads(path.read_text(encoding="utf-8"))
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def save_json_file(data: dict[str, Any], path: Path, indent: int = 2) -> None:
    """Serialize *data* as JSON and write it to *path*.

    Parent directories are created as needed.

    Args:
        data: Data to save.
        path: Output file path.
        indent: JSON indentation level.
    """
    path.parent.mkdir(parents=True, exist_ok=True)

    serialized = json.dumps(data, indent=indent, ensure_ascii=False)
    path.write_text(serialized, encoding="utf-8")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def load_csv_file(path: Path) -> pd.DataFrame:
    """Load a CSV file as a pandas DataFrame.

    Args:
        path: Path to the CSV file.

    Returns:
        DataFrame with the CSV contents.

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    if path.exists():
        return pd.read_csv(path)
    raise FileNotFoundError(f"File not found: {path}")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def save_csv_file(df: pd.DataFrame, path: Path) -> None:
    """Write *df* to *path* as UTF-8 CSV without the index column.

    Parent directories are created as needed.

    Args:
        df: DataFrame to save.
        path: Output file path.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(target, index=False, encoding="utf-8")
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def estimate_tokens(text: str) -> int:
    """Estimate the number of tokens in *text*.

    Uses the rough heuristic of ~4 characters per token.

    Args:
        text: Input text.

    Returns:
        Estimated token count.
    """
    chars_per_token = 4
    return len(text) // chars_per_token
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def estimate_cost(tokens: int, model: str = "claude-sonnet-4-20250514") -> float:
    """Estimate API cost in USD for a given token count.

    Claude Sonnet pricing (as of Dec 2024): $3 per million input tokens,
    $15 per million output tokens. The estimate averages the two, assuming
    a 50/50 input/output split. Models other than Sonnet return 0.0.

    Args:
        tokens: Number of tokens.
        model: Model name.

    Returns:
        Estimated cost in USD.
    """
    if "sonnet" not in model.lower():
        return 0.0

    per_token_input = 3.0 / 1_000_000
    per_token_output = 15.0 / 1_000_000
    avg_per_token = (per_token_input + per_token_output) / 2
    return tokens * avg_per_token
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def retry_on_exception(
    max_retries: int = 3,
    delay: float = 2.0,
    exponential_backoff: bool = True,
    exceptions: tuple = (Exception,),
) -> Callable:
    """Decorator factory that retries the wrapped function on failure.

    Args:
        max_retries: Maximum number of retry attempts (the function is
            called at most ``max_retries + 1`` times).
        delay: Initial delay between retries in seconds.
        exponential_backoff: Double the delay after each failed attempt.
        exceptions: Exception types that trigger a retry; anything else
            propagates immediately.

    Returns:
        The decorator to apply to a function.
    """

    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def wrapper(*args, **kwargs):
            wait = delay
            total_attempts = max_retries + 1

            for attempt in range(total_attempts):
                try:
                    return func(*args, **kwargs)
                except exceptions as exc:
                    # Last attempt: surface the failure to the caller.
                    if attempt + 1 == total_attempts:
                        raise

                    colored_print(f"Attempt {attempt + 1}/{max_retries + 1} failed: {exc}", "yellow")
                    colored_print(f"Retrying in {wait:.1f} seconds...", "yellow")
                    time.sleep(wait)
                    if exponential_backoff:
                        wait *= 2

            return None

        return wrapper

    return decorator
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def colored_print(text: str, color: str = "white") -> None:
    """Print *text* to the console in the requested color.

    Args:
        text: Text to print.
        color: Color name (red, green, yellow, blue, magenta, cyan, white);
            unknown names fall back to white.
    """
    palette = {
        "red": Fore.RED,
        "green": Fore.GREEN,
        "yellow": Fore.YELLOW,
        "blue": Fore.BLUE,
        "magenta": Fore.MAGENTA,
        "cyan": Fore.CYAN,
        "white": Fore.WHITE,
    }

    prefix = palette.get(color.lower(), Fore.WHITE)
    print(f"{prefix}{text}{Style.RESET_ALL}")
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def handle_github_rate_limit(response) -> None:
    """Raise if a GitHub API response indicates rate limiting.

    Args:
        response: GitHub API response object; checked for a ``status``
            attribute equal to 403.

    Raises:
        Exception: If the rate limit has been exceeded.
    """
    status = getattr(response, "status", None)
    if status == 403:
        colored_print("GitHub API rate limit exceeded!", "red")
        colored_print("Please wait or use an authenticated token.", "yellow")
        raise Exception("GitHub API rate limit exceeded")
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def format_number(num: int) -> str:
    """Render *num* with thousands separators (e.g. 1234 -> "1,234").

    Args:
        num: Number to format.

    Returns:
        Formatted string.
    """
    return "{:,}".format(num)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def format_percentage(value: float, decimals: int = 1) -> str:
    """Format a percentage value as a string with a '%' suffix.

    Args:
        value: Percentage value on a 0-100 scale.
        decimals: Number of decimal places.

    Returns:
        Formatted percentage string.
    """
    return format(value, f".{decimals}f") + "%"
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def format_duration(seconds: float) -> str:
    """Convert a duration in seconds to a compact human-readable string.

    Produces "Xs" for durations under a minute, "Xm Ys" under an hour,
    and "Xh Ym" otherwise.

    Args:
        seconds: Duration in seconds.

    Returns:
        Formatted duration string (e.g., "2h 15m").
    """
    whole = int(seconds)
    if seconds < 60:
        return f"{whole}s"
    if seconds < 3600:
        return f"{whole // 60}m {whole % 60}s"
    return f"{whole // 3600}h {(whole % 3600) // 60}m"
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def truncate_text(text: str, max_length: int = 100) -> str:
    """Truncate *text* to at most *max_length* characters.

    Text longer than the limit is cut and suffixed with "..." so the
    result never exceeds *max_length*.

    Args:
        text: Input text.
        max_length: Maximum length of the returned string.

    Returns:
        Truncated text with ellipsis if needed.
    """
    if len(text) <= max_length:
        return text
    if max_length < 3:
        # No room for an ellipsis. Previously text[:max_length - 3] used a
        # negative slice bound here and returned an over-long string.
        return text[:max_length]
    return text[: max_length - 3] + "..."
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def create_checkpoint(checkpoint_file: Path, data: dict[str, Any]) -> None:
    """Persist *data* as a checkpoint so a long run can be resumed later.

    Args:
        checkpoint_file: Path to checkpoint file.
        data: Checkpoint data to serialize.
    """
    save_json_file(data, checkpoint_file)
    colored_print("Checkpoint saved: " + str(checkpoint_file), "green")
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def load_checkpoint(checkpoint_file: Path) -> Optional[dict[str, Any]]:
    """Load previously saved checkpoint data, if any.

    Args:
        checkpoint_file: Path to checkpoint file.

    Returns:
        The checkpoint contents, or None when the file is absent or could
        not be read (a warning is printed instead of raising).
    """
    if not checkpoint_file.exists():
        return None
    try:
        return load_json_file(checkpoint_file)
    except Exception as exc:
        # Best-effort: a corrupt checkpoint should not abort the run.
        colored_print(f"Failed to load checkpoint: {exc}", "yellow")
        return None
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def print_banner(title: str) -> None:
    """Print a prominent banner around *title*.

    Args:
        title: Banner title.
    """
    rule = "=" * 60
    colored_print("\n" + rule, "cyan")
    colored_print(f"🔍 {title}", "cyan")
    colored_print(rule + "\n", "cyan")
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def print_section(title: str) -> None:
    """Print a section header followed by a horizontal rule.

    Args:
        title: Section title.
    """
    colored_print("\n📌 " + title, "blue")
    colored_print("-" * 60, "blue")
|
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: greenmining
|
|
3
|
+
Version: 0.1.4
|
|
4
|
+
Summary: Green Software Foundation (GSF) patterns mining tool for microservices repositories
|
|
5
|
+
Author-email: Your Name <your.email@example.com>
|
|
6
|
+
Maintainer-email: Your Name <your.email@example.com>
|
|
7
|
+
License: MIT
|
|
8
|
+
Project-URL: Homepage, https://github.com/adam-bouafia/greenmining
|
|
9
|
+
Project-URL: Documentation, https://github.com/adam-bouafia/greenmining#readme
|
|
10
|
+
Project-URL: Repository, https://github.com/adam-bouafia/greenmining
|
|
11
|
+
Project-URL: Issues, https://github.com/adam-bouafia/greenmining/issues
|
|
12
|
+
Project-URL: Changelog, https://github.com/adam-bouafia/greenmining/blob/main/CHANGELOG.md
|
|
13
|
+
Keywords: green-software,gsf,sustainability,carbon-footprint,microservices,mining,repository-analysis,energy-efficiency,github-analysis
|
|
14
|
+
Classifier: Development Status :: 3 - Alpha
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: Intended Audience :: Science/Research
|
|
17
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
19
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
20
|
+
Classifier: Programming Language :: Python :: 3
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
26
|
+
Classifier: Operating System :: OS Independent
|
|
27
|
+
Classifier: Environment :: Console
|
|
28
|
+
Requires-Python: >=3.9
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Requires-Dist: PyGithub>=2.1.1
|
|
32
|
+
Requires-Dist: PyDriller>=2.5
|
|
33
|
+
Requires-Dist: pandas>=2.2.0
|
|
34
|
+
Requires-Dist: click>=8.1.7
|
|
35
|
+
Requires-Dist: colorama>=0.4.6
|
|
36
|
+
Requires-Dist: tabulate>=0.9.0
|
|
37
|
+
Requires-Dist: tqdm>=4.66.0
|
|
38
|
+
Requires-Dist: matplotlib>=3.8.0
|
|
39
|
+
Requires-Dist: plotly>=5.18.0
|
|
40
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
41
|
+
Provides-Extra: dev
|
|
42
|
+
Requires-Dist: pytest>=7.4.0; extra == "dev"
|
|
43
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
44
|
+
Requires-Dist: pytest-mock>=3.12.0; extra == "dev"
|
|
45
|
+
Requires-Dist: black>=23.12.0; extra == "dev"
|
|
46
|
+
Requires-Dist: ruff>=0.1.9; extra == "dev"
|
|
47
|
+
Requires-Dist: mypy>=1.8.0; extra == "dev"
|
|
48
|
+
Requires-Dist: build>=1.0.3; extra == "dev"
|
|
49
|
+
Requires-Dist: twine>=4.0.2; extra == "dev"
|
|
50
|
+
Provides-Extra: docs
|
|
51
|
+
Requires-Dist: sphinx>=7.2.0; extra == "docs"
|
|
52
|
+
Requires-Dist: sphinx-rtd-theme>=2.0.0; extra == "docs"
|
|
53
|
+
Requires-Dist: myst-parser>=2.0.0; extra == "docs"
|
|
54
|
+
Dynamic: license-file
|
|
55
|
+
|
|
56
|
+
# greenmining
|
|
57
|
+
|
|
58
|
+
Green mining for microservices repositories.
|
|
59
|
+
|
|
60
|
+
[](https://pypi.org/project/greenmining/)
|
|
61
|
+
[](https://pypi.org/project/greenmining/)
|
|
62
|
+
[](LICENSE)
|
|
63
|
+
|
|
64
|
+
## Overview
|
|
65
|
+
|
|
66
|
+
`greenmining` is a Python library and CLI tool for analyzing GitHub repositories to identify green software engineering practices. It detects 76 official Green Software Foundation patterns across cloud, web, AI, database, networking, and general categories.
|
|
67
|
+
|
|
68
|
+
## Features
|
|
69
|
+
|
|
70
|
+
- 🔍 **76 GSF Patterns**: Detect official Green Software Foundation patterns
|
|
71
|
+
- 📊 **Repository Mining**: Analyze 100+ microservices repositories from GitHub
|
|
72
|
+
- 📈 **Green Awareness Detection**: Identify sustainability-focused commits
|
|
73
|
+
- 📄 **Comprehensive Reports**: Generate analysis reports in multiple formats
|
|
74
|
+
- 🐳 **Docker Support**: Run in containers for consistent environments
|
|
75
|
+
- ⚡ **Fast Analysis**: Parallel processing and checkpoint system
|
|
76
|
+
|
|
77
|
+
## Installation
|
|
78
|
+
|
|
79
|
+
### Via pip
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
pip install greenmining
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### From source
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
git clone https://github.com/adam-bouafia/greenmining.git
|
|
89
|
+
cd greenmining
|
|
90
|
+
pip install -e .
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### With Docker
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
docker pull adambouafia/greenmining:latest
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## Quick Start
|
|
100
|
+
|
|
101
|
+
### CLI Usage
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
# Set your GitHub token
|
|
105
|
+
export GITHUB_TOKEN="your_github_token"
|
|
106
|
+
|
|
107
|
+
# Run full analysis pipeline
|
|
108
|
+
greenmining pipeline --max-repos 100
|
|
109
|
+
|
|
110
|
+
# Fetch repositories
|
|
111
|
+
greenmining fetch --max-repos 100 --min-stars 100
|
|
112
|
+
|
|
113
|
+
# Extract commits
|
|
114
|
+
greenmining extract --max-commits 50
|
|
115
|
+
|
|
116
|
+
# Analyze for green patterns
|
|
117
|
+
greenmining analyze
|
|
118
|
+
|
|
119
|
+
# Generate report
|
|
120
|
+
greenmining report
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Python API
|
|
124
|
+
|
|
125
|
+
#### Basic Pattern Detection
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
from greenmining import GSF_PATTERNS, is_green_aware, get_pattern_by_keywords
|
|
129
|
+
|
|
130
|
+
# Check available patterns
|
|
131
|
+
print(f"Total GSF patterns: {len(GSF_PATTERNS)}") # 76
|
|
132
|
+
|
|
133
|
+
# Detect green awareness in commit messages
|
|
134
|
+
commit_msg = "Optimize Redis caching to reduce energy consumption"
|
|
135
|
+
if is_green_aware(commit_msg):
|
|
136
|
+
patterns = get_pattern_by_keywords(commit_msg)
|
|
137
|
+
print(f"Matched patterns: {patterns}")
|
|
138
|
+
# Output: ['Cache Static Data', 'Use Efficient Cache Strategies']
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
#### Analyze Repository Commits
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
from greenmining.services.github_fetcher import GitHubFetcher
|
|
145
|
+
from greenmining.services.commit_extractor import CommitExtractor
|
|
146
|
+
from greenmining.services.data_analyzer import DataAnalyzer
|
|
147
|
+
from greenmining.config import Config
|
|
148
|
+
|
|
149
|
+
# Initialize services
|
|
150
|
+
config = Config()
|
|
151
|
+
fetcher = GitHubFetcher(config)
|
|
152
|
+
extractor = CommitExtractor(config)
|
|
153
|
+
analyzer = DataAnalyzer(config)
|
|
154
|
+
|
|
155
|
+
# Fetch repositories
|
|
156
|
+
repos = fetcher.fetch_repositories(max_repos=10, min_stars=100)
|
|
157
|
+
|
|
158
|
+
# Extract commits from first repo
|
|
159
|
+
commits = extractor.extract_commits(repos[0], max_commits=50)
|
|
160
|
+
|
|
161
|
+
# Analyze commits for green patterns
|
|
162
|
+
results = []
|
|
163
|
+
for commit in commits:
|
|
164
|
+
result = analyzer.analyze_commit(commit)
|
|
165
|
+
if result['green_aware']:
|
|
166
|
+
results.append(result)
|
|
167
|
+
print(f"Green commit found: {commit.message[:50]}...")
|
|
168
|
+
print(f" Patterns: {result['known_pattern']}")
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
#### Access GSF Patterns Data
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
from greenmining import GSF_PATTERNS
|
|
175
|
+
|
|
176
|
+
# Get all cloud patterns
|
|
177
|
+
cloud_patterns = {
|
|
178
|
+
pid: pattern for pid, pattern in GSF_PATTERNS.items()
|
|
179
|
+
if pattern['category'] == 'cloud'
|
|
180
|
+
}
|
|
181
|
+
print(f"Cloud patterns: {len(cloud_patterns)}")
|
|
182
|
+
|
|
183
|
+
# Get pattern details
|
|
184
|
+
cache_pattern = GSF_PATTERNS['gsf_001']
|
|
185
|
+
print(f"Pattern: {cache_pattern['name']}")
|
|
186
|
+
print(f"Category: {cache_pattern['category']}")
|
|
187
|
+
print(f"Keywords: {cache_pattern['keywords']}")
|
|
188
|
+
print(f"Impact: {cache_pattern['sci_impact']}")
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
#### Generate Custom Reports
|
|
192
|
+
|
|
193
|
+
```python
|
|
194
|
+
from greenmining.services.data_aggregator import DataAggregator
|
|
195
|
+
from greenmining.config import Config
|
|
196
|
+
|
|
197
|
+
config = Config()
|
|
198
|
+
aggregator = DataAggregator(config)
|
|
199
|
+
|
|
200
|
+
# Load analysis results
|
|
201
|
+
results = aggregator.load_analysis_results()
|
|
202
|
+
|
|
203
|
+
# Generate statistics
|
|
204
|
+
stats = aggregator.calculate_statistics(results)
|
|
205
|
+
print(f"Total commits analyzed: {stats['total_commits']}")
|
|
206
|
+
print(f"Green-aware commits: {stats['green_aware_count']}")
|
|
207
|
+
print(f"Top patterns: {stats['top_patterns'][:5]}")
|
|
208
|
+
|
|
209
|
+
# Export to CSV
|
|
210
|
+
aggregator.export_to_csv(results, "output.csv")
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
#### Batch Analysis
|
|
214
|
+
|
|
215
|
+
```python
|
|
216
|
+
from greenmining.controllers.repository_controller import RepositoryController
|
|
217
|
+
from greenmining.config import Config
|
|
218
|
+
|
|
219
|
+
config = Config()
|
|
220
|
+
controller = RepositoryController(config)
|
|
221
|
+
|
|
222
|
+
# Run full pipeline programmatically
|
|
223
|
+
controller.fetch_repositories(max_repos=50)
|
|
224
|
+
controller.extract_commits(max_commits=100)
|
|
225
|
+
controller.analyze_commits()
|
|
226
|
+
controller.aggregate_results()
|
|
227
|
+
controller.generate_report()
|
|
228
|
+
|
|
229
|
+
print("Analysis complete! Check data/ directory for results.")
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
### Docker Usage
|
|
233
|
+
|
|
234
|
+
```bash
|
|
235
|
+
# Run analysis pipeline
|
|
236
|
+
docker run -v $(pwd)/data:/app/data \
|
|
237
|
+
adambouafia/greenmining:latest --help
|
|
238
|
+
|
|
239
|
+
# With custom configuration
|
|
240
|
+
docker run -v $(pwd)/.env:/app/.env:ro \
|
|
241
|
+
-v $(pwd)/data:/app/data \
|
|
242
|
+
adambouafia/greenmining:latest pipeline --max-repos 50
|
|
243
|
+
|
|
244
|
+
# Interactive shell
|
|
245
|
+
docker run -it adambouafia/greenmining:latest /bin/bash
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
## Configuration
|
|
249
|
+
|
|
250
|
+
Create a `.env` file or set environment variables:
|
|
251
|
+
|
|
252
|
+
```bash
|
|
253
|
+
GITHUB_TOKEN=your_github_personal_access_token
|
|
254
|
+
MAX_REPOS=100
|
|
255
|
+
COMMITS_PER_REPO=50
|
|
256
|
+
OUTPUT_DIR=./data
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
## GSF Pattern Categories
|
|
260
|
+
|
|
261
|
+
- **Cloud** (40 patterns): Autoscaling, serverless, right-sizing, region selection
|
|
262
|
+
- **Web** (15 patterns): CDN, caching, lazy loading, compression
|
|
263
|
+
- **AI/ML** (8 patterns): Model optimization, pruning, quantization
|
|
264
|
+
- **Database** (6 patterns): Indexing, query optimization, connection pooling
|
|
265
|
+
- **Networking** (4 patterns): Protocol optimization, connection reuse
|
|
266
|
+
- **General** (3 patterns): Code efficiency, resource management
|
|
267
|
+
|
|
268
|
+
## CLI Commands
|
|
269
|
+
|
|
270
|
+
| Command | Description |
|
|
271
|
+
|---------|-------------|
|
|
272
|
+
| `fetch` | Fetch microservices repositories from GitHub |
|
|
273
|
+
| `extract` | Extract commit history from repositories |
|
|
274
|
+
| `analyze` | Analyze commits for green patterns |
|
|
275
|
+
| `aggregate` | Aggregate analysis results |
|
|
276
|
+
| `report` | Generate comprehensive report |
|
|
277
|
+
| `pipeline` | Run complete analysis pipeline |
|
|
278
|
+
| `status` | Show current analysis status |
|
|
279
|
+
|
|
280
|
+
## Output Files
|
|
281
|
+
|
|
282
|
+
All outputs are saved to the `data/` directory:
|
|
283
|
+
|
|
284
|
+
- `repositories.json` - Repository metadata
|
|
285
|
+
- `commits.json` - Extracted commit data
|
|
286
|
+
- `analysis_results.json` - Pattern analysis results
|
|
287
|
+
- `aggregated_statistics.json` - Summary statistics
|
|
288
|
+
- `green_analysis_results.csv` - CSV export for spreadsheets
|
|
289
|
+
- `green_microservices_analysis.md` - Final report
|
|
290
|
+
|
|
291
|
+
## Development
|
|
292
|
+
|
|
293
|
+
```bash
|
|
294
|
+
# Clone repository
|
|
295
|
+
git clone https://github.com/adam-bouafia/greenmining.git
|
|
296
|
+
cd greenmining
|
|
297
|
+
|
|
298
|
+
# Install development dependencies
|
|
299
|
+
pip install -e ".[dev]"
|
|
300
|
+
|
|
301
|
+
# Run tests
|
|
302
|
+
pytest tests/
|
|
303
|
+
|
|
304
|
+
# Run with coverage
|
|
305
|
+
pytest --cov=greenmining tests/
|
|
306
|
+
|
|
307
|
+
# Format code
|
|
308
|
+
black greenmining/ tests/
|
|
309
|
+
ruff check greenmining/ tests/
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
## Requirements
|
|
313
|
+
|
|
314
|
+
- Python 3.9+
|
|
315
|
+
- PyGithub >= 2.1.1
|
|
316
|
+
- PyDriller >= 2.5
|
|
317
|
+
- pandas >= 2.2.0
|
|
318
|
+
- click >= 8.1.7
|
|
319
|
+
|
|
320
|
+
## License
|
|
321
|
+
|
|
322
|
+
MIT License - See [LICENSE](LICENSE) for details.
|
|
323
|
+
|
|
324
|
+
## Contributing
|
|
325
|
+
|
|
326
|
+
Contributions are welcome! Please open an issue or submit a pull request.
|
|
327
|
+
|
|
328
|
+
## Links
|
|
329
|
+
|
|
330
|
+
- **GitHub**: https://github.com/adam-bouafia/greenmining
|
|
331
|
+
- **PyPI**: https://pypi.org/project/greenmining/
|
|
332
|
+
- **Docker Hub**: https://hub.docker.com/r/adambouafia/greenmining
|
|
333
|
+
- **Documentation**: https://github.com/adam-bouafia/greenmining#readme
|
|
334
|
+
|
|
335
|
+
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
greenmining/__init__.py,sha256=ITaqGeXxagpd_NwAF68-WFLmWVP4iNeP6t4hici3ktA,395
|
|
2
|
+
greenmining/__main__.py,sha256=1RwcSXcwdza6xJX5fRT8-HhZjlnKbkmGY_uxTm-NYZ4,138
|
|
3
|
+
greenmining/__version__.py,sha256=YXwH4HqFWIba-TCs8Y_eGeQcHAjoSopf1uNTJSxiheI,66
|
|
4
|
+
greenmining/cli.py,sha256=11DEE9bwKDIzj8CbR4-B8re_1cmywPo1CyLGWVGzF9U,13254
|
|
5
|
+
greenmining/config.py,sha256=jTWEIHIwRiQmdoGV5iCILT_nPTka6ZX1DL5ltTIzWJ0,4004
|
|
6
|
+
greenmining/gsf_patterns.py,sha256=jwQ7WZLVLQ-9bqR4W_VYc3HZBC_c1sm0ECDazsfxv-I,29903
|
|
7
|
+
greenmining/main.py,sha256=h8J9OcwyGpVJ-gjSFUS2SZExQQlHV0eDMMjAoI_sgAo,952
|
|
8
|
+
greenmining/utils.py,sha256=-pL8yznf1jSazBMk1ugjPQbtFOQI1E9wRI1NJbHl2xs,7941
|
|
9
|
+
greenmining/controllers/__init__.py,sha256=y-W1Xnnhm4JnrY2QEo5osK8jQs7hpxXovVbHlE334F0,279
|
|
10
|
+
greenmining/controllers/repository_controller.py,sha256=q2JO5LtmpWRbNzVrE9lFBhHU0kcDSlkG3TVIB6mYqGY,4278
|
|
11
|
+
greenmining/models/__init__.py,sha256=K8udzQW2V5LqPowIm5aCiK07LxJZxCt_oW3gz5Qi-mc,397
|
|
12
|
+
greenmining/models/aggregated_stats.py,sha256=SysZD7ZeyvOG4Qsq9B_JdMjI5NpKCNKC63sJ6-s2k2M,984
|
|
13
|
+
greenmining/models/analysis_result.py,sha256=-6hwmickqncRXDGWM3aXBEaOGlddM5G6hnmRTyHFcMs,1525
|
|
14
|
+
greenmining/models/commit.py,sha256=9-PbZmHSyorD1ed13rFkrT9u5XOG7SBT5Nowlr82-PE,2399
|
|
15
|
+
greenmining/models/repository.py,sha256=lpe9Pte6KPCcRvx0aOH16v2PiH3NwjPeQRJYxriKnns,2834
|
|
16
|
+
greenmining/presenters/__init__.py,sha256=-ukAvhNuTvy1Xpknps0faDZ78HKdPHPySzFpQHABzKM,203
|
|
17
|
+
greenmining/presenters/console_presenter.py,sha256=jK_8agdEz-_2mqoyMNht-mNA9hXWe9EA8VlAUT_XFxA,5299
|
|
18
|
+
greenmining/services/__init__.py,sha256=7CJDjHMTrY0bBoqzx22AUzIwEvby0FbAUUKYbjSlNPQ,460
|
|
19
|
+
greenmining/services/commit_extractor.py,sha256=IxON_s6p9Rp4JJN8Q8T0bMLxBtatN4W7bCtk72snBSI,9900
|
|
20
|
+
greenmining/services/data_aggregator.py,sha256=8yb70_lwT85Cn8jVDLUrEZXcGr44UKy8UEFTHbAebZg,16250
|
|
21
|
+
greenmining/services/data_analyzer.py,sha256=ejvfKoG19D1U-b_RBne3e66h2yF4k05gyv3BLnZB9_k,11856
|
|
22
|
+
greenmining/services/github_fetcher.py,sha256=9aHSbZoA8BWL1Cp0cCv2NltXf0Jr7W_mO5d_-7TuOvY,9294
|
|
23
|
+
greenmining/services/reports.py,sha256=cE7XvB2ihD5KwrO4W1Uj_I1h5pELBPF85MjgGFzkgOQ,21829
|
|
24
|
+
greenmining-0.1.4.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
|
|
25
|
+
greenmining-0.1.4.dist-info/METADATA,sha256=I8cXBLwYVRogUrDNUQtoidiL3zgip5PqwvszhvmnOLA,9892
|
|
26
|
+
greenmining-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
27
|
+
greenmining-0.1.4.dist-info/entry_points.txt,sha256=oHvTWMzNFGf2W3CFEKVVPsG4exeMv0MaQu9YsUoQ9lw,53
|
|
28
|
+
greenmining-0.1.4.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
|
|
29
|
+
greenmining-0.1.4.dist-info/RECORD,,
|