greenmining 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- greenmining/__init__.py +1 -1
- greenmining/services/commit_extractor.py +69 -13
- greenmining/services/data_aggregator.py +30 -21
- greenmining/services/data_analyzer.py +14 -9
- {greenmining-0.1.7.dist-info → greenmining-0.1.9.dist-info}/METADATA +6 -6
- {greenmining-0.1.7.dist-info → greenmining-0.1.9.dist-info}/RECORD +10 -10
- {greenmining-0.1.7.dist-info → greenmining-0.1.9.dist-info}/WHEEL +0 -0
- {greenmining-0.1.7.dist-info → greenmining-0.1.9.dist-info}/entry_points.txt +0 -0
- {greenmining-0.1.7.dist-info → greenmining-0.1.9.dist-info}/licenses/LICENSE +0 -0
- {greenmining-0.1.7.dist-info → greenmining-0.1.9.dist-info}/top_level.txt +0 -0
greenmining/__init__.py
CHANGED
|
@@ -6,7 +6,7 @@ from pathlib import Path
|
|
|
6
6
|
from typing import Any, Optional
|
|
7
7
|
|
|
8
8
|
import click
|
|
9
|
-
from
|
|
9
|
+
from github import Github
|
|
10
10
|
from tqdm import tqdm
|
|
11
11
|
|
|
12
12
|
from greenmining.config import get_config
|
|
@@ -21,20 +21,28 @@ from greenmining.utils import (
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
class CommitExtractor:
|
|
24
|
-
"""Extracts commit data from repositories."""
|
|
25
|
-
|
|
26
|
-
def __init__(
|
|
24
|
+
"""Extracts commit data from repositories using GitHub API."""
|
|
25
|
+
|
|
26
|
+
def __init__(
|
|
27
|
+
self,
|
|
28
|
+
max_commits: int = 50,
|
|
29
|
+
skip_merges: bool = True,
|
|
30
|
+
days_back: int = 730,
|
|
31
|
+
github_token: str | None = None,
|
|
32
|
+
):
|
|
27
33
|
"""Initialize commit extractor.
|
|
28
34
|
|
|
29
35
|
Args:
|
|
30
36
|
max_commits: Maximum commits per repository
|
|
31
37
|
skip_merges: Skip merge commits
|
|
32
38
|
days_back: Only analyze commits from last N days
|
|
39
|
+
github_token: GitHub API token (optional)
|
|
33
40
|
"""
|
|
34
41
|
self.max_commits = max_commits
|
|
35
42
|
self.skip_merges = skip_merges
|
|
36
43
|
self.days_back = days_back
|
|
37
44
|
self.cutoff_date = datetime.now() - timedelta(days=days_back)
|
|
45
|
+
self.github = Github(github_token) if github_token else None
|
|
38
46
|
|
|
39
47
|
def extract_from_repositories(self, repositories: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
40
48
|
"""Extract commits from list of repositories.
|
|
@@ -77,7 +85,7 @@ class CommitExtractor:
|
|
|
77
85
|
|
|
78
86
|
@retry_on_exception(max_retries=2, delay=5.0, exceptions=(Exception,))
|
|
79
87
|
def _extract_repo_commits(self, repo: dict[str, Any]) -> list[dict[str, Any]]:
|
|
80
|
-
"""Extract commits from a single repository.
|
|
88
|
+
"""Extract commits from a single repository using GitHub API.
|
|
81
89
|
|
|
82
90
|
Args:
|
|
83
91
|
repo: Repository metadata dictionary
|
|
@@ -86,27 +94,35 @@ class CommitExtractor:
|
|
|
86
94
|
List of commit dictionaries
|
|
87
95
|
"""
|
|
88
96
|
commits = []
|
|
89
|
-
repo_url = repo["clone_url"]
|
|
90
97
|
repo_name = repo["full_name"]
|
|
91
98
|
|
|
92
99
|
try:
|
|
93
|
-
#
|
|
94
|
-
|
|
100
|
+
# Get repository from GitHub API
|
|
101
|
+
if not self.github:
|
|
102
|
+
config = get_config()
|
|
103
|
+
self.github = Github(config.GITHUB_TOKEN)
|
|
95
104
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
105
|
+
gh_repo = self.github.get_repo(repo_name)
|
|
106
|
+
|
|
107
|
+
# Get recent commits (GitHub API returns in reverse chronological order)
|
|
108
|
+
commit_count = 0
|
|
99
109
|
|
|
110
|
+
for commit in gh_repo.get_commits():
|
|
100
111
|
# Skip if reached max commits
|
|
101
112
|
if commit_count >= self.max_commits:
|
|
102
113
|
break
|
|
103
114
|
|
|
115
|
+
# Skip merge commits if requested
|
|
116
|
+
if self.skip_merges and len(commit.parents) > 1:
|
|
117
|
+
continue
|
|
118
|
+
|
|
104
119
|
# Skip trivial commits
|
|
105
|
-
|
|
120
|
+
commit_msg = commit.commit.message
|
|
121
|
+
if not commit_msg or len(commit_msg.strip()) < 10:
|
|
106
122
|
continue
|
|
107
123
|
|
|
108
124
|
# Extract commit data
|
|
109
|
-
commit_data = self.
|
|
125
|
+
commit_data = self._extract_commit_metadata_from_github(commit, repo_name)
|
|
110
126
|
commits.append(commit_data)
|
|
111
127
|
commit_count += 1
|
|
112
128
|
|
|
@@ -158,6 +174,46 @@ class CommitExtractor:
|
|
|
158
174
|
"in_main_branch": commit.in_main_branch if hasattr(commit, "in_main_branch") else True,
|
|
159
175
|
}
|
|
160
176
|
|
|
177
|
+
def _extract_commit_metadata_from_github(self, commit, repo_name: str) -> dict[str, Any]:
|
|
178
|
+
"""Extract metadata from GitHub API commit object.
|
|
179
|
+
|
|
180
|
+
Args:
|
|
181
|
+
commit: GitHub API commit object
|
|
182
|
+
repo_name: Repository name
|
|
183
|
+
|
|
184
|
+
Returns:
|
|
185
|
+
Dictionary with commit metadata
|
|
186
|
+
"""
|
|
187
|
+
# Get modified files and stats
|
|
188
|
+
files_changed = []
|
|
189
|
+
lines_added = 0
|
|
190
|
+
lines_deleted = 0
|
|
191
|
+
|
|
192
|
+
try:
|
|
193
|
+
for file in commit.files:
|
|
194
|
+
files_changed.append(file.filename)
|
|
195
|
+
lines_added += file.additions
|
|
196
|
+
lines_deleted += file.deletions
|
|
197
|
+
except Exception:
|
|
198
|
+
pass
|
|
199
|
+
|
|
200
|
+
return {
|
|
201
|
+
"commit_id": commit.sha,
|
|
202
|
+
"repo_name": repo_name,
|
|
203
|
+
"date": commit.commit.committer.date.isoformat(),
|
|
204
|
+
"author": commit.commit.author.name,
|
|
205
|
+
"author_email": commit.commit.author.email,
|
|
206
|
+
"message": commit.commit.message.strip(),
|
|
207
|
+
"files_changed": files_changed[:20], # Limit to 20 files
|
|
208
|
+
"lines_added": lines_added,
|
|
209
|
+
"lines_deleted": lines_deleted,
|
|
210
|
+
"insertions": lines_added,
|
|
211
|
+
"deletions": lines_deleted,
|
|
212
|
+
"is_merge": len(commit.parents) > 1,
|
|
213
|
+
"branches": [],
|
|
214
|
+
"in_main_branch": True,
|
|
215
|
+
}
|
|
216
|
+
|
|
161
217
|
def save_results(self, commits: list[dict[str, Any]], output_file: Path, repos_count: int):
|
|
162
218
|
"""Save extracted commits to JSON file.
|
|
163
219
|
|
|
@@ -72,7 +72,7 @@ class DataAggregator:
|
|
|
72
72
|
green_aware_count = sum(1 for r in results if r.get("green_aware", False))
|
|
73
73
|
|
|
74
74
|
# Count repos with at least one green commit
|
|
75
|
-
repos_with_green = len({r["
|
|
75
|
+
repos_with_green = len({r["repository"] for r in results if r.get("green_aware", False)})
|
|
76
76
|
|
|
77
77
|
return {
|
|
78
78
|
"total_commits": total_commits,
|
|
@@ -91,17 +91,24 @@ class DataAggregator:
|
|
|
91
91
|
)
|
|
92
92
|
|
|
93
93
|
for result in results:
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
94
|
+
# Handle both gsf_patterns_matched (list) and known_pattern (string)
|
|
95
|
+
patterns = result.get("gsf_patterns_matched", [])
|
|
96
|
+
if not patterns: # Fallback to old format
|
|
97
|
+
pattern = result.get("known_pattern")
|
|
98
|
+
if pattern and pattern != "NONE DETECTED":
|
|
99
|
+
patterns = [pattern]
|
|
100
|
+
|
|
101
|
+
confidence = result.get("confidence", result.get("pattern_confidence", "low")).upper()
|
|
102
|
+
|
|
103
|
+
for pattern in patterns:
|
|
98
104
|
pattern_data[pattern]["count"] += 1
|
|
99
105
|
if confidence in ["HIGH", "MEDIUM", "LOW"]:
|
|
100
106
|
pattern_data[pattern][confidence] += 1
|
|
101
107
|
|
|
102
108
|
# Store example commits (max 3)
|
|
103
109
|
if len(pattern_data[pattern]["example_commits"]) < 3:
|
|
104
|
-
|
|
110
|
+
commit_id = result.get("commit_hash", result.get("commit_id", "unknown"))
|
|
111
|
+
pattern_data[pattern]["example_commits"].append(commit_id)
|
|
105
112
|
|
|
106
113
|
# Convert to list format
|
|
107
114
|
patterns_list = []
|
|
@@ -153,15 +160,21 @@ class DataAggregator:
|
|
|
153
160
|
|
|
154
161
|
# Group commits by repository
|
|
155
162
|
for result in results:
|
|
156
|
-
repo_commits[result["
|
|
163
|
+
repo_commits[result["repository"]].append(result)
|
|
157
164
|
|
|
158
165
|
# Calculate stats for each repo
|
|
159
166
|
repo_stats = []
|
|
160
167
|
for repo_name, commits in repo_commits.items():
|
|
161
168
|
green_commits = [c for c in commits if c.get("green_aware", False)]
|
|
162
|
-
patterns
|
|
163
|
-
|
|
164
|
-
|
|
169
|
+
# Get all patterns from commits (gsf_patterns_matched is a list)
|
|
170
|
+
patterns = []
|
|
171
|
+
for c in commits:
|
|
172
|
+
patterns_list = c.get("gsf_patterns_matched", [])
|
|
173
|
+
if not patterns_list: # Fallback
|
|
174
|
+
pattern = c.get("known_pattern")
|
|
175
|
+
if pattern and pattern != "NONE DETECTED":
|
|
176
|
+
patterns_list = [pattern]
|
|
177
|
+
patterns.extend(patterns_list)
|
|
165
178
|
unique_patterns = list(set(patterns))
|
|
166
179
|
|
|
167
180
|
repo_stats.append(
|
|
@@ -191,7 +204,7 @@ class DataAggregator:
|
|
|
191
204
|
# Group commits by language
|
|
192
205
|
language_commits = defaultdict(list)
|
|
193
206
|
for result in results:
|
|
194
|
-
language = repo_language_map.get(result["
|
|
207
|
+
language = repo_language_map.get(result["repository"], "Unknown")
|
|
195
208
|
language_commits[language].append(result)
|
|
196
209
|
|
|
197
210
|
# Calculate stats for each language
|
|
@@ -239,18 +252,14 @@ class DataAggregator:
|
|
|
239
252
|
for result in analysis_results:
|
|
240
253
|
csv_data.append(
|
|
241
254
|
{
|
|
242
|
-
"
|
|
243
|
-
"repo_name": result
|
|
255
|
+
"commit_hash": result.get("commit_hash", result.get("commit_id", "")),
|
|
256
|
+
"repo_name": result.get("repository", ""),
|
|
244
257
|
"date": result.get("date", ""),
|
|
245
|
-
"
|
|
258
|
+
"message": result.get("message", "")[:200], # Truncate
|
|
246
259
|
"green_aware": result.get("green_aware", False),
|
|
247
|
-
"
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
else ""
|
|
251
|
-
),
|
|
252
|
-
"known_pattern": result.get("known_pattern", ""),
|
|
253
|
-
"pattern_confidence": result.get("pattern_confidence", ""),
|
|
260
|
+
"gsf_patterns": ", ".join(result.get("gsf_patterns_matched", [])),
|
|
261
|
+
"pattern_count": result.get("pattern_count", 0),
|
|
262
|
+
"confidence": result.get("confidence", ""),
|
|
254
263
|
"lines_added": result.get("lines_added", 0),
|
|
255
264
|
"lines_deleted": result.get("lines_deleted", 0),
|
|
256
265
|
}
|
|
@@ -114,8 +114,8 @@ class DataAnalyzer:
|
|
|
114
114
|
return {
|
|
115
115
|
"commit_hash": commit.get("hash", commit.get("commit_id", "unknown")),
|
|
116
116
|
"repository": commit.get("repository", commit.get("repo_name", "unknown")),
|
|
117
|
-
"author": commit.get("author_name", "unknown"),
|
|
118
|
-
"date": commit.get("
|
|
117
|
+
"author": commit.get("author", commit.get("author_name", "unknown")),
|
|
118
|
+
"date": commit.get("date", commit.get("author_date", "unknown")),
|
|
119
119
|
"message": message,
|
|
120
120
|
# Research Question 1: Green awareness
|
|
121
121
|
"green_aware": green_aware,
|
|
@@ -125,10 +125,9 @@ class DataAnalyzer:
|
|
|
125
125
|
"pattern_details": pattern_details,
|
|
126
126
|
"confidence": confidence,
|
|
127
127
|
# Additional metadata
|
|
128
|
-
"files_modified": commit.get("
|
|
129
|
-
"insertions": commit.get("
|
|
130
|
-
"deletions": commit.get("
|
|
131
|
-
"lines_deleted": commit.get("lines_deleted", 0),
|
|
128
|
+
"files_modified": commit.get("files_changed", commit.get("modified_files", [])),
|
|
129
|
+
"insertions": commit.get("lines_added", commit.get("insertions", 0)),
|
|
130
|
+
"deletions": commit.get("lines_deleted", commit.get("deletions", 0)),
|
|
132
131
|
}
|
|
133
132
|
|
|
134
133
|
def _check_green_awareness(self, message: str, files: list[str]) -> tuple[bool, Optional[str]]:
|
|
@@ -205,9 +204,15 @@ class DataAnalyzer:
|
|
|
205
204
|
"""
|
|
206
205
|
# Calculate summary statistics
|
|
207
206
|
green_aware_count = sum(1 for r in results if r["green_aware"])
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
207
|
+
|
|
208
|
+
# Count all matched patterns (results have gsf_patterns_matched which is a list)
|
|
209
|
+
all_patterns = []
|
|
210
|
+
for r in results:
|
|
211
|
+
patterns = r.get("gsf_patterns_matched", [])
|
|
212
|
+
if patterns: # If there are matched patterns
|
|
213
|
+
all_patterns.extend(patterns)
|
|
214
|
+
|
|
215
|
+
pattern_counts = Counter(all_patterns)
|
|
211
216
|
|
|
212
217
|
data = {
|
|
213
218
|
"metadata": {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: greenmining
|
|
3
|
-
Version: 0.1.
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 0.1.9
|
|
4
|
+
Summary: Analyze GitHub repositories to identify green software engineering patterns and energy-efficient practices
|
|
5
5
|
Author-email: Your Name <your.email@example.com>
|
|
6
6
|
Maintainer-email: Your Name <your.email@example.com>
|
|
7
7
|
License: MIT
|
|
@@ -63,11 +63,11 @@ Green mining for microservices repositories.
|
|
|
63
63
|
|
|
64
64
|
## Overview
|
|
65
65
|
|
|
66
|
-
`greenmining` is a Python library and CLI tool for analyzing GitHub repositories to identify green software engineering practices. It detects 76
|
|
66
|
+
`greenmining` is a Python library and CLI tool for analyzing GitHub repositories to identify green software engineering practices and energy-efficient patterns. It detects 76 sustainable software patterns across cloud, web, AI, database, networking, and general categories.
|
|
67
67
|
|
|
68
68
|
## Features
|
|
69
69
|
|
|
70
|
-
- 🔍 **76
|
|
70
|
+
- 🔍 **76 Sustainability Patterns**: Detect energy-efficient and environmentally conscious coding practices
|
|
71
71
|
- 📊 **Repository Mining**: Analyze 100+ microservices repositories from GitHub
|
|
72
72
|
- 📈 **Green Awareness Detection**: Identify sustainability-focused commits
|
|
73
73
|
- 📄 **Comprehensive Reports**: Generate analysis reports in multiple formats
|
|
@@ -128,7 +128,7 @@ greenmining report
|
|
|
128
128
|
from greenmining import GSF_PATTERNS, is_green_aware, get_pattern_by_keywords
|
|
129
129
|
|
|
130
130
|
# Check available patterns
|
|
131
|
-
print(f"Total
|
|
131
|
+
print(f"Total patterns: {len(GSF_PATTERNS)}") # 76
|
|
132
132
|
|
|
133
133
|
# Detect green awareness in commit messages
|
|
134
134
|
commit_msg = "Optimize Redis caching to reduce energy consumption"
|
|
@@ -168,7 +168,7 @@ for commit in commits:
|
|
|
168
168
|
print(f" Patterns: {result['known_pattern']}")
|
|
169
169
|
```
|
|
170
170
|
|
|
171
|
-
#### Access
|
|
171
|
+
#### Access Sustainability Patterns Data
|
|
172
172
|
|
|
173
173
|
```python
|
|
174
174
|
from greenmining import GSF_PATTERNS
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
greenmining/__init__.py,sha256
|
|
1
|
+
greenmining/__init__.py,sha256=-pLhkoePJtUXVyzBfTmT1iW56AvFgmNwf54hQckdYI8,395
|
|
2
2
|
greenmining/__main__.py,sha256=1RwcSXcwdza6xJX5fRT8-HhZjlnKbkmGY_uxTm-NYZ4,138
|
|
3
3
|
greenmining/__version__.py,sha256=Hry6u6QztktMYf7nqf0jPXFaA0b7lmr6pjdAaVRXDaE,66
|
|
4
4
|
greenmining/cli.py,sha256=11DEE9bwKDIzj8CbR4-B8re_1cmywPo1CyLGWVGzF9U,13254
|
|
@@ -16,14 +16,14 @@ greenmining/models/repository.py,sha256=lpe9Pte6KPCcRvx0aOH16v2PiH3NwjPeQRJYxriK
|
|
|
16
16
|
greenmining/presenters/__init__.py,sha256=-ukAvhNuTvy1Xpknps0faDZ78HKdPHPySzFpQHABzKM,203
|
|
17
17
|
greenmining/presenters/console_presenter.py,sha256=jK_8agdEz-_2mqoyMNht-mNA9hXWe9EA8VlAUT_XFxA,5299
|
|
18
18
|
greenmining/services/__init__.py,sha256=7CJDjHMTrY0bBoqzx22AUzIwEvby0FbAUUKYbjSlNPQ,460
|
|
19
|
-
greenmining/services/commit_extractor.py,sha256=
|
|
20
|
-
greenmining/services/data_aggregator.py,sha256=
|
|
21
|
-
greenmining/services/data_analyzer.py,sha256=
|
|
19
|
+
greenmining/services/commit_extractor.py,sha256=XB7Y1HKeQ4OpgEz0yAjKDPdiQcq07QCQ5Xrx9AxGfrM,11814
|
|
20
|
+
greenmining/services/data_aggregator.py,sha256=eXAHrzpafLJ14HRSFy70TCuamQpi43C6KcP2cG5IBGU,16877
|
|
21
|
+
greenmining/services/data_analyzer.py,sha256=RHHxw2y-thjCtVEL_GmvPJPWdWNa-C6jMzjy5QZk4eI,12051
|
|
22
22
|
greenmining/services/github_fetcher.py,sha256=9aHSbZoA8BWL1Cp0cCv2NltXf0Jr7W_mO5d_-7TuOvY,9294
|
|
23
23
|
greenmining/services/reports.py,sha256=cE7XvB2ihD5KwrO4W1Uj_I1h5pELBPF85MjgGFzkgOQ,21829
|
|
24
|
-
greenmining-0.1.
|
|
25
|
-
greenmining-0.1.
|
|
26
|
-
greenmining-0.1.
|
|
27
|
-
greenmining-0.1.
|
|
28
|
-
greenmining-0.1.
|
|
29
|
-
greenmining-0.1.
|
|
24
|
+
greenmining-0.1.9.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
|
|
25
|
+
greenmining-0.1.9.dist-info/METADATA,sha256=momgAH0mimUN2PjbgixNXXlraeP8unuIx2sgbyQT2ks,9969
|
|
26
|
+
greenmining-0.1.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
27
|
+
greenmining-0.1.9.dist-info/entry_points.txt,sha256=oHvTWMzNFGf2W3CFEKVVPsG4exeMv0MaQu9YsUoQ9lw,53
|
|
28
|
+
greenmining-0.1.9.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
|
|
29
|
+
greenmining-0.1.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|