greenmining 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- greenmining/__init__.py +1 -1
- greenmining/services/commit_extractor.py +69 -13
- {greenmining-0.1.7.dist-info → greenmining-0.1.8.dist-info}/METADATA +6 -6
- {greenmining-0.1.7.dist-info → greenmining-0.1.8.dist-info}/RECORD +8 -8
- {greenmining-0.1.7.dist-info → greenmining-0.1.8.dist-info}/WHEEL +0 -0
- {greenmining-0.1.7.dist-info → greenmining-0.1.8.dist-info}/entry_points.txt +0 -0
- {greenmining-0.1.7.dist-info → greenmining-0.1.8.dist-info}/licenses/LICENSE +0 -0
- {greenmining-0.1.7.dist-info → greenmining-0.1.8.dist-info}/top_level.txt +0 -0
greenmining/__init__.py
CHANGED
|
@@ -6,7 +6,7 @@ from pathlib import Path
|
|
|
6
6
|
from typing import Any, Optional
|
|
7
7
|
|
|
8
8
|
import click
|
|
9
|
-
from
|
|
9
|
+
from github import Github
|
|
10
10
|
from tqdm import tqdm
|
|
11
11
|
|
|
12
12
|
from greenmining.config import get_config
|
|
@@ -21,20 +21,28 @@ from greenmining.utils import (
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
class CommitExtractor:
|
|
24
|
-
"""Extracts commit data from repositories."""
|
|
25
|
-
|
|
26
|
-
def __init__(
|
|
24
|
+
"""Extracts commit data from repositories using GitHub API."""
|
|
25
|
+
|
|
26
|
+
def __init__(
|
|
27
|
+
self,
|
|
28
|
+
max_commits: int = 50,
|
|
29
|
+
skip_merges: bool = True,
|
|
30
|
+
days_back: int = 730,
|
|
31
|
+
github_token: str | None = None,
|
|
32
|
+
):
|
|
27
33
|
"""Initialize commit extractor.
|
|
28
34
|
|
|
29
35
|
Args:
|
|
30
36
|
max_commits: Maximum commits per repository
|
|
31
37
|
skip_merges: Skip merge commits
|
|
32
38
|
days_back: Only analyze commits from last N days
|
|
39
|
+
github_token: GitHub API token (optional)
|
|
33
40
|
"""
|
|
34
41
|
self.max_commits = max_commits
|
|
35
42
|
self.skip_merges = skip_merges
|
|
36
43
|
self.days_back = days_back
|
|
37
44
|
self.cutoff_date = datetime.now() - timedelta(days=days_back)
|
|
45
|
+
self.github = Github(github_token) if github_token else None
|
|
38
46
|
|
|
39
47
|
def extract_from_repositories(self, repositories: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
40
48
|
"""Extract commits from list of repositories.
|
|
@@ -77,7 +85,7 @@ class CommitExtractor:
|
|
|
77
85
|
|
|
78
86
|
@retry_on_exception(max_retries=2, delay=5.0, exceptions=(Exception,))
|
|
79
87
|
def _extract_repo_commits(self, repo: dict[str, Any]) -> list[dict[str, Any]]:
|
|
80
|
-
"""Extract commits from a single repository.
|
|
88
|
+
"""Extract commits from a single repository using GitHub API.
|
|
81
89
|
|
|
82
90
|
Args:
|
|
83
91
|
repo: Repository metadata dictionary
|
|
@@ -86,27 +94,35 @@ class CommitExtractor:
|
|
|
86
94
|
List of commit dictionaries
|
|
87
95
|
"""
|
|
88
96
|
commits = []
|
|
89
|
-
repo_url = repo["clone_url"]
|
|
90
97
|
repo_name = repo["full_name"]
|
|
91
98
|
|
|
92
99
|
try:
|
|
93
|
-
#
|
|
94
|
-
|
|
100
|
+
# Get repository from GitHub API
|
|
101
|
+
if not self.github:
|
|
102
|
+
config = get_config()
|
|
103
|
+
self.github = Github(config.GITHUB_TOKEN)
|
|
95
104
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
105
|
+
gh_repo = self.github.get_repo(repo_name)
|
|
106
|
+
|
|
107
|
+
# Get recent commits (GitHub API returns in reverse chronological order)
|
|
108
|
+
commit_count = 0
|
|
99
109
|
|
|
110
|
+
for commit in gh_repo.get_commits():
|
|
100
111
|
# Skip if reached max commits
|
|
101
112
|
if commit_count >= self.max_commits:
|
|
102
113
|
break
|
|
103
114
|
|
|
115
|
+
# Skip merge commits if requested
|
|
116
|
+
if self.skip_merges and len(commit.parents) > 1:
|
|
117
|
+
continue
|
|
118
|
+
|
|
104
119
|
# Skip trivial commits
|
|
105
|
-
|
|
120
|
+
commit_msg = commit.commit.message
|
|
121
|
+
if not commit_msg or len(commit_msg.strip()) < 10:
|
|
106
122
|
continue
|
|
107
123
|
|
|
108
124
|
# Extract commit data
|
|
109
|
-
commit_data = self.
|
|
125
|
+
commit_data = self._extract_commit_metadata_from_github(commit, repo_name)
|
|
110
126
|
commits.append(commit_data)
|
|
111
127
|
commit_count += 1
|
|
112
128
|
|
|
@@ -158,6 +174,46 @@ class CommitExtractor:
|
|
|
158
174
|
"in_main_branch": commit.in_main_branch if hasattr(commit, "in_main_branch") else True,
|
|
159
175
|
}
|
|
160
176
|
|
|
177
|
+
def _extract_commit_metadata_from_github(self, commit, repo_name: str) -> dict[str, Any]:
|
|
178
|
+
"""Extract metadata from GitHub API commit object.
|
|
179
|
+
|
|
180
|
+
Args:
|
|
181
|
+
commit: GitHub API commit object
|
|
182
|
+
repo_name: Repository name
|
|
183
|
+
|
|
184
|
+
Returns:
|
|
185
|
+
Dictionary with commit metadata
|
|
186
|
+
"""
|
|
187
|
+
# Get modified files and stats
|
|
188
|
+
files_changed = []
|
|
189
|
+
lines_added = 0
|
|
190
|
+
lines_deleted = 0
|
|
191
|
+
|
|
192
|
+
try:
|
|
193
|
+
for file in commit.files:
|
|
194
|
+
files_changed.append(file.filename)
|
|
195
|
+
lines_added += file.additions
|
|
196
|
+
lines_deleted += file.deletions
|
|
197
|
+
except Exception:
|
|
198
|
+
pass
|
|
199
|
+
|
|
200
|
+
return {
|
|
201
|
+
"commit_id": commit.sha,
|
|
202
|
+
"repo_name": repo_name,
|
|
203
|
+
"date": commit.commit.committer.date.isoformat(),
|
|
204
|
+
"author": commit.commit.author.name,
|
|
205
|
+
"author_email": commit.commit.author.email,
|
|
206
|
+
"message": commit.commit.message.strip(),
|
|
207
|
+
"files_changed": files_changed[:20], # Limit to 20 files
|
|
208
|
+
"lines_added": lines_added,
|
|
209
|
+
"lines_deleted": lines_deleted,
|
|
210
|
+
"insertions": lines_added,
|
|
211
|
+
"deletions": lines_deleted,
|
|
212
|
+
"is_merge": len(commit.parents) > 1,
|
|
213
|
+
"branches": [],
|
|
214
|
+
"in_main_branch": True,
|
|
215
|
+
}
|
|
216
|
+
|
|
161
217
|
def save_results(self, commits: list[dict[str, Any]], output_file: Path, repos_count: int):
|
|
162
218
|
"""Save extracted commits to JSON file.
|
|
163
219
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: greenmining
|
|
3
|
-
Version: 0.1.7
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 0.1.8
|
|
4
|
+
Summary: Analyze GitHub repositories to identify green software engineering patterns and energy-efficient practices
|
|
5
5
|
Author-email: Your Name <your.email@example.com>
|
|
6
6
|
Maintainer-email: Your Name <your.email@example.com>
|
|
7
7
|
License: MIT
|
|
@@ -63,11 +63,11 @@ Green mining for microservices repositories.
|
|
|
63
63
|
|
|
64
64
|
## Overview
|
|
65
65
|
|
|
66
|
-
`greenmining` is a Python library and CLI tool for analyzing GitHub repositories to identify green software engineering practices. It detects 76
|
|
66
|
+
`greenmining` is a Python library and CLI tool for analyzing GitHub repositories to identify green software engineering practices and energy-efficient patterns. It detects 76 sustainable software patterns across cloud, web, AI, database, networking, and general categories.
|
|
67
67
|
|
|
68
68
|
## Features
|
|
69
69
|
|
|
70
|
-
- 🔍 **76
|
|
70
|
+
- 🔍 **76 Sustainability Patterns**: Detect energy-efficient and environmentally conscious coding practices
|
|
71
71
|
- 📊 **Repository Mining**: Analyze 100+ microservices repositories from GitHub
|
|
72
72
|
- 📈 **Green Awareness Detection**: Identify sustainability-focused commits
|
|
73
73
|
- 📄 **Comprehensive Reports**: Generate analysis reports in multiple formats
|
|
@@ -128,7 +128,7 @@ greenmining report
|
|
|
128
128
|
from greenmining import GSF_PATTERNS, is_green_aware, get_pattern_by_keywords
|
|
129
129
|
|
|
130
130
|
# Check available patterns
|
|
131
|
-
print(f"Total
|
|
131
|
+
print(f"Total patterns: {len(GSF_PATTERNS)}") # 76
|
|
132
132
|
|
|
133
133
|
# Detect green awareness in commit messages
|
|
134
134
|
commit_msg = "Optimize Redis caching to reduce energy consumption"
|
|
@@ -168,7 +168,7 @@ for commit in commits:
|
|
|
168
168
|
print(f" Patterns: {result['known_pattern']}")
|
|
169
169
|
```
|
|
170
170
|
|
|
171
|
-
#### Access
|
|
171
|
+
#### Access Sustainability Patterns Data
|
|
172
172
|
|
|
173
173
|
```python
|
|
174
174
|
from greenmining import GSF_PATTERNS
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
greenmining/__init__.py,sha256=
|
|
1
|
+
greenmining/__init__.py,sha256=M4E_cikw37tzcNodowPaRe60fk5FuAu9Exw6QDSAmQ4,395
|
|
2
2
|
greenmining/__main__.py,sha256=1RwcSXcwdza6xJX5fRT8-HhZjlnKbkmGY_uxTm-NYZ4,138
|
|
3
3
|
greenmining/__version__.py,sha256=Hry6u6QztktMYf7nqf0jPXFaA0b7lmr6pjdAaVRXDaE,66
|
|
4
4
|
greenmining/cli.py,sha256=11DEE9bwKDIzj8CbR4-B8re_1cmywPo1CyLGWVGzF9U,13254
|
|
@@ -16,14 +16,14 @@ greenmining/models/repository.py,sha256=lpe9Pte6KPCcRvx0aOH16v2PiH3NwjPeQRJYxriK
|
|
|
16
16
|
greenmining/presenters/__init__.py,sha256=-ukAvhNuTvy1Xpknps0faDZ78HKdPHPySzFpQHABzKM,203
|
|
17
17
|
greenmining/presenters/console_presenter.py,sha256=jK_8agdEz-_2mqoyMNht-mNA9hXWe9EA8VlAUT_XFxA,5299
|
|
18
18
|
greenmining/services/__init__.py,sha256=7CJDjHMTrY0bBoqzx22AUzIwEvby0FbAUUKYbjSlNPQ,460
|
|
19
|
-
greenmining/services/commit_extractor.py,sha256=
|
|
19
|
+
greenmining/services/commit_extractor.py,sha256=XB7Y1HKeQ4OpgEz0yAjKDPdiQcq07QCQ5Xrx9AxGfrM,11814
|
|
20
20
|
greenmining/services/data_aggregator.py,sha256=8yb70_lwT85Cn8jVDLUrEZXcGr44UKy8UEFTHbAebZg,16250
|
|
21
21
|
greenmining/services/data_analyzer.py,sha256=ejvfKoG19D1U-b_RBne3e66h2yF4k05gyv3BLnZB9_k,11856
|
|
22
22
|
greenmining/services/github_fetcher.py,sha256=9aHSbZoA8BWL1Cp0cCv2NltXf0Jr7W_mO5d_-7TuOvY,9294
|
|
23
23
|
greenmining/services/reports.py,sha256=cE7XvB2ihD5KwrO4W1Uj_I1h5pELBPF85MjgGFzkgOQ,21829
|
|
24
|
-
greenmining-0.1.7.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
|
|
25
|
-
greenmining-0.1.
|
|
26
|
-
greenmining-0.1.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
27
|
-
greenmining-0.1.7.dist-info/entry_points.txt,sha256=oHvTWMzNFGf2W3CFEKVVPsG4exeMv0MaQu9YsUoQ9lw,53
|
|
28
|
-
greenmining-0.1.7.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
|
|
29
|
-
greenmining-0.1.7.dist-info/RECORD,,
|
|
24
|
+
greenmining-0.1.8.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
|
|
25
|
+
greenmining-0.1.8.dist-info/METADATA,sha256=4o6y5WcKQHRyGZOK2NcyIPk1GxVytZzyGMeI7R7j5TI,9969
|
|
26
|
+
greenmining-0.1.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
27
|
+
greenmining-0.1.8.dist-info/entry_points.txt,sha256=oHvTWMzNFGf2W3CFEKVVPsG4exeMv0MaQu9YsUoQ9lw,53
|
|
28
|
+
greenmining-0.1.8.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
|
|
29
|
+
greenmining-0.1.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|