greenmining 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
greenmining/__init__.py CHANGED
@@ -8,7 +8,7 @@ from greenmining.gsf_patterns import (
8
8
  is_green_aware,
9
9
  )
10
10
 
11
- __version__ = "0.1.0"
11
+ __version__ = "0.1.8"
12
12
 
13
13
  __all__ = [
14
14
  "Config",
@@ -6,7 +6,7 @@ from pathlib import Path
6
6
  from typing import Any, Optional
7
7
 
8
8
  import click
9
- from pydriller import Repository
9
+ from github import Github
10
10
  from tqdm import tqdm
11
11
 
12
12
  from greenmining.config import get_config
@@ -21,20 +21,28 @@ from greenmining.utils import (
21
21
 
22
22
 
23
23
  class CommitExtractor:
24
- """Extracts commit data from repositories."""
25
-
26
- def __init__(self, max_commits: int = 50, skip_merges: bool = True, days_back: int = 730):
24
+ """Extracts commit data from repositories using GitHub API."""
25
+
26
+ def __init__(
27
+ self,
28
+ max_commits: int = 50,
29
+ skip_merges: bool = True,
30
+ days_back: int = 730,
31
+ github_token: str | None = None,
32
+ ):
27
33
  """Initialize commit extractor.
28
34
 
29
35
  Args:
30
36
  max_commits: Maximum commits per repository
31
37
  skip_merges: Skip merge commits
32
38
  days_back: Only analyze commits from last N days
39
+ github_token: GitHub API token (optional)
33
40
  """
34
41
  self.max_commits = max_commits
35
42
  self.skip_merges = skip_merges
36
43
  self.days_back = days_back
37
44
  self.cutoff_date = datetime.now() - timedelta(days=days_back)
45
+ self.github = Github(github_token) if github_token else None
38
46
 
39
47
  def extract_from_repositories(self, repositories: list[dict[str, Any]]) -> list[dict[str, Any]]:
40
48
  """Extract commits from list of repositories.
@@ -77,7 +85,7 @@ class CommitExtractor:
77
85
 
78
86
  @retry_on_exception(max_retries=2, delay=5.0, exceptions=(Exception,))
79
87
  def _extract_repo_commits(self, repo: dict[str, Any]) -> list[dict[str, Any]]:
80
- """Extract commits from a single repository.
88
+ """Extract commits from a single repository using GitHub API.
81
89
 
82
90
  Args:
83
91
  repo: Repository metadata dictionary
@@ -86,27 +94,35 @@ class CommitExtractor:
86
94
  List of commit dictionaries
87
95
  """
88
96
  commits = []
89
- repo_url = repo["clone_url"]
90
97
  repo_name = repo["full_name"]
91
98
 
92
99
  try:
93
- # Use PyDriller to traverse commits
94
- commit_count = 0
100
+ # Get repository from GitHub API
101
+ if not self.github:
102
+ config = get_config()
103
+ self.github = Github(config.GITHUB_TOKEN)
95
104
 
96
- for commit in Repository(
97
- repo_url, only_no_merge=self.skip_merges, since=self.cutoff_date
98
- ).traverse_commits():
105
+ gh_repo = self.github.get_repo(repo_name)
106
+
107
+ # Get recent commits (GitHub API returns in reverse chronological order)
108
+ commit_count = 0
99
109
 
110
+ for commit in gh_repo.get_commits():
100
111
  # Skip if reached max commits
101
112
  if commit_count >= self.max_commits:
102
113
  break
103
114
 
115
+ # Skip merge commits if requested
116
+ if self.skip_merges and len(commit.parents) > 1:
117
+ continue
118
+
104
119
  # Skip trivial commits
105
- if not commit.msg or len(commit.msg.strip()) < 10:
120
+ commit_msg = commit.commit.message
121
+ if not commit_msg or len(commit_msg.strip()) < 10:
106
122
  continue
107
123
 
108
124
  # Extract commit data
109
- commit_data = self._extract_commit_metadata(commit, repo_name)
125
+ commit_data = self._extract_commit_metadata_from_github(commit, repo_name)
110
126
  commits.append(commit_data)
111
127
  commit_count += 1
112
128
 
@@ -158,6 +174,46 @@ class CommitExtractor:
158
174
  "in_main_branch": commit.in_main_branch if hasattr(commit, "in_main_branch") else True,
159
175
  }
160
176
 
177
+ def _extract_commit_metadata_from_github(self, commit, repo_name: str) -> dict[str, Any]:
178
+ """Extract metadata from GitHub API commit object.
179
+
180
+ Args:
181
+ commit: GitHub API commit object
182
+ repo_name: Repository name
183
+
184
+ Returns:
185
+ Dictionary with commit metadata
186
+ """
187
+ # Get modified files and stats
188
+ files_changed = []
189
+ lines_added = 0
190
+ lines_deleted = 0
191
+
192
+ try:
193
+ for file in commit.files:
194
+ files_changed.append(file.filename)
195
+ lines_added += file.additions
196
+ lines_deleted += file.deletions
197
+ except Exception:
198
+ pass
199
+
200
+ return {
201
+ "commit_id": commit.sha,
202
+ "repo_name": repo_name,
203
+ "date": commit.commit.committer.date.isoformat(),
204
+ "author": commit.commit.author.name,
205
+ "author_email": commit.commit.author.email,
206
+ "message": commit.commit.message.strip(),
207
+ "files_changed": files_changed[:20], # Limit to 20 files
208
+ "lines_added": lines_added,
209
+ "lines_deleted": lines_deleted,
210
+ "insertions": lines_added,
211
+ "deletions": lines_deleted,
212
+ "is_merge": len(commit.parents) > 1,
213
+ "branches": [],
214
+ "in_main_branch": True,
215
+ }
216
+
161
217
  def save_results(self, commits: list[dict[str, Any]], output_file: Path, repos_count: int):
162
218
  """Save extracted commits to JSON file.
163
219
 
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: greenmining
3
- Version: 0.1.7
4
- Summary: Green Software Foundation (GSF) patterns mining tool for microservices repositories
3
+ Version: 0.1.8
4
+ Summary: Analyze GitHub repositories to identify green software engineering patterns and energy-efficient practices
5
5
  Author-email: Your Name <your.email@example.com>
6
6
  Maintainer-email: Your Name <your.email@example.com>
7
7
  License: MIT
@@ -63,11 +63,11 @@ Green mining for microservices repositories.
63
63
 
64
64
  ## Overview
65
65
 
66
- `greenmining` is a Python library and CLI tool for analyzing GitHub repositories to identify green software engineering practices. It detects 76 official Green Software Foundation patterns across cloud, web, AI, database, networking, and general categories.
66
+ `greenmining` is a Python library and CLI tool for analyzing GitHub repositories to identify green software engineering practices and energy-efficient patterns. It detects 76 sustainable software patterns across cloud, web, AI, database, networking, and general categories.
67
67
 
68
68
  ## Features
69
69
 
70
- - 🔍 **76 GSF Patterns**: Detect official Green Software Foundation patterns
70
+ - 🔍 **76 Sustainability Patterns**: Detect energy-efficient and environmentally conscious coding practices
71
71
  - 📊 **Repository Mining**: Analyze 100+ microservices repositories from GitHub
72
72
  - 📈 **Green Awareness Detection**: Identify sustainability-focused commits
73
73
  - 📄 **Comprehensive Reports**: Generate analysis reports in multiple formats
@@ -128,7 +128,7 @@ greenmining report
128
128
  from greenmining import GSF_PATTERNS, is_green_aware, get_pattern_by_keywords
129
129
 
130
130
  # Check available patterns
131
- print(f"Total GSF patterns: {len(GSF_PATTERNS)}") # 76
131
+ print(f"Total patterns: {len(GSF_PATTERNS)}") # 76
132
132
 
133
133
  # Detect green awareness in commit messages
134
134
  commit_msg = "Optimize Redis caching to reduce energy consumption"
@@ -168,7 +168,7 @@ for commit in commits:
168
168
  print(f" Patterns: {result['known_pattern']}")
169
169
  ```
170
170
 
171
- #### Access GSF Patterns Data
171
+ #### Access Sustainability Patterns Data
172
172
 
173
173
  ```python
174
174
  from greenmining import GSF_PATTERNS
@@ -1,4 +1,4 @@
1
- greenmining/__init__.py,sha256=ITaqGeXxagpd_NwAF68-WFLmWVP4iNeP6t4hici3ktA,395
1
+ greenmining/__init__.py,sha256=M4E_cikw37tzcNodowPaRe60fk5FuAu9Exw6QDSAmQ4,395
2
2
  greenmining/__main__.py,sha256=1RwcSXcwdza6xJX5fRT8-HhZjlnKbkmGY_uxTm-NYZ4,138
3
3
  greenmining/__version__.py,sha256=Hry6u6QztktMYf7nqf0jPXFaA0b7lmr6pjdAaVRXDaE,66
4
4
  greenmining/cli.py,sha256=11DEE9bwKDIzj8CbR4-B8re_1cmywPo1CyLGWVGzF9U,13254
@@ -16,14 +16,14 @@ greenmining/models/repository.py,sha256=lpe9Pte6KPCcRvx0aOH16v2PiH3NwjPeQRJYxriK
16
16
  greenmining/presenters/__init__.py,sha256=-ukAvhNuTvy1Xpknps0faDZ78HKdPHPySzFpQHABzKM,203
17
17
  greenmining/presenters/console_presenter.py,sha256=jK_8agdEz-_2mqoyMNht-mNA9hXWe9EA8VlAUT_XFxA,5299
18
18
  greenmining/services/__init__.py,sha256=7CJDjHMTrY0bBoqzx22AUzIwEvby0FbAUUKYbjSlNPQ,460
19
- greenmining/services/commit_extractor.py,sha256=IxON_s6p9Rp4JJN8Q8T0bMLxBtatN4W7bCtk72snBSI,9900
19
+ greenmining/services/commit_extractor.py,sha256=XB7Y1HKeQ4OpgEz0yAjKDPdiQcq07QCQ5Xrx9AxGfrM,11814
20
20
  greenmining/services/data_aggregator.py,sha256=8yb70_lwT85Cn8jVDLUrEZXcGr44UKy8UEFTHbAebZg,16250
21
21
  greenmining/services/data_analyzer.py,sha256=ejvfKoG19D1U-b_RBne3e66h2yF4k05gyv3BLnZB9_k,11856
22
22
  greenmining/services/github_fetcher.py,sha256=9aHSbZoA8BWL1Cp0cCv2NltXf0Jr7W_mO5d_-7TuOvY,9294
23
23
  greenmining/services/reports.py,sha256=cE7XvB2ihD5KwrO4W1Uj_I1h5pELBPF85MjgGFzkgOQ,21829
24
- greenmining-0.1.7.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
25
- greenmining-0.1.7.dist-info/METADATA,sha256=OeDR3EVi-N2aIzjXSdpPeD6bsWJZxZGHZnsTGpjw4F4,9892
26
- greenmining-0.1.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
27
- greenmining-0.1.7.dist-info/entry_points.txt,sha256=oHvTWMzNFGf2W3CFEKVVPsG4exeMv0MaQu9YsUoQ9lw,53
28
- greenmining-0.1.7.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
29
- greenmining-0.1.7.dist-info/RECORD,,
24
+ greenmining-0.1.8.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
25
+ greenmining-0.1.8.dist-info/METADATA,sha256=4o6y5WcKQHRyGZOK2NcyIPk1GxVytZzyGMeI7R7j5TI,9969
26
+ greenmining-0.1.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
27
+ greenmining-0.1.8.dist-info/entry_points.txt,sha256=oHvTWMzNFGf2W3CFEKVVPsG4exeMv0MaQu9YsUoQ9lw,53
28
+ greenmining-0.1.8.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
29
+ greenmining-0.1.8.dist-info/RECORD,,