greenmining 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff shows the contents of publicly available package versions as they appear in their respective public registries, and is provided for informational purposes only.
greenmining/__init__.py CHANGED
@@ -8,7 +8,7 @@ from greenmining.gsf_patterns import (
     is_green_aware,
 )
 
-__version__ = "0.1.0"
+__version__ = "0.1.9"
 
 __all__ = [
     "Config",
greenmining/services/commit_extractor.py CHANGED
@@ -6,7 +6,7 @@ from pathlib import Path
 from typing import Any, Optional
 
 import click
-from pydriller import Repository
+from github import Github
 from tqdm import tqdm
 
 from greenmining.config import get_config
@@ -21,20 +21,28 @@ from greenmining.utils import (
 
 
 class CommitExtractor:
-    """Extracts commit data from repositories."""
-
-    def __init__(self, max_commits: int = 50, skip_merges: bool = True, days_back: int = 730):
+    """Extracts commit data from repositories using GitHub API."""
+
+    def __init__(
+        self,
+        max_commits: int = 50,
+        skip_merges: bool = True,
+        days_back: int = 730,
+        github_token: str | None = None,
+    ):
         """Initialize commit extractor.
 
         Args:
             max_commits: Maximum commits per repository
             skip_merges: Skip merge commits
             days_back: Only analyze commits from last N days
+            github_token: GitHub API token (optional)
         """
         self.max_commits = max_commits
         self.skip_merges = skip_merges
         self.days_back = days_back
         self.cutoff_date = datetime.now() - timedelta(days=days_back)
+        self.github = Github(github_token) if github_token else None
 
     def extract_from_repositories(self, repositories: list[dict[str, Any]]) -> list[dict[str, Any]]:
         """Extract commits from list of repositories.
@@ -77,7 +85,7 @@ class CommitExtractor:
 
     @retry_on_exception(max_retries=2, delay=5.0, exceptions=(Exception,))
     def _extract_repo_commits(self, repo: dict[str, Any]) -> list[dict[str, Any]]:
-        """Extract commits from a single repository.
+        """Extract commits from a single repository using GitHub API.
 
         Args:
             repo: Repository metadata dictionary
@@ -86,27 +94,35 @@
             List of commit dictionaries
         """
         commits = []
-        repo_url = repo["clone_url"]
         repo_name = repo["full_name"]
 
         try:
-            # Use PyDriller to traverse commits
-            commit_count = 0
+            # Get repository from GitHub API
+            if not self.github:
+                config = get_config()
+                self.github = Github(config.GITHUB_TOKEN)
 
-            for commit in Repository(
-                repo_url, only_no_merge=self.skip_merges, since=self.cutoff_date
-            ).traverse_commits():
+            gh_repo = self.github.get_repo(repo_name)
+
+            # Get recent commits (GitHub API returns in reverse chronological order)
+            commit_count = 0
 
+            for commit in gh_repo.get_commits():
                 # Skip if reached max commits
                 if commit_count >= self.max_commits:
                     break
 
+                # Skip merge commits if requested
+                if self.skip_merges and len(commit.parents) > 1:
+                    continue
+
                 # Skip trivial commits
-                if not commit.msg or len(commit.msg.strip()) < 10:
+                commit_msg = commit.commit.message
+                if not commit_msg or len(commit_msg.strip()) < 10:
                     continue
 
                 # Extract commit data
-                commit_data = self._extract_commit_metadata(commit, repo_name)
+                commit_data = self._extract_commit_metadata_from_github(commit, repo_name)
                 commits.append(commit_data)
                 commit_count += 1
 
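Reviewer note: the old PyDriller call passed `since=self.cutoff_date`, but the replacement invokes `gh_repo.get_commits()` with no arguments, so the `days_back` cutoff computed in the constructor no longer constrains this path. PyGithub's `Repository.get_commits()` does accept a `since` datetime if that filter is meant to carry over.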
@@ -158,6 +174,46 @@ class CommitExtractor:
             "in_main_branch": commit.in_main_branch if hasattr(commit, "in_main_branch") else True,
         }
 
+    def _extract_commit_metadata_from_github(self, commit, repo_name: str) -> dict[str, Any]:
+        """Extract metadata from GitHub API commit object.
+
+        Args:
+            commit: GitHub API commit object
+            repo_name: Repository name
+
+        Returns:
+            Dictionary with commit metadata
+        """
+        # Get modified files and stats
+        files_changed = []
+        lines_added = 0
+        lines_deleted = 0
+
+        try:
+            for file in commit.files:
+                files_changed.append(file.filename)
+                lines_added += file.additions
+                lines_deleted += file.deletions
+        except Exception:
+            pass
+
+        return {
+            "commit_id": commit.sha,
+            "repo_name": repo_name,
+            "date": commit.commit.committer.date.isoformat(),
+            "author": commit.commit.author.name,
+            "author_email": commit.commit.author.email,
+            "message": commit.commit.message.strip(),
+            "files_changed": files_changed[:20],  # Limit to 20 files
+            "lines_added": lines_added,
+            "lines_deleted": lines_deleted,
+            "insertions": lines_added,
+            "deletions": lines_deleted,
+            "is_merge": len(commit.parents) > 1,
+            "branches": [],
+            "in_main_branch": True,
+        }
+
     def save_results(self, commits: list[dict[str, Any]], output_file: Path, repos_count: int):
         """Save extracted commits to JSON file.
 
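The new helper maps PyGithub objects onto the flat dictionary the analyzer expects. A self-contained sketch of the same field access against the public API; the repository name is illustrative, and an unauthenticated client gets a much lower rate limit:

```python
# Standalone sketch of the GitHub-API field mapping used above (repo name illustrative).
from github import Github

gh = Github()  # unauthenticated; pass a token for realistic rate limits
repo = gh.get_repo("octocat/Hello-World")

for commit in repo.get_commits()[:3]:  # newest first
    detail = commit.commit  # underlying git data: message, author, dates
    print(commit.sha[:7], detail.committer.date.isoformat())
    print("  merge" if len(commit.parents) > 1 else "  regular",
          "-", (detail.message.splitlines() or [""])[0])
```

Worth knowing: touching `commit.files` forces PyGithub to fetch the full commit, one extra API request per commit, which is presumably why the stats loop above sits inside a broad `try/except`.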
greenmining/services/data_aggregator.py CHANGED
@@ -72,7 +72,7 @@ class DataAggregator:
         green_aware_count = sum(1 for r in results if r.get("green_aware", False))
 
         # Count repos with at least one green commit
-        repos_with_green = len({r["repo_name"] for r in results if r.get("green_aware", False)})
+        repos_with_green = len({r["repository"] for r in results if r.get("green_aware", False)})
 
         return {
             "total_commits": total_commits,
@@ -91,17 +91,24 @@ class DataAggregator:
         )
 
         for result in results:
-            pattern = result.get("known_pattern")
-            confidence = result.get("pattern_confidence", "NONE")
-
-            if pattern and pattern != "NONE DETECTED":
+            # Handle both gsf_patterns_matched (list) and known_pattern (string)
+            patterns = result.get("gsf_patterns_matched", [])
+            if not patterns:  # Fallback to old format
+                pattern = result.get("known_pattern")
+                if pattern and pattern != "NONE DETECTED":
+                    patterns = [pattern]
+
+            confidence = result.get("confidence", result.get("pattern_confidence", "low")).upper()
+
+            for pattern in patterns:
                 pattern_data[pattern]["count"] += 1
                 if confidence in ["HIGH", "MEDIUM", "LOW"]:
                     pattern_data[pattern][confidence] += 1
 
                 # Store example commits (max 3)
                 if len(pattern_data[pattern]["example_commits"]) < 3:
-                    pattern_data[pattern]["example_commits"].append(result["commit_id"])
+                    commit_id = result.get("commit_hash", result.get("commit_id", "unknown"))
+                    pattern_data[pattern]["example_commits"].append(commit_id)
 
         # Convert to list format
         patterns_list = []
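To make the fallback path concrete, here is a self-contained sketch run against two synthetic analyzer results, one per schema; the pattern names and hashes are invented for illustration:

```python
from collections import defaultdict

# Two synthetic results: new list-based schema vs. legacy single-pattern schema.
results = [
    {"gsf_patterns_matched": ["Cache Static Data", "Compress Stored Data"],
     "confidence": "high", "commit_hash": "abc1234"},
    {"known_pattern": "Queue Non-Urgent Requests", "pattern_confidence": "MEDIUM",
     "commit_id": "def5678"},
]

pattern_data = defaultdict(
    lambda: {"count": 0, "HIGH": 0, "MEDIUM": 0, "LOW": 0, "example_commits": []}
)
for result in results:
    patterns = result.get("gsf_patterns_matched", [])
    if not patterns:  # fall back to the legacy string field
        pattern = result.get("known_pattern")
        if pattern and pattern != "NONE DETECTED":
            patterns = [pattern]
    confidence = result.get("confidence", result.get("pattern_confidence", "low")).upper()
    for pattern in patterns:
        pattern_data[pattern]["count"] += 1
        if confidence in ("HIGH", "MEDIUM", "LOW"):
            pattern_data[pattern][confidence] += 1
        if len(pattern_data[pattern]["example_commits"]) < 3:
            commit_id = result.get("commit_hash", result.get("commit_id", "unknown"))
            pattern_data[pattern]["example_commits"].append(commit_id)

print({k: v["count"] for k, v in pattern_data.items()})
# {'Cache Static Data': 1, 'Compress Stored Data': 1, 'Queue Non-Urgent Requests': 1}
```

Note the normalization: the old code compared `pattern_confidence` against upper-case labels, while the new analyzer emits lower-case `confidence`, hence the `.upper()` bridge.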
@@ -153,15 +160,21 @@ class DataAggregator:
 
         # Group commits by repository
         for result in results:
-            repo_commits[result["repo_name"]].append(result)
+            repo_commits[result["repository"]].append(result)
 
         # Calculate stats for each repo
         repo_stats = []
         for repo_name, commits in repo_commits.items():
             green_commits = [c for c in commits if c.get("green_aware", False)]
-            patterns = [
-                c.get("known_pattern") for c in commits if c.get("known_pattern") != "NONE DETECTED"
-            ]
+            # Get all patterns from commits (gsf_patterns_matched is a list)
+            patterns = []
+            for c in commits:
+                patterns_list = c.get("gsf_patterns_matched", [])
+                if not patterns_list:  # Fallback
+                    pattern = c.get("known_pattern")
+                    if pattern and pattern != "NONE DETECTED":
+                        patterns_list = [pattern]
+                patterns.extend(patterns_list)
             unique_patterns = list(set(patterns))
 
             repo_stats.append(
@@ -191,7 +204,7 @@ class DataAggregator:
         # Group commits by language
         language_commits = defaultdict(list)
         for result in results:
-            language = repo_language_map.get(result["repo_name"], "Unknown")
+            language = repo_language_map.get(result["repository"], "Unknown")
             language_commits[language].append(result)
 
         # Calculate stats for each language
@@ -239,18 +252,14 @@ class DataAggregator:
         for result in analysis_results:
             csv_data.append(
                 {
-                    "commit_id": result["commit_id"],
-                    "repo_name": result["repo_name"],
+                    "commit_hash": result.get("commit_hash", result.get("commit_id", "")),
+                    "repo_name": result.get("repository", ""),
                     "date": result.get("date", ""),
-                    "commit_message": result.get("commit_message", "")[:200],  # Truncate
+                    "message": result.get("message", "")[:200],  # Truncate
                     "green_aware": result.get("green_aware", False),
-                    "green_evidence": (
-                        result.get("green_evidence", "")[:200]
-                        if result.get("green_evidence")
-                        else ""
-                    ),
-                    "known_pattern": result.get("known_pattern", ""),
-                    "pattern_confidence": result.get("pattern_confidence", ""),
+                    "gsf_patterns": ", ".join(result.get("gsf_patterns_matched", [])),
+                    "pattern_count": result.get("pattern_count", 0),
+                    "confidence": result.get("confidence", ""),
                     "lines_added": result.get("lines_added", 0),
                     "lines_deleted": result.get("lines_deleted", 0),
                 }
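For reference, a hedged sketch of what one row of the reworked CSV schema looks like when written with the standard library; all values are synthetic:

```python
import csv
import io

# One synthetic row matching the new CSV columns from the diff above.
row = {
    "commit_hash": "abc1234",
    "repo_name": "acme/shop",
    "date": "2024-05-01T12:00:00",
    "message": "Optimize Redis caching to reduce energy consumption",
    "green_aware": True,
    "gsf_patterns": "Cache Static Data, Compress Stored Data",
    "pattern_count": 2,
    "confidence": "high",
    "lines_added": 12,
    "lines_deleted": 4,
}

buf = io.StringIO()
writer = csv.DictWriter(buf, fieldnames=list(row))
writer.writeheader()
writer.writerow(row)
print(buf.getvalue())
```

The free-text `green_evidence` column is gone; pattern evidence now travels in the comma-joined `gsf_patterns` column plus a numeric `pattern_count`.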
greenmining/services/data_analyzer.py CHANGED
@@ -114,8 +114,8 @@ class DataAnalyzer:
         return {
             "commit_hash": commit.get("hash", commit.get("commit_id", "unknown")),
             "repository": commit.get("repository", commit.get("repo_name", "unknown")),
-            "author": commit.get("author_name", "unknown"),
-            "date": commit.get("author_date", commit.get("date", "unknown")),
+            "author": commit.get("author", commit.get("author_name", "unknown")),
+            "date": commit.get("date", commit.get("author_date", "unknown")),
             "message": message,
             # Research Question 1: Green awareness
             "green_aware": green_aware,
@@ -125,10 +125,9 @@ class DataAnalyzer:
             "pattern_details": pattern_details,
             "confidence": confidence,
             # Additional metadata
-            "files_modified": commit.get("modified_files", commit.get("files_changed", [])),
-            "insertions": commit.get("insertions", commit.get("lines_added", 0)),
-            "deletions": commit.get("deletions", commit.get("lines_deleted", 0)),
-            "lines_deleted": commit.get("lines_deleted", 0),
+            "files_modified": commit.get("files_changed", commit.get("modified_files", [])),
+            "insertions": commit.get("lines_added", commit.get("insertions", 0)),
+            "deletions": commit.get("lines_deleted", commit.get("deletions", 0)),
         }
 
     def _check_green_awareness(self, message: str, files: list[str]) -> tuple[bool, Optional[str]]:
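The reordering matters because nested `dict.get` fallbacks resolve the outer key first; a two-line illustration with a synthetic commit carrying both field spellings:

```python
commit = {"lines_added": 12, "insertions": 99}  # synthetic: both spellings present
# New order prefers the GitHub-extractor field over the legacy PyDriller one.
print(commit.get("lines_added", commit.get("insertions", 0)))  # -> 12
```

The redundant `lines_deleted` output key, which duplicated `deletions`, is also dropped.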
@@ -205,9 +204,15 @@ class DataAnalyzer:
         """
         # Calculate summary statistics
        green_aware_count = sum(1 for r in results if r["green_aware"])
-        pattern_counts = Counter(
-            r["known_pattern"] for r in results if r["known_pattern"] != "NONE DETECTED"
-        )
+
+        # Count all matched patterns (results have gsf_patterns_matched which is a list)
+        all_patterns = []
+        for r in results:
+            patterns = r.get("gsf_patterns_matched", [])
+            if patterns:  # If there are matched patterns
+                all_patterns.extend(patterns)
+
+        pattern_counts = Counter(all_patterns)
 
         data = {
             "metadata": {
greenmining-{0.1.7 → 0.1.9}.dist-info/METADATA RENAMED
@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: greenmining
-Version: 0.1.7
-Summary: Green Software Foundation (GSF) patterns mining tool for microservices repositories
+Version: 0.1.9
+Summary: Analyze GitHub repositories to identify green software engineering patterns and energy-efficient practices
 Author-email: Your Name <your.email@example.com>
 Maintainer-email: Your Name <your.email@example.com>
 License: MIT
@@ -63,11 +63,11 @@ Green mining for microservices repositories.
 
 ## Overview
 
-`greenmining` is a Python library and CLI tool for analyzing GitHub repositories to identify green software engineering practices. It detects 76 official Green Software Foundation patterns across cloud, web, AI, database, networking, and general categories.
+`greenmining` is a Python library and CLI tool for analyzing GitHub repositories to identify green software engineering practices and energy-efficient patterns. It detects 76 sustainable software patterns across cloud, web, AI, database, networking, and general categories.
 
 ## Features
 
-- 🔍 **76 GSF Patterns**: Detect official Green Software Foundation patterns
+- 🔍 **76 Sustainability Patterns**: Detect energy-efficient and environmentally conscious coding practices
 - 📊 **Repository Mining**: Analyze 100+ microservices repositories from GitHub
 - 📈 **Green Awareness Detection**: Identify sustainability-focused commits
 - 📄 **Comprehensive Reports**: Generate analysis reports in multiple formats
@@ -128,7 +128,7 @@ greenmining report
 from greenmining import GSF_PATTERNS, is_green_aware, get_pattern_by_keywords
 
 # Check available patterns
-print(f"Total GSF patterns: {len(GSF_PATTERNS)}")  # 76
+print(f"Total patterns: {len(GSF_PATTERNS)}")  # 76
 
 # Detect green awareness in commit messages
 commit_msg = "Optimize Redis caching to reduce energy consumption"
@@ -168,7 +168,7 @@ for commit in commits:
     print(f" Patterns: {result['known_pattern']}")
 ```
 
-#### Access GSF Patterns Data
+#### Access Sustainability Patterns Data
 
 ```python
 from greenmining import GSF_PATTERNS
greenmining-{0.1.7 → 0.1.9}.dist-info/RECORD RENAMED
@@ -1,4 +1,4 @@
-greenmining/__init__.py,sha256=ITaqGeXxagpd_NwAF68-WFLmWVP4iNeP6t4hici3ktA,395
+greenmining/__init__.py,sha256=-pLhkoePJtUXVyzBfTmT1iW56AvFgmNwf54hQckdYI8,395
 greenmining/__main__.py,sha256=1RwcSXcwdza6xJX5fRT8-HhZjlnKbkmGY_uxTm-NYZ4,138
 greenmining/__version__.py,sha256=Hry6u6QztktMYf7nqf0jPXFaA0b7lmr6pjdAaVRXDaE,66
 greenmining/cli.py,sha256=11DEE9bwKDIzj8CbR4-B8re_1cmywPo1CyLGWVGzF9U,13254
@@ -16,14 +16,14 @@ greenmining/models/repository.py,sha256=lpe9Pte6KPCcRvx0aOH16v2PiH3NwjPeQRJYxriK
 greenmining/presenters/__init__.py,sha256=-ukAvhNuTvy1Xpknps0faDZ78HKdPHPySzFpQHABzKM,203
 greenmining/presenters/console_presenter.py,sha256=jK_8agdEz-_2mqoyMNht-mNA9hXWe9EA8VlAUT_XFxA,5299
 greenmining/services/__init__.py,sha256=7CJDjHMTrY0bBoqzx22AUzIwEvby0FbAUUKYbjSlNPQ,460
-greenmining/services/commit_extractor.py,sha256=IxON_s6p9Rp4JJN8Q8T0bMLxBtatN4W7bCtk72snBSI,9900
-greenmining/services/data_aggregator.py,sha256=8yb70_lwT85Cn8jVDLUrEZXcGr44UKy8UEFTHbAebZg,16250
-greenmining/services/data_analyzer.py,sha256=ejvfKoG19D1U-b_RBne3e66h2yF4k05gyv3BLnZB9_k,11856
+greenmining/services/commit_extractor.py,sha256=XB7Y1HKeQ4OpgEz0yAjKDPdiQcq07QCQ5Xrx9AxGfrM,11814
+greenmining/services/data_aggregator.py,sha256=eXAHrzpafLJ14HRSFy70TCuamQpi43C6KcP2cG5IBGU,16877
+greenmining/services/data_analyzer.py,sha256=RHHxw2y-thjCtVEL_GmvPJPWdWNa-C6jMzjy5QZk4eI,12051
 greenmining/services/github_fetcher.py,sha256=9aHSbZoA8BWL1Cp0cCv2NltXf0Jr7W_mO5d_-7TuOvY,9294
 greenmining/services/reports.py,sha256=cE7XvB2ihD5KwrO4W1Uj_I1h5pELBPF85MjgGFzkgOQ,21829
-greenmining-0.1.7.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
-greenmining-0.1.7.dist-info/METADATA,sha256=OeDR3EVi-N2aIzjXSdpPeD6bsWJZxZGHZnsTGpjw4F4,9892
-greenmining-0.1.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-greenmining-0.1.7.dist-info/entry_points.txt,sha256=oHvTWMzNFGf2W3CFEKVVPsG4exeMv0MaQu9YsUoQ9lw,53
-greenmining-0.1.7.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
-greenmining-0.1.7.dist-info/RECORD,,
+greenmining-0.1.9.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
+greenmining-0.1.9.dist-info/METADATA,sha256=momgAH0mimUN2PjbgixNXXlraeP8unuIx2sgbyQT2ks,9969
+greenmining-0.1.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+greenmining-0.1.9.dist-info/entry_points.txt,sha256=oHvTWMzNFGf2W3CFEKVVPsG4exeMv0MaQu9YsUoQ9lw,53
+greenmining-0.1.9.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
+greenmining-0.1.9.dist-info/RECORD,,