greenmining 1.0.4.tar.gz → 1.0.5.tar.gz
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- {greenmining-1.0.4/greenmining.egg-info → greenmining-1.0.5}/PKG-INFO +10 -24
- {greenmining-1.0.4 → greenmining-1.0.5}/README.md +8 -22
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/__init__.py +1 -1
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/__version__.py +1 -1
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/services/github_fetcher.py +16 -18
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/services/github_graphql_fetcher.py +45 -55
- {greenmining-1.0.4 → greenmining-1.0.5/greenmining.egg-info}/PKG-INFO +10 -24
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining.egg-info/requires.txt +1 -1
- {greenmining-1.0.4 → greenmining-1.0.5}/pyproject.toml +2 -2
- {greenmining-1.0.4 → greenmining-1.0.5}/setup.py +1 -1
- {greenmining-1.0.4 → greenmining-1.0.5}/CHANGELOG.md +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/LICENSE +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/MANIFEST.in +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/__main__.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/analyzers/__init__.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/analyzers/code_diff_analyzer.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/analyzers/qualitative_analyzer.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/analyzers/statistical_analyzer.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/analyzers/temporal_analyzer.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/config.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/controllers/__init__.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/controllers/repository_controller.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/energy/__init__.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/energy/base.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/energy/codecarbon_meter.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/energy/rapl.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/gsf_patterns.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/models/__init__.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/models/aggregated_stats.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/models/analysis_result.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/models/commit.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/models/repository.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/presenters/__init__.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/presenters/console_presenter.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/services/__init__.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/services/commit_extractor.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/services/data_aggregator.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/services/data_analyzer.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/services/local_repo_analyzer.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/services/reports.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/utils.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining.egg-info/SOURCES.txt +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining.egg-info/dependency_links.txt +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining.egg-info/top_level.txt +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/setup.cfg +0 -0
````diff
--- greenmining-1.0.4/greenmining.egg-info/PKG-INFO
+++ greenmining-1.0.5/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: greenmining
-Version: 1.0.4
+Version: 1.0.5
 Summary: Analyze GitHub repositories to identify green software engineering patterns and energy-efficient practices
 Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
 License: MIT
@@ -43,7 +43,7 @@ Requires-Dist: pytest-mock>=3.12.0; extra == "dev"
 Requires-Dist: black>=23.12.0; extra == "dev"
 Requires-Dist: ruff>=0.1.9; extra == "dev"
 Requires-Dist: mypy>=1.8.0; extra == "dev"
-Requires-Dist: build>=1.0.
+Requires-Dist: build>=1.0.5; extra == "dev"
 Requires-Dist: twine>=4.0.2; extra == "dev"
 Provides-Extra: docs
 Requires-Dist: sphinx>=7.2.0; extra == "docs"
@@ -61,7 +61,7 @@ Green mining for microservices repositories.
 
 ## Overview
 
-`greenmining` is a Python library
+`greenmining` is a Python library for analyzing GitHub repositories to identify green software engineering practices and energy-efficient patterns. It detects sustainable software patterns across cloud, web, AI, database, networking, and general categories.
 
 ## Installation
 
@@ -105,7 +105,7 @@ if is_green_aware(commit_msg):
 # Output: ['Cache Static Data', 'Use Efficient Cache Strategies']
 ```
 
-#### Fetch Repositories with Custom Keywords
+#### Fetch Repositories with Custom Keywords
 
 ```python
 from greenmining import fetch_repositories
@@ -144,8 +144,6 @@ for repo in repos[:5]:
 ```python
 from greenmining.services.commit_extractor import CommitExtractor
 from greenmining.services.data_analyzer import DataAnalyzer
-from greenmining.analyzers.nlp_analyzer import NLPAnalyzer
-from greenmining.analyzers.ml_feature_extractor import MLFeatureExtractor
 from greenmining import fetch_repositories
 
 # Fetch repositories with custom keywords
@@ -195,18 +193,6 @@ for commit in commits:
         results.append(result)
         print(f"Green commit found: {commit.message[:50]}...")
         print(f" Patterns: {result['known_pattern']}")
-
-        # Access NLP analysis results (NEW)
-        if 'nlp_analysis' in result:
-            nlp = result['nlp_analysis']
-            print(f" NLP: {nlp['morphological_count']} morphological matches, "
-                  f"{nlp['semantic_count']} semantic matches")
-
-        # Access ML features (NEW)
-        if 'ml_features' in result:
-            ml = result['ml_features']['text']
-            print(f" ML Features: {ml['word_count']} words, "
-                  f"keyword density: {ml['keyword_density']:.2f}")
 ```
 
 #### Access Sustainability Patterns Data
@@ -242,7 +228,7 @@ print(f"Available categories: {sorted(categories)}")
 # 'monitoring', 'network', 'networking', 'resource', 'web']
 ```
 
-#### Advanced Analysis: Temporal Trends
+#### Advanced Analysis: Temporal Trends
 
 ```python
 from greenmining.services.data_aggregator import DataAggregator
@@ -374,7 +360,7 @@ repositories = fetch_repositories(
     min_stars=10,
     keywords="software engineering",
 )
-print(f"
+print(f"Fetched {len(repositories)} repositories")
 
 # STAGE 2: Extract Commits
 print("\nExtracting commits...")
@@ -386,7 +372,7 @@ extractor = CommitExtractor(
     timeout=120,
 )
 all_commits = extractor.extract_from_repositories(repositories)
-print(f"
+print(f"Extracted {len(all_commits)} commits")
 
 # Save commits
 extractor.save_results(
@@ -405,8 +391,8 @@ analyzed_commits = analyzer.analyze_commits(all_commits)
 # Count green-aware commits
 green_count = sum(1 for c in analyzed_commits if c.get("green_aware", False))
 green_percentage = (green_count / len(analyzed_commits) * 100) if analyzed_commits else 0
-print(f"
-print(f"
+print(f"Analyzed {len(analyzed_commits)} commits")
+print(f"Green-aware: {green_count} ({green_percentage:.1f}%)")
 
 # Save analysis
 analyzer.save_results(analyzed_commits, output_dir / "analyzed.json")
@@ -434,7 +420,7 @@ print("\n" + "="*80)
 print("ANALYSIS COMPLETE")
 print("="*80)
 aggregator.print_summary(results)
-print(f"\
+print(f"\nResults saved in: {output_dir.absolute()}")
 ```
 
 **What this example does:**
````
````diff
--- greenmining-1.0.4/README.md
+++ greenmining-1.0.5/README.md
@@ -8,7 +8,7 @@ Green mining for microservices repositories.
 
 ## Overview
 
-`greenmining` is a Python library
+`greenmining` is a Python library for analyzing GitHub repositories to identify green software engineering practices and energy-efficient patterns. It detects sustainable software patterns across cloud, web, AI, database, networking, and general categories.
 
 ## Installation
 
@@ -52,7 +52,7 @@ if is_green_aware(commit_msg):
 # Output: ['Cache Static Data', 'Use Efficient Cache Strategies']
 ```
 
-#### Fetch Repositories with Custom Keywords
+#### Fetch Repositories with Custom Keywords
 
 ```python
 from greenmining import fetch_repositories
@@ -91,8 +91,6 @@ for repo in repos[:5]:
 ```python
 from greenmining.services.commit_extractor import CommitExtractor
 from greenmining.services.data_analyzer import DataAnalyzer
-from greenmining.analyzers.nlp_analyzer import NLPAnalyzer
-from greenmining.analyzers.ml_feature_extractor import MLFeatureExtractor
 from greenmining import fetch_repositories
 
 # Fetch repositories with custom keywords
@@ -142,18 +140,6 @@ for commit in commits:
         results.append(result)
         print(f"Green commit found: {commit.message[:50]}...")
         print(f" Patterns: {result['known_pattern']}")
-
-        # Access NLP analysis results (NEW)
-        if 'nlp_analysis' in result:
-            nlp = result['nlp_analysis']
-            print(f" NLP: {nlp['morphological_count']} morphological matches, "
-                  f"{nlp['semantic_count']} semantic matches")
-
-        # Access ML features (NEW)
-        if 'ml_features' in result:
-            ml = result['ml_features']['text']
-            print(f" ML Features: {ml['word_count']} words, "
-                  f"keyword density: {ml['keyword_density']:.2f}")
 ```
 
 #### Access Sustainability Patterns Data
@@ -189,7 +175,7 @@ print(f"Available categories: {sorted(categories)}")
 # 'monitoring', 'network', 'networking', 'resource', 'web']
 ```
 
-#### Advanced Analysis: Temporal Trends
+#### Advanced Analysis: Temporal Trends
 
 ```python
 from greenmining.services.data_aggregator import DataAggregator
@@ -321,7 +307,7 @@ repositories = fetch_repositories(
     min_stars=10,
     keywords="software engineering",
 )
-print(f"
+print(f"Fetched {len(repositories)} repositories")
 
 # STAGE 2: Extract Commits
 print("\nExtracting commits...")
@@ -333,7 +319,7 @@ extractor = CommitExtractor(
     timeout=120,
 )
 all_commits = extractor.extract_from_repositories(repositories)
-print(f"
+print(f"Extracted {len(all_commits)} commits")
 
 # Save commits
 extractor.save_results(
@@ -352,8 +338,8 @@ analyzed_commits = analyzer.analyze_commits(all_commits)
 # Count green-aware commits
 green_count = sum(1 for c in analyzed_commits if c.get("green_aware", False))
 green_percentage = (green_count / len(analyzed_commits) * 100) if analyzed_commits else 0
-print(f"
-print(f"
+print(f"Analyzed {len(analyzed_commits)} commits")
+print(f"Green-aware: {green_count} ({green_percentage:.1f}%)")
 
 # Save analysis
 analyzer.save_results(analyzed_commits, output_dir / "analyzed.json")
@@ -381,7 +367,7 @@ print("\n" + "="*80)
 print("ANALYSIS COMPLETE")
 print("="*80)
 aggregator.print_summary(results)
-print(f"\
+print(f"\nResults saved in: {output_dir.absolute()}")
 ```
 
 **What this example does:**
````
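The README changes are easiest to sanity-check interactively. A minimal sketch, assuming `fetch_repositories` and `is_green_aware` are importable from the top-level package as the hunks above suggest, and using only the parameter names they show:

```python
# Minimal sketch based on the README hunks above; the top-level exports and
# parameter defaults are assumptions, not a confirmed 1.0.5 API surface.
from greenmining import fetch_repositories, is_green_aware

commit_msg = "perf: cache static data to reduce energy consumption"
if is_green_aware(commit_msg):
    print("Commit is green-aware")

repositories = fetch_repositories(min_stars=10, keywords="software engineering")
print(f"Fetched {len(repositories)} repositories")
```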
```diff
--- greenmining-1.0.4/greenmining/services/github_fetcher.py
+++ greenmining-1.0.5/greenmining/services/github_fetcher.py
@@ -1,21 +1,19 @@
-"""
-================================================================================
-DEADCODE - OLD REST API IMPLEMENTATION
-================================================================================
-
-This file contains the OLD GitHub REST API implementation.
-It has been REPLACED by GitHubGraphQLFetcher for better performance.
-
-Performance comparison:
-REST API: 10+ requests for 100 repos, ~2 minutes
-GraphQL API: 1-2 requests for 100 repos, ~15 seconds (10x faster!)
-
-USE INSTEAD: greenmining.services.github_graphql_fetcher.GitHubGraphQLFetcher
-
-This file is kept for reference only. Do not use in production.
-
-================================================================================
-"""
+# ================================================================================
+# DEADCODE - OLD REST API IMPLEMENTATION
+# ================================================================================
+#
+# This file contains the OLD GitHub REST API implementation.
+# It has been REPLACED by GitHubGraphQLFetcher for better performance.
+#
+# Performance comparison:
+# REST API: 10+ requests for 100 repos, ~2 minutes
+# GraphQL API: 1-2 requests for 100 repos, ~15 seconds (10x faster!)
+#
+# USE INSTEAD: greenmining.services.github_graphql_fetcher.GitHubGraphQLFetcher
+#
+# This file is kept for reference only. Do not use in production.
+#
+# ================================================================================
 
 # GitHub repository fetcher for green microservices mining.
 
```
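Since `github_fetcher.py` is now explicitly dead code, callers should move to the GraphQL fetcher it points at. A hedged migration sketch, using only the constructor and the search parameters visible in the `github_graphql_fetcher.py` diff below; the method name `search_repositories` is inferred from its docstring, and the argument values are illustrative:

```python
# Migration sketch: replace the deprecated REST fetcher with the GraphQL one.
# Only names visible in this diff are used; values are illustrative.
import os

from greenmining.services.github_graphql_fetcher import GitHubGraphQLFetcher

fetcher = GitHubGraphQLFetcher(token=os.environ["GITHUB_TOKEN"])
repositories = fetcher.search_repositories(
    keywords="microservices",
    max_repos=100,               # assumed caller-supplied cap
    min_stars=10,
    created_after="2020-01-01",  # YYYY-MM-DD, per the Args comments
)
fetcher.save_results(repositories, "repositories.json")
```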
```diff
--- greenmining-1.0.4/greenmining/services/github_graphql_fetcher.py
+++ greenmining-1.0.5/greenmining/services/github_graphql_fetcher.py
@@ -1,9 +1,7 @@
-"""
-GitHub GraphQL API fetcher for faster and more efficient repository fetching.
-
-GraphQL allows fetching exactly the data you need in a single request,
-reducing API calls and improving rate limit efficiency.
-"""
+# GitHub GraphQL API fetcher for faster and more efficient repository fetching.
+#
+# GraphQL allows fetching exactly the data you need in a single request,
+# reducing API calls and improving rate limit efficiency.
 
 import json
 import time
@@ -15,25 +13,21 @@ from greenmining.models.repository import Repository
 
 
 class GitHubGraphQLFetcher:
-    """
-    Fetch GitHub repositories using GraphQL API v4.
-
-    Benefits over REST API:
-    - Fetch repos + commits in 1 request instead of 100+ REST calls
-    - Get exactly the fields you need (no over-fetching)
-    - Better rate limit efficiency (5000 points/hour vs 5000 requests/hour)
-    - More powerful search capabilities
-    """
+    # Fetch GitHub repositories using GraphQL API v4.
+    #
+    # Benefits over REST API:
+    # - Fetch repos + commits in 1 request instead of 100+ REST calls
+    # - Get exactly the fields you need (no over-fetching)
+    # - Better rate limit efficiency (5000 points/hour vs 5000 requests/hour)
+    # - More powerful search capabilities
 
     GRAPHQL_ENDPOINT = "https://api.github.com/graphql"
 
     def __init__(self, token: str):
-        """
-        Initialize GraphQL fetcher.
-
-        Args:
-            token: GitHub personal access token
-        """
+        # Initialize GraphQL fetcher.
+        #
+        # Args:
+        #     token: GitHub personal access token
         self.token = token
         self.headers = {
             "Authorization": f"Bearer {token}",
@@ -51,22 +45,20 @@ class GitHubGraphQLFetcher:
         pushed_after: Optional[str] = None,
         pushed_before: Optional[str] = None,
     ) -> List[Repository]:
-        """
-        Search GitHub repositories using GraphQL.
-
-        Args:
-            keywords: Search keywords
-            max_repos: Maximum number of repositories to fetch
-            min_stars: Minimum star count
-            languages: Programming languages to filter
-            created_after: Created after date (YYYY-MM-DD)
-            created_before: Created before date (YYYY-MM-DD)
-            pushed_after: Pushed after date (YYYY-MM-DD)
-            pushed_before: Pushed before date (YYYY-MM-DD)
-
-        Returns:
-            List of Repository objects
-        """
+        # Search GitHub repositories using GraphQL.
+        #
+        # Args:
+        #     keywords: Search keywords
+        #     max_repos: Maximum number of repositories to fetch
+        #     min_stars: Minimum star count
+        #     languages: Programming languages to filter
+        #     created_after: Created after date (YYYY-MM-DD)
+        #     created_before: Created before date (YYYY-MM-DD)
+        #     pushed_after: Pushed after date (YYYY-MM-DD)
+        #     pushed_before: Pushed before date (YYYY-MM-DD)
+        #
+        # Returns:
+        #     List of Repository objects
         # Build search query
         search_query = self._build_search_query(
             keywords,
@@ -195,7 +187,7 @@ class GitHubGraphQLFetcher:
         pushed_after: Optional[str],
         pushed_before: Optional[str],
     ) -> str:
-        """Build GitHub search query string."""
+        # Build GitHub search query string.
         query_parts = [keywords]
 
         # Star count
@@ -219,7 +211,7 @@ class GitHubGraphQLFetcher:
         return " ".join(query_parts)
 
     def _execute_query(self, query: str, variables: Dict[str, Any]) -> Dict[str, Any]:
-        """Execute GraphQL query."""
+        # Execute GraphQL query.
         payload = {"query": query, "variables": variables}
 
         response = requests.post(
@@ -230,7 +222,7 @@ class GitHubGraphQLFetcher:
         return response.json()
 
     def _parse_repository(self, node: Dict[str, Any]) -> Repository:
-        """Parse GraphQL repository node to Repository object."""
+        # Parse GraphQL repository node to Repository object.
         # Extract languages
         languages = []
         if node.get("languages") and node["languages"].get("nodes"):
@@ -265,20 +257,18 @@ class GitHubGraphQLFetcher:
     def get_repository_commits(
         self, owner: str, name: str, max_commits: int = 100
     ) -> List[Dict[str, Any]]:
-        """
-        Fetch commits for a specific repository using GraphQL.
-
-        This is much faster than REST API as it gets all commits in 1-2 requests
-        instead of paginating through 100 individual REST calls.
-
-        Args:
-            owner: Repository owner
-            name: Repository name
-            max_commits: Maximum commits to fetch
-
-        Returns:
-            List of commit dictionaries
-        """
+        # Fetch commits for a specific repository using GraphQL.
+        #
+        # This is much faster than REST API as it gets all commits in 1-2 requests
+        # instead of paginating through 100 individual REST calls.
+        #
+        # Args:
+        #     owner: Repository owner
+        #     name: Repository name
+        #     max_commits: Maximum commits to fetch
+        #
+        # Returns:
+        #     List of commit dictionaries
         query = """
         query($owner: String!, $name: String!, $first: Int!) {
           repository(owner: $owner, name: $name) {
@@ -359,7 +349,7 @@ class GitHubGraphQLFetcher:
         return commits
 
     def save_results(self, repositories: List[Repository], output_file: str):
-        """Save repositories to JSON file."""
+        # Save repositories to JSON file.
         data = {
             "total_repositories": len(repositories),
             "repositories": [repo.to_dict() for repo in repositories],
```
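`get_repository_commits` is the piece that replaces per-repository REST pagination: one or two GraphQL requests per repository instead of up to 100 individual calls. Continuing the sketch above; how `Repository` exposes its owner and name is an assumption here:

```python
# Continuing the sketch: fetch commits for each repository found above.
# repo.to_dict() exists per save_results(); the "full_name" key and its
# "owner/name" format are hypothetical.
for repo in repositories:
    owner, name = repo.to_dict()["full_name"].split("/", 1)
    commits = fetcher.get_repository_commits(owner=owner, name=name, max_commits=100)
    print(f"{owner}/{name}: {len(commits)} commits")
```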
````diff
--- greenmining-1.0.4/PKG-INFO
+++ greenmining-1.0.5/greenmining.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: greenmining
-Version: 1.0.4
+Version: 1.0.5
 Summary: Analyze GitHub repositories to identify green software engineering patterns and energy-efficient practices
 Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
 License: MIT
@@ -43,7 +43,7 @@ Requires-Dist: pytest-mock>=3.12.0; extra == "dev"
 Requires-Dist: black>=23.12.0; extra == "dev"
 Requires-Dist: ruff>=0.1.9; extra == "dev"
 Requires-Dist: mypy>=1.8.0; extra == "dev"
-Requires-Dist: build>=1.0.
+Requires-Dist: build>=1.0.5; extra == "dev"
 Requires-Dist: twine>=4.0.2; extra == "dev"
 Provides-Extra: docs
 Requires-Dist: sphinx>=7.2.0; extra == "docs"
````

The remaining hunks in this copy of PKG-INFO are identical to the README.md changes shown above, since the package metadata embeds the README as its long description.
```diff
--- greenmining-1.0.4/pyproject.toml
+++ greenmining-1.0.5/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "greenmining"
-version = "1.0.4"
+version = "1.0.5"
 description = "Analyze GitHub repositories to identify green software engineering patterns and energy-efficient practices"
 readme = "README.md"
 requires-python = ">=3.9"
@@ -60,7 +60,7 @@ dev = [
     "black>=23.12.0",
     "ruff>=0.1.9",
     "mypy>=1.8.0",
-    "build>=1.0.
+    "build>=1.0.5",
     "twine>=4.0.2"
 ]
 
```
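With the metadata, `greenmining/__init__.py`, and `greenmining/__version__.py` all bumped in lockstep, the upgrade can be verified from an interpreter; that the version string is re-exported as `greenmining.__version__` is an assumption suggested by the one-line `__init__.py` change:

```python
# Post-upgrade check; assumes the package re-exports its version string,
# which the one-line __init__.py/__version__.py bumps in this diff suggest.
import greenmining

print(greenmining.__version__)  # expected: 1.0.5
```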
All other files are unchanged between 1.0.4 and 1.0.5.