greenmining 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- greenmining/__init__.py +1 -1
- greenmining/__version__.py +1 -1
- greenmining/services/commit_extractor.py +12 -8
- greenmining/services/data_aggregator.py +8 -2
- {greenmining-1.0.1.dist-info → greenmining-1.0.3.dist-info}/METADATA +114 -1
- {greenmining-1.0.1.dist-info → greenmining-1.0.3.dist-info}/RECORD +10 -10
- {greenmining-1.0.1.dist-info → greenmining-1.0.3.dist-info}/WHEEL +0 -0
- {greenmining-1.0.1.dist-info → greenmining-1.0.3.dist-info}/entry_points.txt +0 -0
- {greenmining-1.0.1.dist-info → greenmining-1.0.3.dist-info}/licenses/LICENSE +0 -0
- {greenmining-1.0.1.dist-info → greenmining-1.0.3.dist-info}/top_level.txt +0 -0
greenmining/__init__.py
CHANGED
greenmining/__version__.py
CHANGED
|
@@ -12,6 +12,7 @@ from github import Github
|
|
|
12
12
|
from tqdm import tqdm
|
|
13
13
|
|
|
14
14
|
from greenmining.config import get_config
|
|
15
|
+
from greenmining.models.repository import Repository
|
|
15
16
|
from greenmining.utils import (
|
|
16
17
|
colored_print,
|
|
17
18
|
format_timestamp,
|
|
@@ -49,11 +50,11 @@ class CommitExtractor:
|
|
|
49
50
|
self.github = Github(github_token) if github_token else None
|
|
50
51
|
self.timeout = timeout
|
|
51
52
|
|
|
52
|
-
def extract_from_repositories(self, repositories: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
53
|
+
def extract_from_repositories(self, repositories: list[dict[str, Any] | Repository]) -> list[dict[str, Any]]:
|
|
53
54
|
"""Extract commits from list of repositories.
|
|
54
55
|
|
|
55
56
|
Args:
|
|
56
|
-
repositories: List of repository metadata
|
|
57
|
+
repositories: List of repository metadata (dicts or Repository objects)
|
|
57
58
|
|
|
58
59
|
Returns:
|
|
59
60
|
List of commit data dictionaries
|
|
@@ -89,15 +90,17 @@ class CommitExtractor:
|
|
|
89
90
|
pbar.update(1)
|
|
90
91
|
except TimeoutError:
|
|
91
92
|
signal.alarm(0) # Cancel alarm
|
|
93
|
+
repo_name = repo.full_name if isinstance(repo, Repository) else repo["full_name"]
|
|
92
94
|
colored_print(
|
|
93
|
-
f"\nTimeout processing {
|
|
95
|
+
f"\nTimeout processing {repo_name} (>{self.timeout}s)", "yellow"
|
|
94
96
|
)
|
|
95
|
-
failed_repos.append(
|
|
97
|
+
failed_repos.append(repo_name)
|
|
96
98
|
pbar.update(1)
|
|
97
99
|
except Exception as e:
|
|
98
100
|
signal.alarm(0) # Cancel alarm
|
|
99
|
-
|
|
100
|
-
|
|
101
|
+
repo_name = repo.full_name if isinstance(repo, Repository) else repo["full_name"]
|
|
102
|
+
colored_print(f"\nError processing {repo_name}: {e}", "yellow")
|
|
103
|
+
failed_repos.append(repo_name)
|
|
101
104
|
pbar.update(1)
|
|
102
105
|
|
|
103
106
|
if failed_repos:
|
|
@@ -114,13 +117,14 @@ class CommitExtractor:
|
|
|
114
117
|
"""Extract commits from a single repository using GitHub API.
|
|
115
118
|
|
|
116
119
|
Args:
|
|
117
|
-
repo: Repository metadata
|
|
120
|
+
repo: Repository metadata (dict or Repository object)
|
|
118
121
|
|
|
119
122
|
Returns:
|
|
120
123
|
List of commit dictionaries
|
|
121
124
|
"""
|
|
122
125
|
commits = []
|
|
123
|
-
|
|
126
|
+
# Handle both Repository objects and dicts
|
|
127
|
+
repo_name = repo.full_name if isinstance(repo, Repository) else repo["full_name"]
|
|
124
128
|
|
|
125
129
|
try:
|
|
126
130
|
# Get repository from GitHub API
|
|
@@ -16,6 +16,7 @@ from greenmining.analyzers import (
|
|
|
16
16
|
QualitativeAnalyzer,
|
|
17
17
|
)
|
|
18
18
|
from greenmining.config import get_config
|
|
19
|
+
from greenmining.models.repository import Repository
|
|
19
20
|
from greenmining.utils import (
|
|
20
21
|
colored_print,
|
|
21
22
|
format_number,
|
|
@@ -270,8 +271,13 @@ class DataAggregator:
|
|
|
270
271
|
self, results: list[dict[str, Any]], repos: list[dict[str, Any]]
|
|
271
272
|
) -> list[dict[str, Any]]:
|
|
272
273
|
"""Generate per-language statistics."""
|
|
273
|
-
# Create repo name to language mapping
|
|
274
|
-
repo_language_map = {
|
|
274
|
+
# Create repo name to language mapping (handle both Repository objects and dicts)
|
|
275
|
+
repo_language_map = {}
|
|
276
|
+
for repo in repos:
|
|
277
|
+
if isinstance(repo, Repository):
|
|
278
|
+
repo_language_map[repo.full_name] = repo.language or "Unknown"
|
|
279
|
+
else:
|
|
280
|
+
repo_language_map[repo["full_name"]] = repo.get("language", "Unknown")
|
|
275
281
|
|
|
276
282
|
# Group commits by language
|
|
277
283
|
language_commits = defaultdict(list)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: greenmining
|
|
3
|
-
Version: 1.0.1
|
|
3
|
+
Version: 1.0.3
|
|
4
4
|
Summary: Analyze GitHub repositories to identify green software engineering patterns and energy-efficient practices
|
|
5
5
|
Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
|
|
6
6
|
License: MIT
|
|
@@ -397,6 +397,119 @@ controller.generate_report()
|
|
|
397
397
|
print("Analysis complete! Check data/ directory for results.")
|
|
398
398
|
```
|
|
399
399
|
|
|
400
|
+
#### Complete Working Example: Full Pipeline
|
|
401
|
+
|
|
402
|
+
This complete, production-ready example demonstrates the entire analysis pipeline. In our testing, it successfully analyzed 100 repositories with 30,543 commits.
|
|
403
|
+
|
|
404
|
+
```python
|
|
405
|
+
import os
|
|
406
|
+
from pathlib import Path
|
|
407
|
+
from dotenv import load_dotenv
|
|
408
|
+
|
|
409
|
+
# Load environment variables
|
|
410
|
+
load_dotenv()
|
|
411
|
+
|
|
412
|
+
# Import from greenmining package
|
|
413
|
+
from greenmining import fetch_repositories
|
|
414
|
+
from greenmining.services.commit_extractor import CommitExtractor
|
|
415
|
+
from greenmining.services.data_analyzer import DataAnalyzer
|
|
416
|
+
from greenmining.services.data_aggregator import DataAggregator
|
|
417
|
+
|
|
418
|
+
# Configuration
|
|
419
|
+
token = os.getenv("GITHUB_TOKEN")
|
|
420
|
+
output_dir = Path("results")
|
|
421
|
+
output_dir.mkdir(exist_ok=True)
|
|
422
|
+
|
|
423
|
+
# STAGE 1: Fetch Repositories
|
|
424
|
+
print("Fetching repositories...")
|
|
425
|
+
repositories = fetch_repositories(
|
|
426
|
+
github_token=token,
|
|
427
|
+
max_repos=100,
|
|
428
|
+
min_stars=10,
|
|
429
|
+
keywords="software engineering",
|
|
430
|
+
)
|
|
431
|
+
print(f"✓ Fetched {len(repositories)} repositories")
|
|
432
|
+
|
|
433
|
+
# STAGE 2: Extract Commits
|
|
434
|
+
print("\nExtracting commits...")
|
|
435
|
+
extractor = CommitExtractor(
|
|
436
|
+
github_token=token,
|
|
437
|
+
max_commits=1000,
|
|
438
|
+
skip_merges=True,
|
|
439
|
+
days_back=730,
|
|
440
|
+
timeout=120,
|
|
441
|
+
)
|
|
442
|
+
all_commits = extractor.extract_from_repositories(repositories)
|
|
443
|
+
print(f"✓ Extracted {len(all_commits)} commits")
|
|
444
|
+
|
|
445
|
+
# Save commits
|
|
446
|
+
extractor.save_results(
|
|
447
|
+
all_commits,
|
|
448
|
+
output_dir / "commits.json",
|
|
449
|
+
len(repositories)
|
|
450
|
+
)
|
|
451
|
+
|
|
452
|
+
# STAGE 3: Analyze Commits
|
|
453
|
+
print("\nAnalyzing commits...")
|
|
454
|
+
analyzer = DataAnalyzer(
|
|
455
|
+
enable_nlp=True,
|
|
456
|
+
enable_ml_features=True,
|
|
457
|
+
enable_diff_analysis=False, # Set to True for detailed code analysis (slower)
|
|
458
|
+
)
|
|
459
|
+
analyzed_commits = analyzer.analyze_commits(all_commits)
|
|
460
|
+
|
|
461
|
+
# Count green-aware commits
|
|
462
|
+
green_count = sum(1 for c in analyzed_commits if c.get("green_aware", False))
|
|
463
|
+
green_percentage = (green_count / len(analyzed_commits) * 100) if analyzed_commits else 0
|
|
464
|
+
print(f"✓ Analyzed {len(analyzed_commits)} commits")
|
|
465
|
+
print(f"✓ Green-aware: {green_count} ({green_percentage:.1f}%)")
|
|
466
|
+
|
|
467
|
+
# Save analysis
|
|
468
|
+
analyzer.save_results(analyzed_commits, output_dir / "analyzed.json")
|
|
469
|
+
|
|
470
|
+
# STAGE 4: Aggregate Results
|
|
471
|
+
print("\nAggregating results...")
|
|
472
|
+
aggregator = DataAggregator(
|
|
473
|
+
enable_enhanced_stats=True,
|
|
474
|
+
enable_temporal=True,
|
|
475
|
+
temporal_granularity="quarter",
|
|
476
|
+
)
|
|
477
|
+
results = aggregator.aggregate(analyzed_commits, repositories)
|
|
478
|
+
|
|
479
|
+
# STAGE 5: Save Results
|
|
480
|
+
print("\nSaving results...")
|
|
481
|
+
aggregator.save_results(
|
|
482
|
+
results,
|
|
483
|
+
output_dir / "aggregated.json",
|
|
484
|
+
output_dir / "aggregated.csv",
|
|
485
|
+
analyzed_commits
|
|
486
|
+
)
|
|
487
|
+
|
|
488
|
+
# Print summary
|
|
489
|
+
print("\n" + "="*80)
|
|
490
|
+
print("ANALYSIS COMPLETE")
|
|
491
|
+
print("="*80)
|
|
492
|
+
aggregator.print_summary(results)
|
|
493
|
+
print(f"\n📁 Results saved in: {output_dir.absolute()}")
|
|
494
|
+
```
|
|
495
|
+
|
|
496
|
+
**What this example does:**
|
|
497
|
+
|
|
498
|
+
1. **Fetches repositories** from GitHub based on keywords and filters
|
|
499
|
+
2. **Extracts commits** from each repository (up to 1000 per repo)
|
|
500
|
+
3. **Analyzes commits** for green software patterns using NLP and ML
|
|
501
|
+
4. **Aggregates results** with temporal analysis and enhanced statistics
|
|
502
|
+
5. **Saves results** to JSON and CSV files for further analysis
|
|
503
|
+
|
|
504
|
+
**Expected output files:**
|
|
505
|
+
- `commits.json` - All extracted commits with metadata
|
|
506
|
+
- `analyzed.json` - Commits analyzed for green patterns
|
|
507
|
+
- `aggregated.json` - Summary statistics and pattern distributions
|
|
508
|
+
- `aggregated.csv` - Tabular format for spreadsheet analysis
|
|
509
|
+
- `metadata.json` - Experiment configuration and timing
|
|
510
|
+
|
|
511
|
+
**Performance:** This pipeline successfully processed 100 repositories (30,543 commits) in approximately 6.4 hours, identifying 7,600 green-aware commits (24.9%).
|
|
512
|
+
|
|
400
513
|
### Docker Usage
|
|
401
514
|
|
|
402
515
|
```bash
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
greenmining/__init__.py,sha256=
|
|
1
|
+
greenmining/__init__.py,sha256=p_pk0TmyP34o97wTYVTHkXe7qpGtH43GUVD_iCadrYY,1763
|
|
2
2
|
greenmining/__main__.py,sha256=1RwcSXcwdza6xJX5fRT8-HhZjlnKbkmGY_uxTm-NYZ4,138
|
|
3
|
-
greenmining/__version__.py,sha256=
|
|
3
|
+
greenmining/__version__.py,sha256=3OgUZ5K2OXa9_-2kjlgye1N6G_QeQDeex2uw33Ja6Cs,66
|
|
4
4
|
greenmining/cli.py,sha256=40eKDEZHNeDVb91xKBG70VfPk45mwb4YjuVCC2efVPA,17458
|
|
5
5
|
greenmining/config.py,sha256=1_puT52zNS589hTxEZ3UCqRC_Qw5Jw2UupUPNbNz_hs,5195
|
|
6
6
|
greenmining/gsf_patterns.py,sha256=Prsk_stnQrfOsk0x0zn-zdevbueAnPfGDM4XNA9PbdA,54664
|
|
@@ -23,14 +23,14 @@ greenmining/models/repository.py,sha256=k1X9UYZYLl0RznohOHx_Y5wur-ZBvLcNyc9vPVAr
|
|
|
23
23
|
greenmining/presenters/__init__.py,sha256=-ukAvhNuTvy1Xpknps0faDZ78HKdPHPySzFpQHABzKM,203
|
|
24
24
|
greenmining/presenters/console_presenter.py,sha256=ykJ9Hgors2dRTqQNaqCTxH4fd49F0AslQTgUOr_csI0,5347
|
|
25
25
|
greenmining/services/__init__.py,sha256=7CJDjHMTrY0bBoqzx22AUzIwEvby0FbAUUKYbjSlNPQ,460
|
|
26
|
-
greenmining/services/commit_extractor.py,sha256=
|
|
27
|
-
greenmining/services/data_aggregator.py,sha256=
|
|
26
|
+
greenmining/services/commit_extractor.py,sha256=FSgoHpMvoqjZ6b1UQYtwfUaLVX_GDfiR0BVd51y-gYk,13126
|
|
27
|
+
greenmining/services/data_aggregator.py,sha256=OqJvQZp9xaZaSmbwWoiHAHECAghd8agbhVmStDvebOU,24054
|
|
28
28
|
greenmining/services/data_analyzer.py,sha256=HZDQLFZDCwCUGIzRjypyXC09Fl_-zaxhly74n3siwQc,16325
|
|
29
29
|
greenmining/services/github_fetcher.py,sha256=J47-plM_NKXwHDSWNBuSUZMnZnGP6wXiJyrVfeWT9ug,11360
|
|
30
30
|
greenmining/services/reports.py,sha256=NCNI9SCTnSLeAO8WmkNIdkB0hr-XyVpuzV0sovOoUOM,27107
|
|
31
|
-
greenmining-1.0.
|
|
32
|
-
greenmining-1.0.
|
|
33
|
-
greenmining-1.0.
|
|
34
|
-
greenmining-1.0.
|
|
35
|
-
greenmining-1.0.
|
|
36
|
-
greenmining-1.0.
|
|
31
|
+
greenmining-1.0.3.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
|
|
32
|
+
greenmining-1.0.3.dist-info/METADATA,sha256=0Hj5qXVUkuJhIUBZBRhSysc3zx6L3py0HpZg9vKcl7Y,29260
|
|
33
|
+
greenmining-1.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
34
|
+
greenmining-1.0.3.dist-info/entry_points.txt,sha256=oHvTWMzNFGf2W3CFEKVVPsG4exeMv0MaQu9YsUoQ9lw,53
|
|
35
|
+
greenmining-1.0.3.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
|
|
36
|
+
greenmining-1.0.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|