greenmining 1.0.5__tar.gz → 1.0.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {greenmining-1.0.5 → greenmining-1.0.7}/CHANGELOG.md +0 -1
- {greenmining-1.0.5/greenmining.egg-info → greenmining-1.0.7}/PKG-INFO +212 -43
- {greenmining-1.0.5 → greenmining-1.0.7}/README.md +204 -40
- greenmining-1.0.7/greenmining/__init__.py +95 -0
- greenmining-1.0.7/greenmining/analyzers/__init__.py +22 -0
- greenmining-1.0.7/greenmining/analyzers/metrics_power_correlator.py +165 -0
- greenmining-1.0.7/greenmining/analyzers/power_regression.py +212 -0
- greenmining-1.0.7/greenmining/analyzers/version_power_analyzer.py +246 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/config.py +46 -34
- greenmining-1.0.7/greenmining/dashboard/__init__.py +5 -0
- greenmining-1.0.7/greenmining/dashboard/app.py +200 -0
- greenmining-1.0.7/greenmining/energy/__init__.py +20 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/energy/base.py +45 -35
- greenmining-1.0.7/greenmining/energy/carbon_reporter.py +242 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/energy/codecarbon_meter.py +25 -24
- greenmining-1.0.7/greenmining/energy/cpu_meter.py +144 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/energy/rapl.py +30 -36
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/services/__init__.py +13 -3
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/services/commit_extractor.py +9 -5
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/services/local_repo_analyzer.py +325 -63
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/services/reports.py +5 -8
- {greenmining-1.0.5 → greenmining-1.0.7/greenmining.egg-info}/PKG-INFO +212 -43
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining.egg-info/SOURCES.txt +14 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining.egg-info/requires.txt +7 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/pyproject.toml +18 -6
- greenmining-1.0.5/greenmining/__init__.py +0 -43
- greenmining-1.0.5/greenmining/analyzers/__init__.py +0 -13
- greenmining-1.0.5/greenmining/energy/__init__.py +0 -13
- {greenmining-1.0.5 → greenmining-1.0.7}/LICENSE +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/MANIFEST.in +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/__main__.py +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/__version__.py +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/analyzers/code_diff_analyzer.py +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/analyzers/qualitative_analyzer.py +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/analyzers/statistical_analyzer.py +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/analyzers/temporal_analyzer.py +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/controllers/__init__.py +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/controllers/repository_controller.py +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/gsf_patterns.py +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/models/__init__.py +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/models/aggregated_stats.py +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/models/analysis_result.py +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/models/commit.py +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/models/repository.py +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/presenters/__init__.py +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/presenters/console_presenter.py +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/services/data_aggregator.py +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/services/data_analyzer.py +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/services/github_fetcher.py +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/services/github_graphql_fetcher.py +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/utils.py +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining.egg-info/dependency_links.txt +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/greenmining.egg-info/top_level.txt +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/setup.cfg +0 -0
- {greenmining-1.0.5 → greenmining-1.0.7}/setup.py +0 -0
|
@@ -84,7 +84,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
84
84
|
- Pattern matching engine
|
|
85
85
|
- Green awareness detection
|
|
86
86
|
- Data analysis and reporting
|
|
87
|
-
- CLI interface with Click
|
|
88
87
|
- Docker support with multi-stage builds
|
|
89
88
|
- GitHub Actions CI/CD pipeline
|
|
90
89
|
- PyPI publishing workflow
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: greenmining
|
|
3
|
-
Version: 1.0.5
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 1.0.7
|
|
4
|
+
Summary: An empirical Python library for Mining Software Repositories (MSR) in Green IT research
|
|
5
5
|
Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
|
|
6
6
|
License: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/adam-bouafia/greenmining
|
|
@@ -9,7 +9,7 @@ Project-URL: Documentation, https://github.com/adam-bouafia/greenmining#readme
|
|
|
9
9
|
Project-URL: Repository, https://github.com/adam-bouafia/greenmining
|
|
10
10
|
Project-URL: Issues, https://github.com/adam-bouafia/greenmining/issues
|
|
11
11
|
Project-URL: Changelog, https://github.com/adam-bouafia/greenmining/blob/main/CHANGELOG.md
|
|
12
|
-
Keywords: green-software,gsf,sustainability,carbon-footprint,
|
|
12
|
+
Keywords: green-software,gsf,msr,mining-software-repositories,green-it,sustainability,carbon-footprint,energy-efficiency,repository-analysis,github-analysis,pydriller,empirical-software-engineering
|
|
13
13
|
Classifier: Development Status :: 3 - Alpha
|
|
14
14
|
Classifier: Intended Audience :: Developers
|
|
15
15
|
Classifier: Intended Audience :: Science/Research
|
|
@@ -45,6 +45,11 @@ Requires-Dist: ruff>=0.1.9; extra == "dev"
|
|
|
45
45
|
Requires-Dist: mypy>=1.8.0; extra == "dev"
|
|
46
46
|
Requires-Dist: build>=1.0.5; extra == "dev"
|
|
47
47
|
Requires-Dist: twine>=4.0.2; extra == "dev"
|
|
48
|
+
Provides-Extra: energy
|
|
49
|
+
Requires-Dist: psutil>=5.9.0; extra == "energy"
|
|
50
|
+
Requires-Dist: codecarbon>=2.3.0; extra == "energy"
|
|
51
|
+
Provides-Extra: dashboard
|
|
52
|
+
Requires-Dist: flask>=3.0.0; extra == "dashboard"
|
|
48
53
|
Provides-Extra: docs
|
|
49
54
|
Requires-Dist: sphinx>=7.2.0; extra == "docs"
|
|
50
55
|
Requires-Dist: sphinx-rtd-theme>=2.0.0; extra == "docs"
|
|
@@ -53,15 +58,30 @@ Dynamic: license-file
|
|
|
53
58
|
|
|
54
59
|
# greenmining
|
|
55
60
|
|
|
56
|
-
|
|
61
|
+
An empirical Python library for Mining Software Repositories (MSR) in Green IT research.
|
|
57
62
|
|
|
58
63
|
[PyPI version](https://pypi.org/project/greenmining/)
|
|
59
64
|
[Python versions](https://pypi.org/project/greenmining/)
|
|
60
65
|
[License](LICENSE)
|
|
66
|
+
[Documentation](https://greenmining.readthedocs.io/)
|
|
61
67
|
|
|
62
68
|
## Overview
|
|
63
69
|
|
|
64
|
-
`greenmining` is a Python library for
|
|
70
|
+
`greenmining` is a research-grade Python library designed for **empirical Mining Software Repositories (MSR)** studies in **Green IT**. It enables researchers and practitioners to:
|
|
71
|
+
|
|
72
|
+
- **Mine repositories at scale** - Fetch and analyze GitHub repositories via GraphQL API with configurable filters
|
|
73
|
+
- **Batch analysis with parallelism** - Analyze multiple repositories concurrently with configurable worker pools
|
|
74
|
+
- **Classify green commits** - Detect 122 sustainability patterns from the Green Software Foundation (GSF) catalog
|
|
75
|
+
- **Analyze any repository by URL** - Direct PyDriller-based analysis with support for private repositories
|
|
76
|
+
- **Measure energy consumption** - RAPL, CodeCarbon, and CPU Energy Meter backends for power profiling
|
|
77
|
+
- **Carbon footprint reporting** - CO2 emissions calculation with 20+ country profiles and cloud region support
|
|
78
|
+
- **Power regression detection** - Identify commits that increased energy consumption
|
|
79
|
+
- **Method-level analysis** - Per-method complexity and metrics via Lizard integration
|
|
80
|
+
- **Version power comparison** - Compare power consumption across software versions
|
|
81
|
+
- **Generate research datasets** - Statistical analysis, temporal trends, and publication-ready reports
|
|
82
|
+
- **Web dashboard** - Flask-based interactive visualization of analysis results
|
|
83
|
+
|
|
84
|
+
Whether you're conducting MSR research, analyzing green software adoption, or measuring the energy footprint of codebases, GreenMining provides the empirical toolkit you need.
|
|
65
85
|
|
|
66
86
|
## Installation
|
|
67
87
|
|
|
@@ -310,7 +330,137 @@ print(f"Top patterns: {stats['top_patterns'][:5]}")
|
|
|
310
330
|
aggregator.export_to_csv(results, "output.csv")
|
|
311
331
|
```
|
|
312
332
|
|
|
313
|
-
####
|
|
333
|
+
#### URL-Based Repository Analysis
|
|
334
|
+
|
|
335
|
+
```python
|
|
336
|
+
from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
|
|
337
|
+
|
|
338
|
+
analyzer = LocalRepoAnalyzer(
|
|
339
|
+
max_commits=200,
|
|
340
|
+
cleanup_after=True,
|
|
341
|
+
)
|
|
342
|
+
|
|
343
|
+
result = analyzer.analyze_repository("https://github.com/pallets/flask")
|
|
344
|
+
|
|
345
|
+
print(f"Repository: {result.name}")
|
|
346
|
+
print(f"Commits analyzed: {result.total_commits}")
|
|
347
|
+
print(f"Green-aware: {result.green_commits} ({result.green_commit_rate:.1%})")
|
|
348
|
+
|
|
349
|
+
for commit in result.commits[:5]:
|
|
350
|
+
if commit.green_aware:
|
|
351
|
+
print(f" {commit.message[:60]}...")
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
#### Batch Analysis with Parallelism
|
|
355
|
+
|
|
356
|
+
```python
|
|
357
|
+
from greenmining import analyze_repositories
|
|
358
|
+
|
|
359
|
+
results = analyze_repositories(
|
|
360
|
+
urls=[
|
|
361
|
+
"https://github.com/kubernetes/kubernetes",
|
|
362
|
+
"https://github.com/istio/istio",
|
|
363
|
+
"https://github.com/envoyproxy/envoy",
|
|
364
|
+
],
|
|
365
|
+
max_commits=100,
|
|
366
|
+
parallel_workers=3,
|
|
367
|
+
energy_tracking=True,
|
|
368
|
+
energy_backend="auto",
|
|
369
|
+
)
|
|
370
|
+
|
|
371
|
+
for result in results:
|
|
372
|
+
print(f"{result.name}: {result.green_commit_rate:.1%} green")
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
#### Private Repository Analysis
|
|
376
|
+
|
|
377
|
+
```python
|
|
378
|
+
from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
|
|
379
|
+
|
|
380
|
+
# HTTPS with token
|
|
381
|
+
analyzer = LocalRepoAnalyzer(github_token="ghp_xxxx")
|
|
382
|
+
result = analyzer.analyze_repository("https://github.com/company/private-repo")
|
|
383
|
+
|
|
384
|
+
# SSH with key
|
|
385
|
+
analyzer = LocalRepoAnalyzer(ssh_key_path="~/.ssh/id_rsa")
|
|
386
|
+
result = analyzer.analyze_repository("git@github.com:company/private-repo.git")
|
|
387
|
+
```
|
|
388
|
+
|
|
389
|
+
#### Power Regression Detection
|
|
390
|
+
|
|
391
|
+
```python
|
|
392
|
+
from greenmining.analyzers import PowerRegressionDetector
|
|
393
|
+
|
|
394
|
+
detector = PowerRegressionDetector(
|
|
395
|
+
test_command="pytest tests/ -x",
|
|
396
|
+
energy_backend="rapl",
|
|
397
|
+
threshold_percent=5.0,
|
|
398
|
+
iterations=5,
|
|
399
|
+
)
|
|
400
|
+
|
|
401
|
+
regressions = detector.detect(
|
|
402
|
+
repo_path="/path/to/repo",
|
|
403
|
+
baseline_commit="v1.0.0",
|
|
404
|
+
target_commit="HEAD",
|
|
405
|
+
)
|
|
406
|
+
|
|
407
|
+
for regression in regressions:
|
|
408
|
+
print(f"Commit {regression.sha[:8]}: +{regression.power_increase:.1f}%")
|
|
409
|
+
```
|
|
410
|
+
|
|
411
|
+
#### Version Power Comparison
|
|
412
|
+
|
|
413
|
+
```python
|
|
414
|
+
from greenmining.analyzers import VersionPowerAnalyzer
|
|
415
|
+
|
|
416
|
+
analyzer = VersionPowerAnalyzer(
|
|
417
|
+
test_command="pytest tests/",
|
|
418
|
+
energy_backend="rapl",
|
|
419
|
+
iterations=10,
|
|
420
|
+
warmup_iterations=2,
|
|
421
|
+
)
|
|
422
|
+
|
|
423
|
+
report = analyzer.analyze_versions(
|
|
424
|
+
repo_path="/path/to/repo",
|
|
425
|
+
versions=["v1.0", "v1.1", "v1.2", "v2.0"],
|
|
426
|
+
)
|
|
427
|
+
|
|
428
|
+
print(report.summary())
|
|
429
|
+
print(f"Trend: {report.trend}")
|
|
430
|
+
print(f"Most efficient: {report.most_efficient}")
|
|
431
|
+
```
|
|
432
|
+
|
|
433
|
+
#### Metrics-to-Power Correlation
|
|
434
|
+
|
|
435
|
+
```python
|
|
436
|
+
from greenmining.analyzers import MetricsPowerCorrelator
|
|
437
|
+
|
|
438
|
+
correlator = MetricsPowerCorrelator()
|
|
439
|
+
correlator.fit(
|
|
440
|
+
metrics=["complexity", "nloc", "code_churn"],
|
|
441
|
+
metrics_values={
|
|
442
|
+
"complexity": [10, 20, 30, 40],
|
|
443
|
+
"nloc": [100, 200, 300, 400],
|
|
444
|
+
"code_churn": [50, 100, 150, 200],
|
|
445
|
+
},
|
|
446
|
+
power_measurements=[5.0, 8.0, 12.0, 15.0],
|
|
447
|
+
)
|
|
448
|
+
|
|
449
|
+
print(f"Pearson: {correlator.pearson}")
|
|
450
|
+
print(f"Spearman: {correlator.spearman}")
|
|
451
|
+
print(f"Feature importance: {correlator.feature_importance}")
|
|
452
|
+
```
|
|
453
|
+
|
|
454
|
+
#### Web Dashboard
|
|
455
|
+
|
|
456
|
+
```python
|
|
457
|
+
from greenmining.dashboard import run_dashboard
|
|
458
|
+
|
|
459
|
+
# Launch interactive dashboard (requires pip install greenmining[dashboard])
|
|
460
|
+
run_dashboard(data_dir="./data", host="127.0.0.1", port=5000)
|
|
461
|
+
```
|
|
462
|
+
|
|
463
|
+
#### Pipeline Batch Analysis
|
|
314
464
|
|
|
315
465
|
```python
|
|
316
466
|
from greenmining.controllers.repository_controller import RepositoryController
|
|
@@ -531,17 +681,24 @@ config = Config(
|
|
|
531
681
|
|
|
532
682
|
### Core Capabilities
|
|
533
683
|
|
|
534
|
-
- **Pattern Detection**:
|
|
535
|
-
- **Keyword Analysis**:
|
|
536
|
-
- **
|
|
537
|
-
- **
|
|
538
|
-
- **Batch Processing**:
|
|
539
|
-
- **
|
|
540
|
-
- **
|
|
684
|
+
- **Pattern Detection**: 122 sustainability patterns across 15 categories from the GSF catalog
|
|
685
|
+
- **Keyword Analysis**: 321 green software detection keywords
|
|
686
|
+
- **Repository Fetching**: GraphQL API with date, star, and language filters
|
|
687
|
+
- **URL-Based Analysis**: Direct PyDriller analysis from GitHub URLs (HTTPS and SSH)
|
|
688
|
+
- **Batch Processing**: Parallel analysis of multiple repositories with configurable workers
|
|
689
|
+
- **Private Repository Support**: Authentication via SSH keys or GitHub tokens
|
|
690
|
+
- **Energy Measurement**: RAPL, CodeCarbon, and CPU Energy Meter backends
|
|
691
|
+
- **Carbon Footprint Reporting**: CO2 emissions with 20+ country profiles and cloud region support (AWS, GCP, Azure)
|
|
692
|
+
- **Power Regression Detection**: Identify commits that increased energy consumption
|
|
693
|
+
- **Metrics-to-Power Correlation**: Pearson and Spearman analysis between code metrics and power
|
|
694
|
+
- **Version Power Comparison**: Compare power consumption across software versions with trend detection
|
|
695
|
+
- **Method-Level Analysis**: Per-method complexity metrics via Lizard integration
|
|
696
|
+
- **Source Code Access**: Before/after source code for refactoring detection
|
|
697
|
+
- **Full Process Metrics**: All 8 PyDriller process metrics (ChangeSet, CodeChurn, CommitsCount, ContributorsCount, ContributorsExperience, HistoryComplexity, HunksCount, LinesCount)
|
|
698
|
+
- **Statistical Analysis**: Correlations, effect sizes, and temporal trends
|
|
699
|
+
- **Multi-format Output**: Markdown reports, CSV exports, JSON data
|
|
700
|
+
- **Web Dashboard**: Flask-based interactive visualization (`pip install greenmining[dashboard]`)
|
|
541
701
|
- **Docker Support**: Pre-built images for containerized analysis
|
|
542
|
-
- **Programmatic API**: Full Python API for custom workflows and integrations
|
|
543
|
-
- **Clean Architecture**: Modular design with services layer (Fetcher, Extractor, Analyzer, Aggregator, Reports)
|
|
544
|
-
- **Energy Measurement**: Real-time energy consumption tracking via RAPL (Linux) or CodeCarbon (cross-platform)
|
|
545
702
|
|
|
546
703
|
### Energy Measurement
|
|
547
704
|
|
|
@@ -553,38 +710,44 @@ greenmining includes built-in energy measurement capabilities for tracking the c
|
|
|
553
710
|
|---------|----------|---------|--------------|
|
|
554
711
|
| **RAPL** | Linux (Intel/AMD) | CPU/RAM energy (Joules) | `/sys/class/powercap/` access |
|
|
555
712
|
| **CodeCarbon** | Cross-platform | Energy + Carbon emissions (gCO2) | `pip install codecarbon` |
|
|
713
|
+
| **CPU Meter** | All platforms | Estimated CPU energy (Joules) | Optional: `pip install psutil` |
|
|
714
|
+
| **Auto** | All platforms | Best available backend | Automatic detection |
|
|
556
715
|
|
|
557
716
|
#### Python API
|
|
558
717
|
|
|
559
718
|
```python
|
|
560
|
-
from greenmining.energy import RAPLEnergyMeter,
|
|
561
|
-
|
|
562
|
-
#
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
#
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
print(f"Energy: {result.energy_joules:.2f} J")
|
|
577
|
-
print(f"Carbon: {result.carbon_grams:.4f} gCO2")
|
|
719
|
+
from greenmining.energy import RAPLEnergyMeter, CPUEnergyMeter, get_energy_meter
|
|
720
|
+
|
|
721
|
+
# Auto-detect best backend
|
|
722
|
+
meter = get_energy_meter("auto")
|
|
723
|
+
meter.start()
|
|
724
|
+
# ... run analysis ...
|
|
725
|
+
result = meter.stop()
|
|
726
|
+
print(f"Energy: {result.joules:.2f} J")
|
|
727
|
+
print(f"Power: {result.watts_avg:.2f} W")
|
|
728
|
+
|
|
729
|
+
# Integrated energy tracking during analysis
|
|
730
|
+
from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
|
|
731
|
+
|
|
732
|
+
analyzer = LocalRepoAnalyzer(energy_tracking=True, energy_backend="auto")
|
|
733
|
+
result = analyzer.analyze_repository("https://github.com/pallets/flask")
|
|
734
|
+
print(f"Analysis energy: {result.energy_metrics['joules']:.2f} J")
|
|
578
735
|
```
|
|
579
736
|
|
|
580
|
-
####
|
|
737
|
+
#### Carbon Footprint Reporting
|
|
581
738
|
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
739
|
+
```python
|
|
740
|
+
from greenmining.energy import CarbonReporter
|
|
741
|
+
|
|
742
|
+
reporter = CarbonReporter(
|
|
743
|
+
country_iso="USA",
|
|
744
|
+
cloud_provider="aws",
|
|
745
|
+
region="us-east-1",
|
|
746
|
+
)
|
|
747
|
+
report = reporter.generate_report(total_joules=3600.0)
|
|
748
|
+
print(f"CO2: {report.total_emissions_kg * 1000:.4f} grams")
|
|
749
|
+
print(f"Equivalent: {report.tree_months:.2f} tree-months to offset")
|
|
750
|
+
```
|
|
588
751
|
|
|
589
752
|
### Pattern Database
|
|
590
753
|
|
|
@@ -687,8 +850,14 @@ ruff check greenmining/ tests/
|
|
|
687
850
|
- PyGithub >= 2.1.1
|
|
688
851
|
- PyDriller >= 2.5
|
|
689
852
|
- pandas >= 2.2.0
|
|
690
|
-
|
|
691
|
-
|
|
853
|
+
|
|
854
|
+
**Optional dependencies:**
|
|
855
|
+
|
|
856
|
+
```bash
|
|
857
|
+
pip install greenmining[energy] # psutil, codecarbon (energy measurement)
|
|
858
|
+
pip install greenmining[dashboard] # flask (web dashboard)
|
|
859
|
+
pip install greenmining[dev] # pytest, black, ruff, mypy (development)
|
|
860
|
+
```
|
|
692
861
|
|
|
693
862
|
## License
|
|
694
863
|
|
|
@@ -1,14 +1,29 @@
|
|
|
1
1
|
# greenmining
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
An empirical Python library for Mining Software Repositories (MSR) in Green IT research.
|
|
4
4
|
|
|
5
5
|
[PyPI version](https://pypi.org/project/greenmining/)
|
|
6
6
|
[Python versions](https://pypi.org/project/greenmining/)
|
|
7
7
|
[License](LICENSE)
|
|
8
|
+
[Documentation](https://greenmining.readthedocs.io/)
|
|
8
9
|
|
|
9
10
|
## Overview
|
|
10
11
|
|
|
11
|
-
`greenmining` is a Python library for
|
|
12
|
+
`greenmining` is a research-grade Python library designed for **empirical Mining Software Repositories (MSR)** studies in **Green IT**. It enables researchers and practitioners to:
|
|
13
|
+
|
|
14
|
+
- **Mine repositories at scale** - Fetch and analyze GitHub repositories via GraphQL API with configurable filters
|
|
15
|
+
- **Batch analysis with parallelism** - Analyze multiple repositories concurrently with configurable worker pools
|
|
16
|
+
- **Classify green commits** - Detect 122 sustainability patterns from the Green Software Foundation (GSF) catalog
|
|
17
|
+
- **Analyze any repository by URL** - Direct PyDriller-based analysis with support for private repositories
|
|
18
|
+
- **Measure energy consumption** - RAPL, CodeCarbon, and CPU Energy Meter backends for power profiling
|
|
19
|
+
- **Carbon footprint reporting** - CO2 emissions calculation with 20+ country profiles and cloud region support
|
|
20
|
+
- **Power regression detection** - Identify commits that increased energy consumption
|
|
21
|
+
- **Method-level analysis** - Per-method complexity and metrics via Lizard integration
|
|
22
|
+
- **Version power comparison** - Compare power consumption across software versions
|
|
23
|
+
- **Generate research datasets** - Statistical analysis, temporal trends, and publication-ready reports
|
|
24
|
+
- **Web dashboard** - Flask-based interactive visualization of analysis results
|
|
25
|
+
|
|
26
|
+
Whether you're conducting MSR research, analyzing green software adoption, or measuring the energy footprint of codebases, GreenMining provides the empirical toolkit you need.
|
|
12
27
|
|
|
13
28
|
## Installation
|
|
14
29
|
|
|
@@ -257,7 +272,137 @@ print(f"Top patterns: {stats['top_patterns'][:5]}")
|
|
|
257
272
|
aggregator.export_to_csv(results, "output.csv")
|
|
258
273
|
```
|
|
259
274
|
|
|
260
|
-
####
|
|
275
|
+
#### URL-Based Repository Analysis
|
|
276
|
+
|
|
277
|
+
```python
|
|
278
|
+
from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
|
|
279
|
+
|
|
280
|
+
analyzer = LocalRepoAnalyzer(
|
|
281
|
+
max_commits=200,
|
|
282
|
+
cleanup_after=True,
|
|
283
|
+
)
|
|
284
|
+
|
|
285
|
+
result = analyzer.analyze_repository("https://github.com/pallets/flask")
|
|
286
|
+
|
|
287
|
+
print(f"Repository: {result.name}")
|
|
288
|
+
print(f"Commits analyzed: {result.total_commits}")
|
|
289
|
+
print(f"Green-aware: {result.green_commits} ({result.green_commit_rate:.1%})")
|
|
290
|
+
|
|
291
|
+
for commit in result.commits[:5]:
|
|
292
|
+
if commit.green_aware:
|
|
293
|
+
print(f" {commit.message[:60]}...")
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
#### Batch Analysis with Parallelism
|
|
297
|
+
|
|
298
|
+
```python
|
|
299
|
+
from greenmining import analyze_repositories
|
|
300
|
+
|
|
301
|
+
results = analyze_repositories(
|
|
302
|
+
urls=[
|
|
303
|
+
"https://github.com/kubernetes/kubernetes",
|
|
304
|
+
"https://github.com/istio/istio",
|
|
305
|
+
"https://github.com/envoyproxy/envoy",
|
|
306
|
+
],
|
|
307
|
+
max_commits=100,
|
|
308
|
+
parallel_workers=3,
|
|
309
|
+
energy_tracking=True,
|
|
310
|
+
energy_backend="auto",
|
|
311
|
+
)
|
|
312
|
+
|
|
313
|
+
for result in results:
|
|
314
|
+
print(f"{result.name}: {result.green_commit_rate:.1%} green")
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
#### Private Repository Analysis
|
|
318
|
+
|
|
319
|
+
```python
|
|
320
|
+
from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
|
|
321
|
+
|
|
322
|
+
# HTTPS with token
|
|
323
|
+
analyzer = LocalRepoAnalyzer(github_token="ghp_xxxx")
|
|
324
|
+
result = analyzer.analyze_repository("https://github.com/company/private-repo")
|
|
325
|
+
|
|
326
|
+
# SSH with key
|
|
327
|
+
analyzer = LocalRepoAnalyzer(ssh_key_path="~/.ssh/id_rsa")
|
|
328
|
+
result = analyzer.analyze_repository("git@github.com:company/private-repo.git")
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
#### Power Regression Detection
|
|
332
|
+
|
|
333
|
+
```python
|
|
334
|
+
from greenmining.analyzers import PowerRegressionDetector
|
|
335
|
+
|
|
336
|
+
detector = PowerRegressionDetector(
|
|
337
|
+
test_command="pytest tests/ -x",
|
|
338
|
+
energy_backend="rapl",
|
|
339
|
+
threshold_percent=5.0,
|
|
340
|
+
iterations=5,
|
|
341
|
+
)
|
|
342
|
+
|
|
343
|
+
regressions = detector.detect(
|
|
344
|
+
repo_path="/path/to/repo",
|
|
345
|
+
baseline_commit="v1.0.0",
|
|
346
|
+
target_commit="HEAD",
|
|
347
|
+
)
|
|
348
|
+
|
|
349
|
+
for regression in regressions:
|
|
350
|
+
print(f"Commit {regression.sha[:8]}: +{regression.power_increase:.1f}%")
|
|
351
|
+
```
|
|
352
|
+
|
|
353
|
+
#### Version Power Comparison
|
|
354
|
+
|
|
355
|
+
```python
|
|
356
|
+
from greenmining.analyzers import VersionPowerAnalyzer
|
|
357
|
+
|
|
358
|
+
analyzer = VersionPowerAnalyzer(
|
|
359
|
+
test_command="pytest tests/",
|
|
360
|
+
energy_backend="rapl",
|
|
361
|
+
iterations=10,
|
|
362
|
+
warmup_iterations=2,
|
|
363
|
+
)
|
|
364
|
+
|
|
365
|
+
report = analyzer.analyze_versions(
|
|
366
|
+
repo_path="/path/to/repo",
|
|
367
|
+
versions=["v1.0", "v1.1", "v1.2", "v2.0"],
|
|
368
|
+
)
|
|
369
|
+
|
|
370
|
+
print(report.summary())
|
|
371
|
+
print(f"Trend: {report.trend}")
|
|
372
|
+
print(f"Most efficient: {report.most_efficient}")
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
#### Metrics-to-Power Correlation
|
|
376
|
+
|
|
377
|
+
```python
|
|
378
|
+
from greenmining.analyzers import MetricsPowerCorrelator
|
|
379
|
+
|
|
380
|
+
correlator = MetricsPowerCorrelator()
|
|
381
|
+
correlator.fit(
|
|
382
|
+
metrics=["complexity", "nloc", "code_churn"],
|
|
383
|
+
metrics_values={
|
|
384
|
+
"complexity": [10, 20, 30, 40],
|
|
385
|
+
"nloc": [100, 200, 300, 400],
|
|
386
|
+
"code_churn": [50, 100, 150, 200],
|
|
387
|
+
},
|
|
388
|
+
power_measurements=[5.0, 8.0, 12.0, 15.0],
|
|
389
|
+
)
|
|
390
|
+
|
|
391
|
+
print(f"Pearson: {correlator.pearson}")
|
|
392
|
+
print(f"Spearman: {correlator.spearman}")
|
|
393
|
+
print(f"Feature importance: {correlator.feature_importance}")
|
|
394
|
+
```
|
|
395
|
+
|
|
396
|
+
#### Web Dashboard
|
|
397
|
+
|
|
398
|
+
```python
|
|
399
|
+
from greenmining.dashboard import run_dashboard
|
|
400
|
+
|
|
401
|
+
# Launch interactive dashboard (requires pip install greenmining[dashboard])
|
|
402
|
+
run_dashboard(data_dir="./data", host="127.0.0.1", port=5000)
|
|
403
|
+
```
|
|
404
|
+
|
|
405
|
+
#### Pipeline Batch Analysis
|
|
261
406
|
|
|
262
407
|
```python
|
|
263
408
|
from greenmining.controllers.repository_controller import RepositoryController
|
|
@@ -478,17 +623,24 @@ config = Config(
|
|
|
478
623
|
|
|
479
624
|
### Core Capabilities
|
|
480
625
|
|
|
481
|
-
- **Pattern Detection**:
|
|
482
|
-
- **Keyword Analysis**:
|
|
483
|
-
- **
|
|
484
|
-
- **
|
|
485
|
-
- **Batch Processing**:
|
|
486
|
-
- **
|
|
487
|
-
- **
|
|
626
|
+
- **Pattern Detection**: 122 sustainability patterns across 15 categories from the GSF catalog
|
|
627
|
+
- **Keyword Analysis**: 321 green software detection keywords
|
|
628
|
+
- **Repository Fetching**: GraphQL API with date, star, and language filters
|
|
629
|
+
- **URL-Based Analysis**: Direct PyDriller analysis from GitHub URLs (HTTPS and SSH)
|
|
630
|
+
- **Batch Processing**: Parallel analysis of multiple repositories with configurable workers
|
|
631
|
+
- **Private Repository Support**: Authentication via SSH keys or GitHub tokens
|
|
632
|
+
- **Energy Measurement**: RAPL, CodeCarbon, and CPU Energy Meter backends
|
|
633
|
+
- **Carbon Footprint Reporting**: CO2 emissions with 20+ country profiles and cloud region support (AWS, GCP, Azure)
|
|
634
|
+
- **Power Regression Detection**: Identify commits that increased energy consumption
|
|
635
|
+
- **Metrics-to-Power Correlation**: Pearson and Spearman analysis between code metrics and power
|
|
636
|
+
- **Version Power Comparison**: Compare power consumption across software versions with trend detection
|
|
637
|
+
- **Method-Level Analysis**: Per-method complexity metrics via Lizard integration
|
|
638
|
+
- **Source Code Access**: Before/after source code for refactoring detection
|
|
639
|
+
- **Full Process Metrics**: All 8 PyDriller process metrics (ChangeSet, CodeChurn, CommitsCount, ContributorsCount, ContributorsExperience, HistoryComplexity, HunksCount, LinesCount)
|
|
640
|
+
- **Statistical Analysis**: Correlations, effect sizes, and temporal trends
|
|
641
|
+
- **Multi-format Output**: Markdown reports, CSV exports, JSON data
|
|
642
|
+
- **Web Dashboard**: Flask-based interactive visualization (`pip install greenmining[dashboard]`)
|
|
488
643
|
- **Docker Support**: Pre-built images for containerized analysis
|
|
489
|
-
- **Programmatic API**: Full Python API for custom workflows and integrations
|
|
490
|
-
- **Clean Architecture**: Modular design with services layer (Fetcher, Extractor, Analyzer, Aggregator, Reports)
|
|
491
|
-
- **Energy Measurement**: Real-time energy consumption tracking via RAPL (Linux) or CodeCarbon (cross-platform)
|
|
492
644
|
|
|
493
645
|
### Energy Measurement
|
|
494
646
|
|
|
@@ -500,38 +652,44 @@ greenmining includes built-in energy measurement capabilities for tracking the c
|
|
|
500
652
|
|---------|----------|---------|--------------|
|
|
501
653
|
| **RAPL** | Linux (Intel/AMD) | CPU/RAM energy (Joules) | `/sys/class/powercap/` access |
|
|
502
654
|
| **CodeCarbon** | Cross-platform | Energy + Carbon emissions (gCO2) | `pip install codecarbon` |
|
|
655
|
+
| **CPU Meter** | All platforms | Estimated CPU energy (Joules) | Optional: `pip install psutil` |
|
|
656
|
+
| **Auto** | All platforms | Best available backend | Automatic detection |
|
|
503
657
|
|
|
504
658
|
#### Python API
|
|
505
659
|
|
|
506
660
|
```python
|
|
507
|
-
from greenmining.energy import RAPLEnergyMeter,
|
|
508
|
-
|
|
509
|
-
#
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
#
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
print(f"Energy: {result.energy_joules:.2f} J")
|
|
524
|
-
print(f"Carbon: {result.carbon_grams:.4f} gCO2")
|
|
661
|
+
from greenmining.energy import RAPLEnergyMeter, CPUEnergyMeter, get_energy_meter
|
|
662
|
+
|
|
663
|
+
# Auto-detect best backend
|
|
664
|
+
meter = get_energy_meter("auto")
|
|
665
|
+
meter.start()
|
|
666
|
+
# ... run analysis ...
|
|
667
|
+
result = meter.stop()
|
|
668
|
+
print(f"Energy: {result.joules:.2f} J")
|
|
669
|
+
print(f"Power: {result.watts_avg:.2f} W")
|
|
670
|
+
|
|
671
|
+
# Integrated energy tracking during analysis
|
|
672
|
+
from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
|
|
673
|
+
|
|
674
|
+
analyzer = LocalRepoAnalyzer(energy_tracking=True, energy_backend="auto")
|
|
675
|
+
result = analyzer.analyze_repository("https://github.com/pallets/flask")
|
|
676
|
+
print(f"Analysis energy: {result.energy_metrics['joules']:.2f} J")
|
|
525
677
|
```
|
|
526
678
|
|
|
527
|
-
####
|
|
679
|
+
#### Carbon Footprint Reporting
|
|
528
680
|
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
681
|
+
```python
|
|
682
|
+
from greenmining.energy import CarbonReporter
|
|
683
|
+
|
|
684
|
+
reporter = CarbonReporter(
|
|
685
|
+
country_iso="USA",
|
|
686
|
+
cloud_provider="aws",
|
|
687
|
+
region="us-east-1",
|
|
688
|
+
)
|
|
689
|
+
report = reporter.generate_report(total_joules=3600.0)
|
|
690
|
+
print(f"CO2: {report.total_emissions_kg * 1000:.4f} grams")
|
|
691
|
+
print(f"Equivalent: {report.tree_months:.2f} tree-months to offset")
|
|
692
|
+
```
|
|
535
693
|
|
|
536
694
|
### Pattern Database
|
|
537
695
|
|
|
@@ -634,8 +792,14 @@ ruff check greenmining/ tests/
|
|
|
634
792
|
- PyGithub >= 2.1.1
|
|
635
793
|
- PyDriller >= 2.5
|
|
636
794
|
- pandas >= 2.2.0
|
|
637
|
-
|
|
638
|
-
|
|
795
|
+
|
|
796
|
+
**Optional dependencies:**
|
|
797
|
+
|
|
798
|
+
```bash
|
|
799
|
+
pip install greenmining[energy] # psutil, codecarbon (energy measurement)
|
|
800
|
+
pip install greenmining[dashboard] # flask (web dashboard)
|
|
801
|
+
pip install greenmining[dev] # pytest, black, ruff, mypy (development)
|
|
802
|
+
```
|
|
639
803
|
|
|
640
804
|
## License
|
|
641
805
|
|