greenmining 1.0.4__tar.gz → 1.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {greenmining-1.0.4 → greenmining-1.0.6}/CHANGELOG.md +0 -1
- {greenmining-1.0.4/greenmining.egg-info → greenmining-1.0.6}/PKG-INFO +65 -54
- {greenmining-1.0.4 → greenmining-1.0.6}/README.md +56 -50
- greenmining-1.0.6/greenmining/__init__.py +87 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/__version__.py +1 -1
- greenmining-1.0.6/greenmining/analyzers/__init__.py +22 -0
- greenmining-1.0.6/greenmining/analyzers/metrics_power_correlator.py +165 -0
- greenmining-1.0.6/greenmining/analyzers/power_regression.py +212 -0
- greenmining-1.0.6/greenmining/analyzers/version_power_analyzer.py +246 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/config.py +46 -34
- greenmining-1.0.6/greenmining/dashboard/__init__.py +5 -0
- greenmining-1.0.6/greenmining/dashboard/app.py +200 -0
- greenmining-1.0.6/greenmining/energy/__init__.py +20 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/energy/base.py +45 -35
- greenmining-1.0.6/greenmining/energy/carbon_reporter.py +242 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/energy/codecarbon_meter.py +25 -24
- greenmining-1.0.6/greenmining/energy/cpu_meter.py +144 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/energy/rapl.py +30 -36
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/services/__init__.py +13 -3
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/services/commit_extractor.py +9 -5
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/services/github_fetcher.py +16 -18
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/services/github_graphql_fetcher.py +45 -55
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/services/local_repo_analyzer.py +325 -63
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/services/reports.py +5 -8
- {greenmining-1.0.4 → greenmining-1.0.6/greenmining.egg-info}/PKG-INFO +65 -54
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining.egg-info/SOURCES.txt +14 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining.egg-info/requires.txt +8 -1
- {greenmining-1.0.4 → greenmining-1.0.6}/pyproject.toml +19 -7
- {greenmining-1.0.4 → greenmining-1.0.6}/setup.py +1 -1
- greenmining-1.0.4/greenmining/__init__.py +0 -43
- greenmining-1.0.4/greenmining/analyzers/__init__.py +0 -13
- greenmining-1.0.4/greenmining/energy/__init__.py +0 -13
- {greenmining-1.0.4 → greenmining-1.0.6}/LICENSE +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/MANIFEST.in +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/__main__.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/analyzers/code_diff_analyzer.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/analyzers/qualitative_analyzer.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/analyzers/statistical_analyzer.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/analyzers/temporal_analyzer.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/controllers/__init__.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/controllers/repository_controller.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/gsf_patterns.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/models/__init__.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/models/aggregated_stats.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/models/analysis_result.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/models/commit.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/models/repository.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/presenters/__init__.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/presenters/console_presenter.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/services/data_aggregator.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/services/data_analyzer.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/utils.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining.egg-info/dependency_links.txt +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/greenmining.egg-info/top_level.txt +0 -0
- {greenmining-1.0.4 → greenmining-1.0.6}/setup.cfg +0 -0
|
@@ -84,7 +84,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
84
84
|
- Pattern matching engine
|
|
85
85
|
- Green awareness detection
|
|
86
86
|
- Data analysis and reporting
|
|
87
|
-
- CLI interface with Click
|
|
88
87
|
- Docker support with multi-stage builds
|
|
89
88
|
- GitHub Actions CI/CD pipeline
|
|
90
89
|
- PyPI publishing workflow
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: greenmining
|
|
3
|
-
Version: 1.0.4
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 1.0.6
|
|
4
|
+
Summary: An empirical Python library for Mining Software Repositories (MSR) in Green IT research
|
|
5
5
|
Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
|
|
6
6
|
License: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/adam-bouafia/greenmining
|
|
@@ -9,7 +9,7 @@ Project-URL: Documentation, https://github.com/adam-bouafia/greenmining#readme
|
|
|
9
9
|
Project-URL: Repository, https://github.com/adam-bouafia/greenmining
|
|
10
10
|
Project-URL: Issues, https://github.com/adam-bouafia/greenmining/issues
|
|
11
11
|
Project-URL: Changelog, https://github.com/adam-bouafia/greenmining/blob/main/CHANGELOG.md
|
|
12
|
-
Keywords: green-software,gsf,sustainability,carbon-footprint,
|
|
12
|
+
Keywords: green-software,gsf,msr,mining-software-repositories,green-it,sustainability,carbon-footprint,energy-efficiency,repository-analysis,github-analysis,pydriller,empirical-software-engineering
|
|
13
13
|
Classifier: Development Status :: 3 - Alpha
|
|
14
14
|
Classifier: Intended Audience :: Developers
|
|
15
15
|
Classifier: Intended Audience :: Science/Research
|
|
@@ -43,8 +43,13 @@ Requires-Dist: pytest-mock>=3.12.0; extra == "dev"
|
|
|
43
43
|
Requires-Dist: black>=23.12.0; extra == "dev"
|
|
44
44
|
Requires-Dist: ruff>=0.1.9; extra == "dev"
|
|
45
45
|
Requires-Dist: mypy>=1.8.0; extra == "dev"
|
|
46
|
-
Requires-Dist: build>=1.0.
|
|
46
|
+
Requires-Dist: build>=1.0.5; extra == "dev"
|
|
47
47
|
Requires-Dist: twine>=4.0.2; extra == "dev"
|
|
48
|
+
Provides-Extra: energy
|
|
49
|
+
Requires-Dist: psutil>=5.9.0; extra == "energy"
|
|
50
|
+
Requires-Dist: codecarbon>=2.3.0; extra == "energy"
|
|
51
|
+
Provides-Extra: dashboard
|
|
52
|
+
Requires-Dist: flask>=3.0.0; extra == "dashboard"
|
|
48
53
|
Provides-Extra: docs
|
|
49
54
|
Requires-Dist: sphinx>=7.2.0; extra == "docs"
|
|
50
55
|
Requires-Dist: sphinx-rtd-theme>=2.0.0; extra == "docs"
|
|
@@ -53,15 +58,30 @@ Dynamic: license-file
|
|
|
53
58
|
|
|
54
59
|
# greenmining
|
|
55
60
|
|
|
56
|
-
|
|
61
|
+
An empirical Python library for Mining Software Repositories (MSR) in Green IT research.
|
|
57
62
|
|
|
58
63
|
[PyPI version](https://pypi.org/project/greenmining/)
|
|
59
64
|
[Python versions](https://pypi.org/project/greenmining/)
|
|
60
65
|
[License](LICENSE)
|
|
66
|
+
[Documentation](https://greenmining.readthedocs.io/)
|
|
61
67
|
|
|
62
68
|
## Overview
|
|
63
69
|
|
|
64
|
-
`greenmining` is a Python library
|
|
70
|
+
`greenmining` is a research-grade Python library designed for **empirical Mining Software Repositories (MSR)** studies in **Green IT**. It enables researchers and practitioners to:
|
|
71
|
+
|
|
72
|
+
- **Mine repositories at scale** - Fetch and analyze GitHub repositories via GraphQL API with configurable filters
|
|
73
|
+
- **Batch analysis with parallelism** - Analyze multiple repositories concurrently with configurable worker pools
|
|
74
|
+
- **Classify green commits** - Detect 122 sustainability patterns from the Green Software Foundation (GSF) catalog
|
|
75
|
+
- **Analyze any repository by URL** - Direct PyDriller-based analysis with support for private repositories
|
|
76
|
+
- **Measure energy consumption** - RAPL, CodeCarbon, and CPU Energy Meter backends for power profiling
|
|
77
|
+
- **Carbon footprint reporting** - CO2 emissions calculation with 20+ country profiles and cloud region support
|
|
78
|
+
- **Power regression detection** - Identify commits that increased energy consumption
|
|
79
|
+
- **Method-level analysis** - Per-method complexity and metrics via Lizard integration
|
|
80
|
+
- **Version power comparison** - Compare power consumption across software versions
|
|
81
|
+
- **Generate research datasets** - Statistical analysis, temporal trends, and publication-ready reports
|
|
82
|
+
- **Web dashboard** - Flask-based interactive visualization of analysis results
|
|
83
|
+
|
|
84
|
+
Whether you're conducting MSR research, analyzing green software adoption, or measuring the energy footprint of codebases, GreenMining provides the empirical toolkit you need.
|
|
65
85
|
|
|
66
86
|
## Installation
|
|
67
87
|
|
|
@@ -105,7 +125,7 @@ if is_green_aware(commit_msg):
|
|
|
105
125
|
# Output: ['Cache Static Data', 'Use Efficient Cache Strategies']
|
|
106
126
|
```
|
|
107
127
|
|
|
108
|
-
#### Fetch Repositories with Custom Keywords
|
|
128
|
+
#### Fetch Repositories with Custom Keywords
|
|
109
129
|
|
|
110
130
|
```python
|
|
111
131
|
from greenmining import fetch_repositories
|
|
@@ -144,8 +164,6 @@ for repo in repos[:5]:
|
|
|
144
164
|
```python
|
|
145
165
|
from greenmining.services.commit_extractor import CommitExtractor
|
|
146
166
|
from greenmining.services.data_analyzer import DataAnalyzer
|
|
147
|
-
from greenmining.analyzers.nlp_analyzer import NLPAnalyzer
|
|
148
|
-
from greenmining.analyzers.ml_feature_extractor import MLFeatureExtractor
|
|
149
167
|
from greenmining import fetch_repositories
|
|
150
168
|
|
|
151
169
|
# Fetch repositories with custom keywords
|
|
@@ -195,18 +213,6 @@ for commit in commits:
|
|
|
195
213
|
results.append(result)
|
|
196
214
|
print(f"Green commit found: {commit.message[:50]}...")
|
|
197
215
|
print(f" Patterns: {result['known_pattern']}")
|
|
198
|
-
|
|
199
|
-
# Access NLP analysis results (NEW)
|
|
200
|
-
if 'nlp_analysis' in result:
|
|
201
|
-
nlp = result['nlp_analysis']
|
|
202
|
-
print(f" NLP: {nlp['morphological_count']} morphological matches, "
|
|
203
|
-
f"{nlp['semantic_count']} semantic matches")
|
|
204
|
-
|
|
205
|
-
# Access ML features (NEW)
|
|
206
|
-
if 'ml_features' in result:
|
|
207
|
-
ml = result['ml_features']['text']
|
|
208
|
-
print(f" ML Features: {ml['word_count']} words, "
|
|
209
|
-
f"keyword density: {ml['keyword_density']:.2f}")
|
|
210
216
|
```
|
|
211
217
|
|
|
212
218
|
#### Access Sustainability Patterns Data
|
|
@@ -242,7 +248,7 @@ print(f"Available categories: {sorted(categories)}")
|
|
|
242
248
|
# 'monitoring', 'network', 'networking', 'resource', 'web']
|
|
243
249
|
```
|
|
244
250
|
|
|
245
|
-
#### Advanced Analysis: Temporal Trends
|
|
251
|
+
#### Advanced Analysis: Temporal Trends
|
|
246
252
|
|
|
247
253
|
```python
|
|
248
254
|
from greenmining.services.data_aggregator import DataAggregator
|
|
@@ -374,7 +380,7 @@ repositories = fetch_repositories(
|
|
|
374
380
|
min_stars=10,
|
|
375
381
|
keywords="software engineering",
|
|
376
382
|
)
|
|
377
|
-
print(f"
|
|
383
|
+
print(f"Fetched {len(repositories)} repositories")
|
|
378
384
|
|
|
379
385
|
# STAGE 2: Extract Commits
|
|
380
386
|
print("\nExtracting commits...")
|
|
@@ -386,7 +392,7 @@ extractor = CommitExtractor(
|
|
|
386
392
|
timeout=120,
|
|
387
393
|
)
|
|
388
394
|
all_commits = extractor.extract_from_repositories(repositories)
|
|
389
|
-
print(f"
|
|
395
|
+
print(f"Extracted {len(all_commits)} commits")
|
|
390
396
|
|
|
391
397
|
# Save commits
|
|
392
398
|
extractor.save_results(
|
|
@@ -405,8 +411,8 @@ analyzed_commits = analyzer.analyze_commits(all_commits)
|
|
|
405
411
|
# Count green-aware commits
|
|
406
412
|
green_count = sum(1 for c in analyzed_commits if c.get("green_aware", False))
|
|
407
413
|
green_percentage = (green_count / len(analyzed_commits) * 100) if analyzed_commits else 0
|
|
408
|
-
print(f"
|
|
409
|
-
print(f"
|
|
414
|
+
print(f"Analyzed {len(analyzed_commits)} commits")
|
|
415
|
+
print(f"Green-aware: {green_count} ({green_percentage:.1f}%)")
|
|
410
416
|
|
|
411
417
|
# Save analysis
|
|
412
418
|
analyzer.save_results(analyzed_commits, output_dir / "analyzed.json")
|
|
@@ -434,7 +440,7 @@ print("\n" + "="*80)
|
|
|
434
440
|
print("ANALYSIS COMPLETE")
|
|
435
441
|
print("="*80)
|
|
436
442
|
aggregator.print_summary(results)
|
|
437
|
-
print(f"\
|
|
443
|
+
print(f"\nResults saved in: {output_dir.absolute()}")
|
|
438
444
|
```
|
|
439
445
|
|
|
440
446
|
**What this example does:**
|
|
@@ -567,38 +573,44 @@ greenmining includes built-in energy measurement capabilities for tracking the c
|
|
|
567
573
|
|---------|----------|---------|--------------|
|
|
568
574
|
| **RAPL** | Linux (Intel/AMD) | CPU/RAM energy (Joules) | `/sys/class/powercap/` access |
|
|
569
575
|
| **CodeCarbon** | Cross-platform | Energy + Carbon emissions (gCO2) | `pip install codecarbon` |
|
|
576
|
+
| **CPU Meter** | All platforms | Estimated CPU energy (Joules) | Optional: `pip install psutil` |
|
|
577
|
+
| **Auto** | All platforms | Best available backend | Automatic detection |
|
|
570
578
|
|
|
571
579
|
#### Python API
|
|
572
580
|
|
|
573
581
|
```python
|
|
574
|
-
from greenmining.energy import RAPLEnergyMeter,
|
|
575
|
-
|
|
576
|
-
#
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
#
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
print(f"Energy: {result.energy_joules:.2f} J")
|
|
591
|
-
print(f"Carbon: {result.carbon_grams:.4f} gCO2")
|
|
582
|
+
from greenmining.energy import RAPLEnergyMeter, CPUEnergyMeter, get_energy_meter
|
|
583
|
+
|
|
584
|
+
# Auto-detect best backend
|
|
585
|
+
meter = get_energy_meter("auto")
|
|
586
|
+
meter.start()
|
|
587
|
+
# ... run analysis ...
|
|
588
|
+
result = meter.stop()
|
|
589
|
+
print(f"Energy: {result.joules:.2f} J")
|
|
590
|
+
print(f"Power: {result.watts_avg:.2f} W")
|
|
591
|
+
|
|
592
|
+
# Integrated energy tracking during analysis
|
|
593
|
+
from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
|
|
594
|
+
|
|
595
|
+
analyzer = LocalRepoAnalyzer(energy_tracking=True, energy_backend="auto")
|
|
596
|
+
result = analyzer.analyze_repository("https://github.com/pallets/flask")
|
|
597
|
+
print(f"Analysis energy: {result.energy_metrics['joules']:.2f} J")
|
|
592
598
|
```
|
|
593
599
|
|
|
594
|
-
####
|
|
600
|
+
#### Carbon Footprint Reporting
|
|
601
|
+
|
|
602
|
+
```python
|
|
603
|
+
from greenmining.energy import CarbonReporter
|
|
595
604
|
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
-
|
|
600
|
-
|
|
601
|
-
|
|
605
|
+
reporter = CarbonReporter(
|
|
606
|
+
country_iso="USA",
|
|
607
|
+
cloud_provider="aws",
|
|
608
|
+
region="us-east-1",
|
|
609
|
+
)
|
|
610
|
+
report = reporter.generate_report(total_joules=3600.0)
|
|
611
|
+
print(f"CO2: {report.total_emissions_kg * 1000:.4f} grams")
|
|
612
|
+
print(f"Equivalent: {report.tree_months:.2f} tree-months to offset")
|
|
613
|
+
```
|
|
602
614
|
|
|
603
615
|
### Pattern Database
|
|
604
616
|
|
|
@@ -700,8 +712,7 @@ ruff check greenmining/ tests/
|
|
|
700
712
|
- Python 3.9+
|
|
701
713
|
- PyGithub >= 2.1.1
|
|
702
714
|
- PyDriller >= 2.5
|
|
703
|
-
- pandas >= 2.2.0
|
|
704
|
-
- click >= 8.1.7
|
|
715
|
+
- pandas >= 2.2.0
|
|
705
716
|
- codecarbon >= 2.0.0 (optional, for cross-platform energy measurement)
|
|
706
717
|
|
|
707
718
|
## License
|
|
@@ -1,14 +1,29 @@
|
|
|
1
1
|
# greenmining
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
An empirical Python library for Mining Software Repositories (MSR) in Green IT research.
|
|
4
4
|
|
|
5
5
|
[PyPI version](https://pypi.org/project/greenmining/)
|
|
6
6
|
[Python versions](https://pypi.org/project/greenmining/)
|
|
7
7
|
[License](LICENSE)
|
|
8
|
+
[Documentation](https://greenmining.readthedocs.io/)
|
|
8
9
|
|
|
9
10
|
## Overview
|
|
10
11
|
|
|
11
|
-
`greenmining` is a Python library
|
|
12
|
+
`greenmining` is a research-grade Python library designed for **empirical Mining Software Repositories (MSR)** studies in **Green IT**. It enables researchers and practitioners to:
|
|
13
|
+
|
|
14
|
+
- **Mine repositories at scale** - Fetch and analyze GitHub repositories via GraphQL API with configurable filters
|
|
15
|
+
- **Batch analysis with parallelism** - Analyze multiple repositories concurrently with configurable worker pools
|
|
16
|
+
- **Classify green commits** - Detect 122 sustainability patterns from the Green Software Foundation (GSF) catalog
|
|
17
|
+
- **Analyze any repository by URL** - Direct PyDriller-based analysis with support for private repositories
|
|
18
|
+
- **Measure energy consumption** - RAPL, CodeCarbon, and CPU Energy Meter backends for power profiling
|
|
19
|
+
- **Carbon footprint reporting** - CO2 emissions calculation with 20+ country profiles and cloud region support
|
|
20
|
+
- **Power regression detection** - Identify commits that increased energy consumption
|
|
21
|
+
- **Method-level analysis** - Per-method complexity and metrics via Lizard integration
|
|
22
|
+
- **Version power comparison** - Compare power consumption across software versions
|
|
23
|
+
- **Generate research datasets** - Statistical analysis, temporal trends, and publication-ready reports
|
|
24
|
+
- **Web dashboard** - Flask-based interactive visualization of analysis results
|
|
25
|
+
|
|
26
|
+
Whether you're conducting MSR research, analyzing green software adoption, or measuring the energy footprint of codebases, GreenMining provides the empirical toolkit you need.
|
|
12
27
|
|
|
13
28
|
## Installation
|
|
14
29
|
|
|
@@ -52,7 +67,7 @@ if is_green_aware(commit_msg):
|
|
|
52
67
|
# Output: ['Cache Static Data', 'Use Efficient Cache Strategies']
|
|
53
68
|
```
|
|
54
69
|
|
|
55
|
-
#### Fetch Repositories with Custom Keywords
|
|
70
|
+
#### Fetch Repositories with Custom Keywords
|
|
56
71
|
|
|
57
72
|
```python
|
|
58
73
|
from greenmining import fetch_repositories
|
|
@@ -91,8 +106,6 @@ for repo in repos[:5]:
|
|
|
91
106
|
```python
|
|
92
107
|
from greenmining.services.commit_extractor import CommitExtractor
|
|
93
108
|
from greenmining.services.data_analyzer import DataAnalyzer
|
|
94
|
-
from greenmining.analyzers.nlp_analyzer import NLPAnalyzer
|
|
95
|
-
from greenmining.analyzers.ml_feature_extractor import MLFeatureExtractor
|
|
96
109
|
from greenmining import fetch_repositories
|
|
97
110
|
|
|
98
111
|
# Fetch repositories with custom keywords
|
|
@@ -142,18 +155,6 @@ for commit in commits:
|
|
|
142
155
|
results.append(result)
|
|
143
156
|
print(f"Green commit found: {commit.message[:50]}...")
|
|
144
157
|
print(f" Patterns: {result['known_pattern']}")
|
|
145
|
-
|
|
146
|
-
# Access NLP analysis results (NEW)
|
|
147
|
-
if 'nlp_analysis' in result:
|
|
148
|
-
nlp = result['nlp_analysis']
|
|
149
|
-
print(f" NLP: {nlp['morphological_count']} morphological matches, "
|
|
150
|
-
f"{nlp['semantic_count']} semantic matches")
|
|
151
|
-
|
|
152
|
-
# Access ML features (NEW)
|
|
153
|
-
if 'ml_features' in result:
|
|
154
|
-
ml = result['ml_features']['text']
|
|
155
|
-
print(f" ML Features: {ml['word_count']} words, "
|
|
156
|
-
f"keyword density: {ml['keyword_density']:.2f}")
|
|
157
158
|
```
|
|
158
159
|
|
|
159
160
|
#### Access Sustainability Patterns Data
|
|
@@ -189,7 +190,7 @@ print(f"Available categories: {sorted(categories)}")
|
|
|
189
190
|
# 'monitoring', 'network', 'networking', 'resource', 'web']
|
|
190
191
|
```
|
|
191
192
|
|
|
192
|
-
#### Advanced Analysis: Temporal Trends
|
|
193
|
+
#### Advanced Analysis: Temporal Trends
|
|
193
194
|
|
|
194
195
|
```python
|
|
195
196
|
from greenmining.services.data_aggregator import DataAggregator
|
|
@@ -321,7 +322,7 @@ repositories = fetch_repositories(
|
|
|
321
322
|
min_stars=10,
|
|
322
323
|
keywords="software engineering",
|
|
323
324
|
)
|
|
324
|
-
print(f"
|
|
325
|
+
print(f"Fetched {len(repositories)} repositories")
|
|
325
326
|
|
|
326
327
|
# STAGE 2: Extract Commits
|
|
327
328
|
print("\nExtracting commits...")
|
|
@@ -333,7 +334,7 @@ extractor = CommitExtractor(
|
|
|
333
334
|
timeout=120,
|
|
334
335
|
)
|
|
335
336
|
all_commits = extractor.extract_from_repositories(repositories)
|
|
336
|
-
print(f"
|
|
337
|
+
print(f"Extracted {len(all_commits)} commits")
|
|
337
338
|
|
|
338
339
|
# Save commits
|
|
339
340
|
extractor.save_results(
|
|
@@ -352,8 +353,8 @@ analyzed_commits = analyzer.analyze_commits(all_commits)
|
|
|
352
353
|
# Count green-aware commits
|
|
353
354
|
green_count = sum(1 for c in analyzed_commits if c.get("green_aware", False))
|
|
354
355
|
green_percentage = (green_count / len(analyzed_commits) * 100) if analyzed_commits else 0
|
|
355
|
-
print(f"
|
|
356
|
-
print(f"
|
|
356
|
+
print(f"Analyzed {len(analyzed_commits)} commits")
|
|
357
|
+
print(f"Green-aware: {green_count} ({green_percentage:.1f}%)")
|
|
357
358
|
|
|
358
359
|
# Save analysis
|
|
359
360
|
analyzer.save_results(analyzed_commits, output_dir / "analyzed.json")
|
|
@@ -381,7 +382,7 @@ print("\n" + "="*80)
|
|
|
381
382
|
print("ANALYSIS COMPLETE")
|
|
382
383
|
print("="*80)
|
|
383
384
|
aggregator.print_summary(results)
|
|
384
|
-
print(f"\
|
|
385
|
+
print(f"\nResults saved in: {output_dir.absolute()}")
|
|
385
386
|
```
|
|
386
387
|
|
|
387
388
|
**What this example does:**
|
|
@@ -514,38 +515,44 @@ greenmining includes built-in energy measurement capabilities for tracking the c
|
|
|
514
515
|
|---------|----------|---------|--------------|
|
|
515
516
|
| **RAPL** | Linux (Intel/AMD) | CPU/RAM energy (Joules) | `/sys/class/powercap/` access |
|
|
516
517
|
| **CodeCarbon** | Cross-platform | Energy + Carbon emissions (gCO2) | `pip install codecarbon` |
|
|
518
|
+
| **CPU Meter** | All platforms | Estimated CPU energy (Joules) | Optional: `pip install psutil` |
|
|
519
|
+
| **Auto** | All platforms | Best available backend | Automatic detection |
|
|
517
520
|
|
|
518
521
|
#### Python API
|
|
519
522
|
|
|
520
523
|
```python
|
|
521
|
-
from greenmining.energy import RAPLEnergyMeter,
|
|
522
|
-
|
|
523
|
-
#
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
#
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
print(f"Energy: {result.energy_joules:.2f} J")
|
|
538
|
-
print(f"Carbon: {result.carbon_grams:.4f} gCO2")
|
|
524
|
+
from greenmining.energy import RAPLEnergyMeter, CPUEnergyMeter, get_energy_meter
|
|
525
|
+
|
|
526
|
+
# Auto-detect best backend
|
|
527
|
+
meter = get_energy_meter("auto")
|
|
528
|
+
meter.start()
|
|
529
|
+
# ... run analysis ...
|
|
530
|
+
result = meter.stop()
|
|
531
|
+
print(f"Energy: {result.joules:.2f} J")
|
|
532
|
+
print(f"Power: {result.watts_avg:.2f} W")
|
|
533
|
+
|
|
534
|
+
# Integrated energy tracking during analysis
|
|
535
|
+
from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
|
|
536
|
+
|
|
537
|
+
analyzer = LocalRepoAnalyzer(energy_tracking=True, energy_backend="auto")
|
|
538
|
+
result = analyzer.analyze_repository("https://github.com/pallets/flask")
|
|
539
|
+
print(f"Analysis energy: {result.energy_metrics['joules']:.2f} J")
|
|
539
540
|
```
|
|
540
541
|
|
|
541
|
-
####
|
|
542
|
+
#### Carbon Footprint Reporting
|
|
543
|
+
|
|
544
|
+
```python
|
|
545
|
+
from greenmining.energy import CarbonReporter
|
|
542
546
|
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
-
|
|
547
|
-
|
|
548
|
-
|
|
547
|
+
reporter = CarbonReporter(
|
|
548
|
+
country_iso="USA",
|
|
549
|
+
cloud_provider="aws",
|
|
550
|
+
region="us-east-1",
|
|
551
|
+
)
|
|
552
|
+
report = reporter.generate_report(total_joules=3600.0)
|
|
553
|
+
print(f"CO2: {report.total_emissions_kg * 1000:.4f} grams")
|
|
554
|
+
print(f"Equivalent: {report.tree_months:.2f} tree-months to offset")
|
|
555
|
+
```
|
|
549
556
|
|
|
550
557
|
### Pattern Database
|
|
551
558
|
|
|
@@ -647,8 +654,7 @@ ruff check greenmining/ tests/
|
|
|
647
654
|
- Python 3.9+
|
|
648
655
|
- PyGithub >= 2.1.1
|
|
649
656
|
- PyDriller >= 2.5
|
|
650
|
-
- pandas >= 2.2.0
|
|
651
|
-
- click >= 8.1.7
|
|
657
|
+
- pandas >= 2.2.0
|
|
652
658
|
- codecarbon >= 2.0.0 (optional, for cross-platform energy measurement)
|
|
653
659
|
|
|
654
660
|
## License
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Green Microservices Mining - GSF Pattern Analysis Tool.
|
|
2
|
+
|
|
3
|
+
from greenmining.config import Config
|
|
4
|
+
from greenmining.controllers.repository_controller import RepositoryController
|
|
5
|
+
from greenmining.gsf_patterns import (
|
|
6
|
+
GREEN_KEYWORDS,
|
|
7
|
+
GSF_PATTERNS,
|
|
8
|
+
get_pattern_by_keywords,
|
|
9
|
+
is_green_aware,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
__version__ = "1.0.6"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def fetch_repositories(
|
|
16
|
+
github_token: str,
|
|
17
|
+
max_repos: int = None,
|
|
18
|
+
min_stars: int = None,
|
|
19
|
+
languages: list = None,
|
|
20
|
+
keywords: str = None,
|
|
21
|
+
):
|
|
22
|
+
# Fetch repositories from GitHub with custom search keywords.
|
|
23
|
+
config = Config()
|
|
24
|
+
config.GITHUB_TOKEN = github_token
|
|
25
|
+
controller = RepositoryController(config)
|
|
26
|
+
|
|
27
|
+
return controller.fetch_repositories(
|
|
28
|
+
max_repos=max_repos,
|
|
29
|
+
min_stars=min_stars,
|
|
30
|
+
languages=languages,
|
|
31
|
+
keywords=keywords,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def analyze_repositories(
|
|
36
|
+
urls: list,
|
|
37
|
+
max_commits: int = 500,
|
|
38
|
+
parallel_workers: int = 1,
|
|
39
|
+
output_format: str = "dict",
|
|
40
|
+
energy_tracking: bool = False,
|
|
41
|
+
energy_backend: str = "rapl",
|
|
42
|
+
method_level_analysis: bool = False,
|
|
43
|
+
include_source_code: bool = False,
|
|
44
|
+
ssh_key_path: str = None,
|
|
45
|
+
github_token: str = None,
|
|
46
|
+
):
|
|
47
|
+
# Analyze multiple repositories from URLs.
|
|
48
|
+
# Args:
|
|
49
|
+
# urls: List of GitHub repository URLs
|
|
50
|
+
# max_commits: Maximum commits to analyze per repository
|
|
51
|
+
# parallel_workers: Number of parallel analysis workers (1=sequential)
|
|
52
|
+
# output_format: Output format (dict, json, csv)
|
|
53
|
+
# energy_tracking: Enable automatic energy measurement during analysis
|
|
54
|
+
# energy_backend: Energy backend (rapl, codecarbon, cpu_meter, auto)
|
|
55
|
+
# method_level_analysis: Include per-method metrics via Lizard
|
|
56
|
+
# include_source_code: Include source code before/after in results
|
|
57
|
+
# ssh_key_path: SSH key path for private repositories
|
|
58
|
+
# github_token: GitHub token for private HTTPS repositories
|
|
59
|
+
from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
|
|
60
|
+
|
|
61
|
+
analyzer = LocalRepoAnalyzer(
|
|
62
|
+
max_commits=max_commits,
|
|
63
|
+
energy_tracking=energy_tracking,
|
|
64
|
+
energy_backend=energy_backend,
|
|
65
|
+
method_level_analysis=method_level_analysis,
|
|
66
|
+
include_source_code=include_source_code,
|
|
67
|
+
ssh_key_path=ssh_key_path,
|
|
68
|
+
github_token=github_token,
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
return analyzer.analyze_repositories(
|
|
72
|
+
urls=urls,
|
|
73
|
+
parallel_workers=parallel_workers,
|
|
74
|
+
output_format=output_format,
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
__all__ = [
|
|
79
|
+
"Config",
|
|
80
|
+
"GSF_PATTERNS",
|
|
81
|
+
"GREEN_KEYWORDS",
|
|
82
|
+
"is_green_aware",
|
|
83
|
+
"get_pattern_by_keywords",
|
|
84
|
+
"fetch_repositories",
|
|
85
|
+
"analyze_repositories",
|
|
86
|
+
"__version__",
|
|
87
|
+
]
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Analyzers for GreenMining framework.
|
|
2
|
+
|
|
3
|
+
from .code_diff_analyzer import CodeDiffAnalyzer
|
|
4
|
+
from .statistical_analyzer import StatisticalAnalyzer
|
|
5
|
+
from .temporal_analyzer import TemporalAnalyzer
|
|
6
|
+
from .qualitative_analyzer import QualitativeAnalyzer
|
|
7
|
+
from .power_regression import PowerRegressionDetector, PowerRegression
|
|
8
|
+
from .metrics_power_correlator import MetricsPowerCorrelator, CorrelationResult
|
|
9
|
+
from .version_power_analyzer import VersionPowerAnalyzer, VersionPowerReport
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"CodeDiffAnalyzer",
|
|
13
|
+
"StatisticalAnalyzer",
|
|
14
|
+
"TemporalAnalyzer",
|
|
15
|
+
"QualitativeAnalyzer",
|
|
16
|
+
"PowerRegressionDetector",
|
|
17
|
+
"PowerRegression",
|
|
18
|
+
"MetricsPowerCorrelator",
|
|
19
|
+
"CorrelationResult",
|
|
20
|
+
"VersionPowerAnalyzer",
|
|
21
|
+
"VersionPowerReport",
|
|
22
|
+
]
|