greenmining 1.0.6__tar.gz → 1.0.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {greenmining-1.0.6/greenmining.egg-info → greenmining-1.0.7}/PKG-INFO +158 -14
- {greenmining-1.0.6 → greenmining-1.0.7}/README.md +157 -13
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/__init__.py +9 -1
- {greenmining-1.0.6 → greenmining-1.0.7/greenmining.egg-info}/PKG-INFO +158 -14
- {greenmining-1.0.6 → greenmining-1.0.7}/pyproject.toml +1 -1
- {greenmining-1.0.6 → greenmining-1.0.7}/CHANGELOG.md +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/LICENSE +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/MANIFEST.in +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/__main__.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/__version__.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/analyzers/__init__.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/analyzers/code_diff_analyzer.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/analyzers/metrics_power_correlator.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/analyzers/power_regression.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/analyzers/qualitative_analyzer.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/analyzers/statistical_analyzer.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/analyzers/temporal_analyzer.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/analyzers/version_power_analyzer.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/config.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/controllers/__init__.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/controllers/repository_controller.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/dashboard/__init__.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/dashboard/app.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/energy/__init__.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/energy/base.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/energy/carbon_reporter.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/energy/codecarbon_meter.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/energy/cpu_meter.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/energy/rapl.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/gsf_patterns.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/models/__init__.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/models/aggregated_stats.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/models/analysis_result.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/models/commit.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/models/repository.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/presenters/__init__.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/presenters/console_presenter.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/services/__init__.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/services/commit_extractor.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/services/data_aggregator.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/services/data_analyzer.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/services/github_fetcher.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/services/github_graphql_fetcher.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/services/local_repo_analyzer.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/services/reports.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/utils.py +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining.egg-info/SOURCES.txt +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining.egg-info/dependency_links.txt +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining.egg-info/requires.txt +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/greenmining.egg-info/top_level.txt +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/setup.cfg +0 -0
- {greenmining-1.0.6 → greenmining-1.0.7}/setup.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: greenmining
|
|
3
|
-
Version: 1.0.6
|
|
3
|
+
Version: 1.0.7
|
|
4
4
|
Summary: An empirical Python library for Mining Software Repositories (MSR) in Green IT research
|
|
5
5
|
Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
|
|
6
6
|
License: MIT
|
|
@@ -330,7 +330,137 @@ print(f"Top patterns: {stats['top_patterns'][:5]}")
|
|
|
330
330
|
aggregator.export_to_csv(results, "output.csv")
|
|
331
331
|
```
|
|
332
332
|
|
|
333
|
-
####
|
|
333
|
+
#### URL-Based Repository Analysis
|
|
334
|
+
|
|
335
|
+
```python
|
|
336
|
+
from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
|
|
337
|
+
|
|
338
|
+
analyzer = LocalRepoAnalyzer(
|
|
339
|
+
max_commits=200,
|
|
340
|
+
cleanup_after=True,
|
|
341
|
+
)
|
|
342
|
+
|
|
343
|
+
result = analyzer.analyze_repository("https://github.com/pallets/flask")
|
|
344
|
+
|
|
345
|
+
print(f"Repository: {result.name}")
|
|
346
|
+
print(f"Commits analyzed: {result.total_commits}")
|
|
347
|
+
print(f"Green-aware: {result.green_commits} ({result.green_commit_rate:.1%})")
|
|
348
|
+
|
|
349
|
+
for commit in result.commits[:5]:
|
|
350
|
+
if commit.green_aware:
|
|
351
|
+
print(f" {commit.message[:60]}...")
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
#### Batch Analysis with Parallelism
|
|
355
|
+
|
|
356
|
+
```python
|
|
357
|
+
from greenmining import analyze_repositories
|
|
358
|
+
|
|
359
|
+
results = analyze_repositories(
|
|
360
|
+
urls=[
|
|
361
|
+
"https://github.com/kubernetes/kubernetes",
|
|
362
|
+
"https://github.com/istio/istio",
|
|
363
|
+
"https://github.com/envoyproxy/envoy",
|
|
364
|
+
],
|
|
365
|
+
max_commits=100,
|
|
366
|
+
parallel_workers=3,
|
|
367
|
+
energy_tracking=True,
|
|
368
|
+
energy_backend="auto",
|
|
369
|
+
)
|
|
370
|
+
|
|
371
|
+
for result in results:
|
|
372
|
+
print(f"{result.name}: {result.green_commit_rate:.1%} green")
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
#### Private Repository Analysis
|
|
376
|
+
|
|
377
|
+
```python
|
|
378
|
+
from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
|
|
379
|
+
|
|
380
|
+
# HTTPS with token
|
|
381
|
+
analyzer = LocalRepoAnalyzer(github_token="ghp_xxxx")
|
|
382
|
+
result = analyzer.analyze_repository("https://github.com/company/private-repo")
|
|
383
|
+
|
|
384
|
+
# SSH with key
|
|
385
|
+
analyzer = LocalRepoAnalyzer(ssh_key_path="~/.ssh/id_rsa")
|
|
386
|
+
result = analyzer.analyze_repository("git@github.com:company/private-repo.git")
|
|
387
|
+
```
|
|
388
|
+
|
|
389
|
+
#### Power Regression Detection
|
|
390
|
+
|
|
391
|
+
```python
|
|
392
|
+
from greenmining.analyzers import PowerRegressionDetector
|
|
393
|
+
|
|
394
|
+
detector = PowerRegressionDetector(
|
|
395
|
+
test_command="pytest tests/ -x",
|
|
396
|
+
energy_backend="rapl",
|
|
397
|
+
threshold_percent=5.0,
|
|
398
|
+
iterations=5,
|
|
399
|
+
)
|
|
400
|
+
|
|
401
|
+
regressions = detector.detect(
|
|
402
|
+
repo_path="/path/to/repo",
|
|
403
|
+
baseline_commit="v1.0.0",
|
|
404
|
+
target_commit="HEAD",
|
|
405
|
+
)
|
|
406
|
+
|
|
407
|
+
for regression in regressions:
|
|
408
|
+
print(f"Commit {regression.sha[:8]}: +{regression.power_increase:.1f}%")
|
|
409
|
+
```
|
|
410
|
+
|
|
411
|
+
#### Version Power Comparison
|
|
412
|
+
|
|
413
|
+
```python
|
|
414
|
+
from greenmining.analyzers import VersionPowerAnalyzer
|
|
415
|
+
|
|
416
|
+
analyzer = VersionPowerAnalyzer(
|
|
417
|
+
test_command="pytest tests/",
|
|
418
|
+
energy_backend="rapl",
|
|
419
|
+
iterations=10,
|
|
420
|
+
warmup_iterations=2,
|
|
421
|
+
)
|
|
422
|
+
|
|
423
|
+
report = analyzer.analyze_versions(
|
|
424
|
+
repo_path="/path/to/repo",
|
|
425
|
+
versions=["v1.0", "v1.1", "v1.2", "v2.0"],
|
|
426
|
+
)
|
|
427
|
+
|
|
428
|
+
print(report.summary())
|
|
429
|
+
print(f"Trend: {report.trend}")
|
|
430
|
+
print(f"Most efficient: {report.most_efficient}")
|
|
431
|
+
```
|
|
432
|
+
|
|
433
|
+
#### Metrics-to-Power Correlation
|
|
434
|
+
|
|
435
|
+
```python
|
|
436
|
+
from greenmining.analyzers import MetricsPowerCorrelator
|
|
437
|
+
|
|
438
|
+
correlator = MetricsPowerCorrelator()
|
|
439
|
+
correlator.fit(
|
|
440
|
+
metrics=["complexity", "nloc", "code_churn"],
|
|
441
|
+
metrics_values={
|
|
442
|
+
"complexity": [10, 20, 30, 40],
|
|
443
|
+
"nloc": [100, 200, 300, 400],
|
|
444
|
+
"code_churn": [50, 100, 150, 200],
|
|
445
|
+
},
|
|
446
|
+
power_measurements=[5.0, 8.0, 12.0, 15.0],
|
|
447
|
+
)
|
|
448
|
+
|
|
449
|
+
print(f"Pearson: {correlator.pearson}")
|
|
450
|
+
print(f"Spearman: {correlator.spearman}")
|
|
451
|
+
print(f"Feature importance: {correlator.feature_importance}")
|
|
452
|
+
```
|
|
453
|
+
|
|
454
|
+
#### Web Dashboard
|
|
455
|
+
|
|
456
|
+
```python
|
|
457
|
+
from greenmining.dashboard import run_dashboard
|
|
458
|
+
|
|
459
|
+
# Launch interactive dashboard (requires pip install greenmining[dashboard])
|
|
460
|
+
run_dashboard(data_dir="./data", host="127.0.0.1", port=5000)
|
|
461
|
+
```
|
|
462
|
+
|
|
463
|
+
#### Pipeline Batch Analysis
|
|
334
464
|
|
|
335
465
|
```python
|
|
336
466
|
from greenmining.controllers.repository_controller import RepositoryController
|
|
@@ -551,17 +681,24 @@ config = Config(
|
|
|
551
681
|
|
|
552
682
|
### Core Capabilities
|
|
553
683
|
|
|
554
|
-
- **Pattern Detection**:
|
|
555
|
-
- **Keyword Analysis**:
|
|
556
|
-
- **
|
|
557
|
-
- **
|
|
558
|
-
- **Batch Processing**:
|
|
559
|
-
- **
|
|
560
|
-
- **
|
|
684
|
+
- **Pattern Detection**: 122 sustainability patterns across 15 categories from the GSF catalog
|
|
685
|
+
- **Keyword Analysis**: 321 green software detection keywords
|
|
686
|
+
- **Repository Fetching**: GraphQL API with date, star, and language filters
|
|
687
|
+
- **URL-Based Analysis**: Direct PyDriller analysis from GitHub URLs (HTTPS and SSH)
|
|
688
|
+
- **Batch Processing**: Parallel analysis of multiple repositories with configurable workers
|
|
689
|
+
- **Private Repository Support**: Authentication via SSH keys or GitHub tokens
|
|
690
|
+
- **Energy Measurement**: RAPL, CodeCarbon, and CPU Energy Meter backends
|
|
691
|
+
- **Carbon Footprint Reporting**: CO2 emissions with 20+ country profiles and cloud region support (AWS, GCP, Azure)
|
|
692
|
+
- **Power Regression Detection**: Identify commits that increased energy consumption
|
|
693
|
+
- **Metrics-to-Power Correlation**: Pearson and Spearman analysis between code metrics and power
|
|
694
|
+
- **Version Power Comparison**: Compare power consumption across software versions with trend detection
|
|
695
|
+
- **Method-Level Analysis**: Per-method complexity metrics via Lizard integration
|
|
696
|
+
- **Source Code Access**: Before/after source code for refactoring detection
|
|
697
|
+
- **Full Process Metrics**: All 8 PyDriller process metrics (ChangeSet, CodeChurn, CommitsCount, ContributorsCount, ContributorsExperience, HistoryComplexity, HunksCount, LinesCount)
|
|
698
|
+
- **Statistical Analysis**: Correlations, effect sizes, and temporal trends
|
|
699
|
+
- **Multi-format Output**: Markdown reports, CSV exports, JSON data
|
|
700
|
+
- **Web Dashboard**: Flask-based interactive visualization (`pip install greenmining[dashboard]`)
|
|
561
701
|
- **Docker Support**: Pre-built images for containerized analysis
|
|
562
|
-
- **Programmatic API**: Full Python API for custom workflows and integrations
|
|
563
|
-
- **Clean Architecture**: Modular design with services layer (Fetcher, Extractor, Analyzer, Aggregator, Reports)
|
|
564
|
-
- **Energy Measurement**: Real-time energy consumption tracking via RAPL (Linux) or CodeCarbon (cross-platform)
|
|
565
702
|
|
|
566
703
|
### Energy Measurement
|
|
567
704
|
|
|
@@ -712,8 +849,15 @@ ruff check greenmining/ tests/
|
|
|
712
849
|
- Python 3.9+
|
|
713
850
|
- PyGithub >= 2.1.1
|
|
714
851
|
- PyDriller >= 2.5
|
|
715
|
-
- pandas >= 2.2.0
|
|
716
|
-
|
|
852
|
+
- pandas >= 2.2.0
|
|
853
|
+
|
|
854
|
+
**Optional dependencies:**
|
|
855
|
+
|
|
856
|
+
```bash
|
|
857
|
+
pip install greenmining[energy] # psutil, codecarbon (energy measurement)
|
|
858
|
+
pip install greenmining[dashboard] # flask (web dashboard)
|
|
859
|
+
pip install greenmining[dev] # pytest, black, ruff, mypy (development)
|
|
860
|
+
```
|
|
717
861
|
|
|
718
862
|
## License
|
|
719
863
|
|
|
@@ -272,7 +272,137 @@ print(f"Top patterns: {stats['top_patterns'][:5]}")
|
|
|
272
272
|
aggregator.export_to_csv(results, "output.csv")
|
|
273
273
|
```
|
|
274
274
|
|
|
275
|
-
####
|
|
275
|
+
#### URL-Based Repository Analysis
|
|
276
|
+
|
|
277
|
+
```python
|
|
278
|
+
from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
|
|
279
|
+
|
|
280
|
+
analyzer = LocalRepoAnalyzer(
|
|
281
|
+
max_commits=200,
|
|
282
|
+
cleanup_after=True,
|
|
283
|
+
)
|
|
284
|
+
|
|
285
|
+
result = analyzer.analyze_repository("https://github.com/pallets/flask")
|
|
286
|
+
|
|
287
|
+
print(f"Repository: {result.name}")
|
|
288
|
+
print(f"Commits analyzed: {result.total_commits}")
|
|
289
|
+
print(f"Green-aware: {result.green_commits} ({result.green_commit_rate:.1%})")
|
|
290
|
+
|
|
291
|
+
for commit in result.commits[:5]:
|
|
292
|
+
if commit.green_aware:
|
|
293
|
+
print(f" {commit.message[:60]}...")
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
#### Batch Analysis with Parallelism
|
|
297
|
+
|
|
298
|
+
```python
|
|
299
|
+
from greenmining import analyze_repositories
|
|
300
|
+
|
|
301
|
+
results = analyze_repositories(
|
|
302
|
+
urls=[
|
|
303
|
+
"https://github.com/kubernetes/kubernetes",
|
|
304
|
+
"https://github.com/istio/istio",
|
|
305
|
+
"https://github.com/envoyproxy/envoy",
|
|
306
|
+
],
|
|
307
|
+
max_commits=100,
|
|
308
|
+
parallel_workers=3,
|
|
309
|
+
energy_tracking=True,
|
|
310
|
+
energy_backend="auto",
|
|
311
|
+
)
|
|
312
|
+
|
|
313
|
+
for result in results:
|
|
314
|
+
print(f"{result.name}: {result.green_commit_rate:.1%} green")
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
#### Private Repository Analysis
|
|
318
|
+
|
|
319
|
+
```python
|
|
320
|
+
from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
|
|
321
|
+
|
|
322
|
+
# HTTPS with token
|
|
323
|
+
analyzer = LocalRepoAnalyzer(github_token="ghp_xxxx")
|
|
324
|
+
result = analyzer.analyze_repository("https://github.com/company/private-repo")
|
|
325
|
+
|
|
326
|
+
# SSH with key
|
|
327
|
+
analyzer = LocalRepoAnalyzer(ssh_key_path="~/.ssh/id_rsa")
|
|
328
|
+
result = analyzer.analyze_repository("git@github.com:company/private-repo.git")
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
#### Power Regression Detection
|
|
332
|
+
|
|
333
|
+
```python
|
|
334
|
+
from greenmining.analyzers import PowerRegressionDetector
|
|
335
|
+
|
|
336
|
+
detector = PowerRegressionDetector(
|
|
337
|
+
test_command="pytest tests/ -x",
|
|
338
|
+
energy_backend="rapl",
|
|
339
|
+
threshold_percent=5.0,
|
|
340
|
+
iterations=5,
|
|
341
|
+
)
|
|
342
|
+
|
|
343
|
+
regressions = detector.detect(
|
|
344
|
+
repo_path="/path/to/repo",
|
|
345
|
+
baseline_commit="v1.0.0",
|
|
346
|
+
target_commit="HEAD",
|
|
347
|
+
)
|
|
348
|
+
|
|
349
|
+
for regression in regressions:
|
|
350
|
+
print(f"Commit {regression.sha[:8]}: +{regression.power_increase:.1f}%")
|
|
351
|
+
```
|
|
352
|
+
|
|
353
|
+
#### Version Power Comparison
|
|
354
|
+
|
|
355
|
+
```python
|
|
356
|
+
from greenmining.analyzers import VersionPowerAnalyzer
|
|
357
|
+
|
|
358
|
+
analyzer = VersionPowerAnalyzer(
|
|
359
|
+
test_command="pytest tests/",
|
|
360
|
+
energy_backend="rapl",
|
|
361
|
+
iterations=10,
|
|
362
|
+
warmup_iterations=2,
|
|
363
|
+
)
|
|
364
|
+
|
|
365
|
+
report = analyzer.analyze_versions(
|
|
366
|
+
repo_path="/path/to/repo",
|
|
367
|
+
versions=["v1.0", "v1.1", "v1.2", "v2.0"],
|
|
368
|
+
)
|
|
369
|
+
|
|
370
|
+
print(report.summary())
|
|
371
|
+
print(f"Trend: {report.trend}")
|
|
372
|
+
print(f"Most efficient: {report.most_efficient}")
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
#### Metrics-to-Power Correlation
|
|
376
|
+
|
|
377
|
+
```python
|
|
378
|
+
from greenmining.analyzers import MetricsPowerCorrelator
|
|
379
|
+
|
|
380
|
+
correlator = MetricsPowerCorrelator()
|
|
381
|
+
correlator.fit(
|
|
382
|
+
metrics=["complexity", "nloc", "code_churn"],
|
|
383
|
+
metrics_values={
|
|
384
|
+
"complexity": [10, 20, 30, 40],
|
|
385
|
+
"nloc": [100, 200, 300, 400],
|
|
386
|
+
"code_churn": [50, 100, 150, 200],
|
|
387
|
+
},
|
|
388
|
+
power_measurements=[5.0, 8.0, 12.0, 15.0],
|
|
389
|
+
)
|
|
390
|
+
|
|
391
|
+
print(f"Pearson: {correlator.pearson}")
|
|
392
|
+
print(f"Spearman: {correlator.spearman}")
|
|
393
|
+
print(f"Feature importance: {correlator.feature_importance}")
|
|
394
|
+
```
|
|
395
|
+
|
|
396
|
+
#### Web Dashboard
|
|
397
|
+
|
|
398
|
+
```python
|
|
399
|
+
from greenmining.dashboard import run_dashboard
|
|
400
|
+
|
|
401
|
+
# Launch interactive dashboard (requires pip install greenmining[dashboard])
|
|
402
|
+
run_dashboard(data_dir="./data", host="127.0.0.1", port=5000)
|
|
403
|
+
```
|
|
404
|
+
|
|
405
|
+
#### Pipeline Batch Analysis
|
|
276
406
|
|
|
277
407
|
```python
|
|
278
408
|
from greenmining.controllers.repository_controller import RepositoryController
|
|
@@ -493,17 +623,24 @@ config = Config(
|
|
|
493
623
|
|
|
494
624
|
### Core Capabilities
|
|
495
625
|
|
|
496
|
-
- **Pattern Detection**:
|
|
497
|
-
- **Keyword Analysis**:
|
|
498
|
-
- **
|
|
499
|
-
- **
|
|
500
|
-
- **Batch Processing**:
|
|
501
|
-
- **
|
|
502
|
-
- **
|
|
626
|
+
- **Pattern Detection**: 122 sustainability patterns across 15 categories from the GSF catalog
|
|
627
|
+
- **Keyword Analysis**: 321 green software detection keywords
|
|
628
|
+
- **Repository Fetching**: GraphQL API with date, star, and language filters
|
|
629
|
+
- **URL-Based Analysis**: Direct PyDriller analysis from GitHub URLs (HTTPS and SSH)
|
|
630
|
+
- **Batch Processing**: Parallel analysis of multiple repositories with configurable workers
|
|
631
|
+
- **Private Repository Support**: Authentication via SSH keys or GitHub tokens
|
|
632
|
+
- **Energy Measurement**: RAPL, CodeCarbon, and CPU Energy Meter backends
|
|
633
|
+
- **Carbon Footprint Reporting**: CO2 emissions with 20+ country profiles and cloud region support (AWS, GCP, Azure)
|
|
634
|
+
- **Power Regression Detection**: Identify commits that increased energy consumption
|
|
635
|
+
- **Metrics-to-Power Correlation**: Pearson and Spearman analysis between code metrics and power
|
|
636
|
+
- **Version Power Comparison**: Compare power consumption across software versions with trend detection
|
|
637
|
+
- **Method-Level Analysis**: Per-method complexity metrics via Lizard integration
|
|
638
|
+
- **Source Code Access**: Before/after source code for refactoring detection
|
|
639
|
+
- **Full Process Metrics**: All 8 PyDriller process metrics (ChangeSet, CodeChurn, CommitsCount, ContributorsCount, ContributorsExperience, HistoryComplexity, HunksCount, LinesCount)
|
|
640
|
+
- **Statistical Analysis**: Correlations, effect sizes, and temporal trends
|
|
641
|
+
- **Multi-format Output**: Markdown reports, CSV exports, JSON data
|
|
642
|
+
- **Web Dashboard**: Flask-based interactive visualization (`pip install greenmining[dashboard]`)
|
|
503
643
|
- **Docker Support**: Pre-built images for containerized analysis
|
|
504
|
-
- **Programmatic API**: Full Python API for custom workflows and integrations
|
|
505
|
-
- **Clean Architecture**: Modular design with services layer (Fetcher, Extractor, Analyzer, Aggregator, Reports)
|
|
506
|
-
- **Energy Measurement**: Real-time energy consumption tracking via RAPL (Linux) or CodeCarbon (cross-platform)
|
|
507
644
|
|
|
508
645
|
### Energy Measurement
|
|
509
646
|
|
|
@@ -654,8 +791,15 @@ ruff check greenmining/ tests/
|
|
|
654
791
|
- Python 3.9+
|
|
655
792
|
- PyGithub >= 2.1.1
|
|
656
793
|
- PyDriller >= 2.5
|
|
657
|
-
- pandas >= 2.2.0
|
|
658
|
-
|
|
794
|
+
- pandas >= 2.2.0
|
|
795
|
+
|
|
796
|
+
**Optional dependencies:**
|
|
797
|
+
|
|
798
|
+
```bash
|
|
799
|
+
pip install greenmining[energy] # psutil, codecarbon (energy measurement)
|
|
800
|
+
pip install greenmining[dashboard] # flask (web dashboard)
|
|
801
|
+
pip install greenmining[dev] # pytest, black, ruff, mypy (development)
|
|
802
|
+
```
|
|
659
803
|
|
|
660
804
|
## License
|
|
661
805
|
|
|
@@ -9,7 +9,7 @@ from greenmining.gsf_patterns import (
|
|
|
9
9
|
is_green_aware,
|
|
10
10
|
)
|
|
11
11
|
|
|
12
|
-
__version__ = "1.0.6"
|
|
12
|
+
__version__ = "1.0.7"
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
def fetch_repositories(
|
|
@@ -18,6 +18,10 @@ def fetch_repositories(
|
|
|
18
18
|
min_stars: int = None,
|
|
19
19
|
languages: list = None,
|
|
20
20
|
keywords: str = None,
|
|
21
|
+
created_after: str = None,
|
|
22
|
+
created_before: str = None,
|
|
23
|
+
pushed_after: str = None,
|
|
24
|
+
pushed_before: str = None,
|
|
21
25
|
):
|
|
22
26
|
# Fetch repositories from GitHub with custom search keywords.
|
|
23
27
|
config = Config()
|
|
@@ -29,6 +33,10 @@ def fetch_repositories(
|
|
|
29
33
|
min_stars=min_stars,
|
|
30
34
|
languages=languages,
|
|
31
35
|
keywords=keywords,
|
|
36
|
+
created_after=created_after,
|
|
37
|
+
created_before=created_before,
|
|
38
|
+
pushed_after=pushed_after,
|
|
39
|
+
pushed_before=pushed_before,
|
|
32
40
|
)
|
|
33
41
|
|
|
34
42
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: greenmining
|
|
3
|
-
Version: 1.0.6
|
|
3
|
+
Version: 1.0.7
|
|
4
4
|
Summary: An empirical Python library for Mining Software Repositories (MSR) in Green IT research
|
|
5
5
|
Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
|
|
6
6
|
License: MIT
|
|
@@ -330,7 +330,137 @@ print(f"Top patterns: {stats['top_patterns'][:5]}")
|
|
|
330
330
|
aggregator.export_to_csv(results, "output.csv")
|
|
331
331
|
```
|
|
332
332
|
|
|
333
|
-
####
|
|
333
|
+
#### URL-Based Repository Analysis
|
|
334
|
+
|
|
335
|
+
```python
|
|
336
|
+
from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
|
|
337
|
+
|
|
338
|
+
analyzer = LocalRepoAnalyzer(
|
|
339
|
+
max_commits=200,
|
|
340
|
+
cleanup_after=True,
|
|
341
|
+
)
|
|
342
|
+
|
|
343
|
+
result = analyzer.analyze_repository("https://github.com/pallets/flask")
|
|
344
|
+
|
|
345
|
+
print(f"Repository: {result.name}")
|
|
346
|
+
print(f"Commits analyzed: {result.total_commits}")
|
|
347
|
+
print(f"Green-aware: {result.green_commits} ({result.green_commit_rate:.1%})")
|
|
348
|
+
|
|
349
|
+
for commit in result.commits[:5]:
|
|
350
|
+
if commit.green_aware:
|
|
351
|
+
print(f" {commit.message[:60]}...")
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
#### Batch Analysis with Parallelism
|
|
355
|
+
|
|
356
|
+
```python
|
|
357
|
+
from greenmining import analyze_repositories
|
|
358
|
+
|
|
359
|
+
results = analyze_repositories(
|
|
360
|
+
urls=[
|
|
361
|
+
"https://github.com/kubernetes/kubernetes",
|
|
362
|
+
"https://github.com/istio/istio",
|
|
363
|
+
"https://github.com/envoyproxy/envoy",
|
|
364
|
+
],
|
|
365
|
+
max_commits=100,
|
|
366
|
+
parallel_workers=3,
|
|
367
|
+
energy_tracking=True,
|
|
368
|
+
energy_backend="auto",
|
|
369
|
+
)
|
|
370
|
+
|
|
371
|
+
for result in results:
|
|
372
|
+
print(f"{result.name}: {result.green_commit_rate:.1%} green")
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
#### Private Repository Analysis
|
|
376
|
+
|
|
377
|
+
```python
|
|
378
|
+
from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
|
|
379
|
+
|
|
380
|
+
# HTTPS with token
|
|
381
|
+
analyzer = LocalRepoAnalyzer(github_token="ghp_xxxx")
|
|
382
|
+
result = analyzer.analyze_repository("https://github.com/company/private-repo")
|
|
383
|
+
|
|
384
|
+
# SSH with key
|
|
385
|
+
analyzer = LocalRepoAnalyzer(ssh_key_path="~/.ssh/id_rsa")
|
|
386
|
+
result = analyzer.analyze_repository("git@github.com:company/private-repo.git")
|
|
387
|
+
```
|
|
388
|
+
|
|
389
|
+
#### Power Regression Detection
|
|
390
|
+
|
|
391
|
+
```python
|
|
392
|
+
from greenmining.analyzers import PowerRegressionDetector
|
|
393
|
+
|
|
394
|
+
detector = PowerRegressionDetector(
|
|
395
|
+
test_command="pytest tests/ -x",
|
|
396
|
+
energy_backend="rapl",
|
|
397
|
+
threshold_percent=5.0,
|
|
398
|
+
iterations=5,
|
|
399
|
+
)
|
|
400
|
+
|
|
401
|
+
regressions = detector.detect(
|
|
402
|
+
repo_path="/path/to/repo",
|
|
403
|
+
baseline_commit="v1.0.0",
|
|
404
|
+
target_commit="HEAD",
|
|
405
|
+
)
|
|
406
|
+
|
|
407
|
+
for regression in regressions:
|
|
408
|
+
print(f"Commit {regression.sha[:8]}: +{regression.power_increase:.1f}%")
|
|
409
|
+
```
|
|
410
|
+
|
|
411
|
+
#### Version Power Comparison
|
|
412
|
+
|
|
413
|
+
```python
|
|
414
|
+
from greenmining.analyzers import VersionPowerAnalyzer
|
|
415
|
+
|
|
416
|
+
analyzer = VersionPowerAnalyzer(
|
|
417
|
+
test_command="pytest tests/",
|
|
418
|
+
energy_backend="rapl",
|
|
419
|
+
iterations=10,
|
|
420
|
+
warmup_iterations=2,
|
|
421
|
+
)
|
|
422
|
+
|
|
423
|
+
report = analyzer.analyze_versions(
|
|
424
|
+
repo_path="/path/to/repo",
|
|
425
|
+
versions=["v1.0", "v1.1", "v1.2", "v2.0"],
|
|
426
|
+
)
|
|
427
|
+
|
|
428
|
+
print(report.summary())
|
|
429
|
+
print(f"Trend: {report.trend}")
|
|
430
|
+
print(f"Most efficient: {report.most_efficient}")
|
|
431
|
+
```
|
|
432
|
+
|
|
433
|
+
#### Metrics-to-Power Correlation
|
|
434
|
+
|
|
435
|
+
```python
|
|
436
|
+
from greenmining.analyzers import MetricsPowerCorrelator
|
|
437
|
+
|
|
438
|
+
correlator = MetricsPowerCorrelator()
|
|
439
|
+
correlator.fit(
|
|
440
|
+
metrics=["complexity", "nloc", "code_churn"],
|
|
441
|
+
metrics_values={
|
|
442
|
+
"complexity": [10, 20, 30, 40],
|
|
443
|
+
"nloc": [100, 200, 300, 400],
|
|
444
|
+
"code_churn": [50, 100, 150, 200],
|
|
445
|
+
},
|
|
446
|
+
power_measurements=[5.0, 8.0, 12.0, 15.0],
|
|
447
|
+
)
|
|
448
|
+
|
|
449
|
+
print(f"Pearson: {correlator.pearson}")
|
|
450
|
+
print(f"Spearman: {correlator.spearman}")
|
|
451
|
+
print(f"Feature importance: {correlator.feature_importance}")
|
|
452
|
+
```
|
|
453
|
+
|
|
454
|
+
#### Web Dashboard
|
|
455
|
+
|
|
456
|
+
```python
|
|
457
|
+
from greenmining.dashboard import run_dashboard
|
|
458
|
+
|
|
459
|
+
# Launch interactive dashboard (requires pip install greenmining[dashboard])
|
|
460
|
+
run_dashboard(data_dir="./data", host="127.0.0.1", port=5000)
|
|
461
|
+
```
|
|
462
|
+
|
|
463
|
+
#### Pipeline Batch Analysis
|
|
334
464
|
|
|
335
465
|
```python
|
|
336
466
|
from greenmining.controllers.repository_controller import RepositoryController
|
|
@@ -551,17 +681,24 @@ config = Config(
|
|
|
551
681
|
|
|
552
682
|
### Core Capabilities
|
|
553
683
|
|
|
554
|
-
- **Pattern Detection**:
|
|
555
|
-
- **Keyword Analysis**:
|
|
556
|
-
- **
|
|
557
|
-
- **
|
|
558
|
-
- **Batch Processing**:
|
|
559
|
-
- **
|
|
560
|
-
- **
|
|
684
|
+
- **Pattern Detection**: 122 sustainability patterns across 15 categories from the GSF catalog
|
|
685
|
+
- **Keyword Analysis**: 321 green software detection keywords
|
|
686
|
+
- **Repository Fetching**: GraphQL API with date, star, and language filters
|
|
687
|
+
- **URL-Based Analysis**: Direct PyDriller analysis from GitHub URLs (HTTPS and SSH)
|
|
688
|
+
- **Batch Processing**: Parallel analysis of multiple repositories with configurable workers
|
|
689
|
+
- **Private Repository Support**: Authentication via SSH keys or GitHub tokens
|
|
690
|
+
- **Energy Measurement**: RAPL, CodeCarbon, and CPU Energy Meter backends
|
|
691
|
+
- **Carbon Footprint Reporting**: CO2 emissions with 20+ country profiles and cloud region support (AWS, GCP, Azure)
|
|
692
|
+
- **Power Regression Detection**: Identify commits that increased energy consumption
|
|
693
|
+
- **Metrics-to-Power Correlation**: Pearson and Spearman analysis between code metrics and power
|
|
694
|
+
- **Version Power Comparison**: Compare power consumption across software versions with trend detection
|
|
695
|
+
- **Method-Level Analysis**: Per-method complexity metrics via Lizard integration
|
|
696
|
+
- **Source Code Access**: Before/after source code for refactoring detection
|
|
697
|
+
- **Full Process Metrics**: All 8 PyDriller process metrics (ChangeSet, CodeChurn, CommitsCount, ContributorsCount, ContributorsExperience, HistoryComplexity, HunksCount, LinesCount)
|
|
698
|
+
- **Statistical Analysis**: Correlations, effect sizes, and temporal trends
|
|
699
|
+
- **Multi-format Output**: Markdown reports, CSV exports, JSON data
|
|
700
|
+
- **Web Dashboard**: Flask-based interactive visualization (`pip install greenmining[dashboard]`)
|
|
561
701
|
- **Docker Support**: Pre-built images for containerized analysis
|
|
562
|
-
- **Programmatic API**: Full Python API for custom workflows and integrations
|
|
563
|
-
- **Clean Architecture**: Modular design with services layer (Fetcher, Extractor, Analyzer, Aggregator, Reports)
|
|
564
|
-
- **Energy Measurement**: Real-time energy consumption tracking via RAPL (Linux) or CodeCarbon (cross-platform)
|
|
565
702
|
|
|
566
703
|
### Energy Measurement
|
|
567
704
|
|
|
@@ -712,8 +849,15 @@ ruff check greenmining/ tests/
|
|
|
712
849
|
- Python 3.9+
|
|
713
850
|
- PyGithub >= 2.1.1
|
|
714
851
|
- PyDriller >= 2.5
|
|
715
|
-
- pandas >= 2.2.0
|
|
716
|
-
|
|
852
|
+
- pandas >= 2.2.0
|
|
853
|
+
|
|
854
|
+
**Optional dependencies:**
|
|
855
|
+
|
|
856
|
+
```bash
|
|
857
|
+
pip install greenmining[energy] # psutil, codecarbon (energy measurement)
|
|
858
|
+
pip install greenmining[dashboard] # flask (web dashboard)
|
|
859
|
+
pip install greenmining[dev] # pytest, black, ruff, mypy (development)
|
|
860
|
+
```
|
|
717
861
|
|
|
718
862
|
## License
|
|
719
863
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "greenmining"
|
|
7
|
-
version = "1.0.6"
|
|
7
|
+
version = "1.0.7"
|
|
8
8
|
description = "An empirical Python library for Mining Software Repositories (MSR) in Green IT research"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|