greenmining 1.0.5__tar.gz → 1.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. {greenmining-1.0.5 → greenmining-1.0.7}/CHANGELOG.md +0 -1
  2. {greenmining-1.0.5/greenmining.egg-info → greenmining-1.0.7}/PKG-INFO +212 -43
  3. {greenmining-1.0.5 → greenmining-1.0.7}/README.md +204 -40
  4. greenmining-1.0.7/greenmining/__init__.py +95 -0
  5. greenmining-1.0.7/greenmining/analyzers/__init__.py +22 -0
  6. greenmining-1.0.7/greenmining/analyzers/metrics_power_correlator.py +165 -0
  7. greenmining-1.0.7/greenmining/analyzers/power_regression.py +212 -0
  8. greenmining-1.0.7/greenmining/analyzers/version_power_analyzer.py +246 -0
  9. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/config.py +46 -34
  10. greenmining-1.0.7/greenmining/dashboard/__init__.py +5 -0
  11. greenmining-1.0.7/greenmining/dashboard/app.py +200 -0
  12. greenmining-1.0.7/greenmining/energy/__init__.py +20 -0
  13. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/energy/base.py +45 -35
  14. greenmining-1.0.7/greenmining/energy/carbon_reporter.py +242 -0
  15. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/energy/codecarbon_meter.py +25 -24
  16. greenmining-1.0.7/greenmining/energy/cpu_meter.py +144 -0
  17. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/energy/rapl.py +30 -36
  18. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/services/__init__.py +13 -3
  19. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/services/commit_extractor.py +9 -5
  20. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/services/local_repo_analyzer.py +325 -63
  21. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/services/reports.py +5 -8
  22. {greenmining-1.0.5 → greenmining-1.0.7/greenmining.egg-info}/PKG-INFO +212 -43
  23. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining.egg-info/SOURCES.txt +14 -0
  24. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining.egg-info/requires.txt +7 -0
  25. {greenmining-1.0.5 → greenmining-1.0.7}/pyproject.toml +18 -6
  26. greenmining-1.0.5/greenmining/__init__.py +0 -43
  27. greenmining-1.0.5/greenmining/analyzers/__init__.py +0 -13
  28. greenmining-1.0.5/greenmining/energy/__init__.py +0 -13
  29. {greenmining-1.0.5 → greenmining-1.0.7}/LICENSE +0 -0
  30. {greenmining-1.0.5 → greenmining-1.0.7}/MANIFEST.in +0 -0
  31. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/__main__.py +0 -0
  32. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/__version__.py +0 -0
  33. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/analyzers/code_diff_analyzer.py +0 -0
  34. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/analyzers/qualitative_analyzer.py +0 -0
  35. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/analyzers/statistical_analyzer.py +0 -0
  36. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/analyzers/temporal_analyzer.py +0 -0
  37. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/controllers/__init__.py +0 -0
  38. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/controllers/repository_controller.py +0 -0
  39. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/gsf_patterns.py +0 -0
  40. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/models/__init__.py +0 -0
  41. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/models/aggregated_stats.py +0 -0
  42. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/models/analysis_result.py +0 -0
  43. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/models/commit.py +0 -0
  44. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/models/repository.py +0 -0
  45. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/presenters/__init__.py +0 -0
  46. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/presenters/console_presenter.py +0 -0
  47. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/services/data_aggregator.py +0 -0
  48. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/services/data_analyzer.py +0 -0
  49. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/services/github_fetcher.py +0 -0
  50. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/services/github_graphql_fetcher.py +0 -0
  51. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining/utils.py +0 -0
  52. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining.egg-info/dependency_links.txt +0 -0
  53. {greenmining-1.0.5 → greenmining-1.0.7}/greenmining.egg-info/top_level.txt +0 -0
  54. {greenmining-1.0.5 → greenmining-1.0.7}/setup.cfg +0 -0
  55. {greenmining-1.0.5 → greenmining-1.0.7}/setup.py +0 -0
@@ -84,7 +84,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
84
84
  - Pattern matching engine
85
85
  - Green awareness detection
86
86
  - Data analysis and reporting
87
- - CLI interface with Click
88
87
  - Docker support with multi-stage builds
89
88
  - GitHub Actions CI/CD pipeline
90
89
  - PyPI publishing workflow
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: greenmining
3
- Version: 1.0.5
4
- Summary: Analyze GitHub repositories to identify green software engineering patterns and energy-efficient practices
3
+ Version: 1.0.7
4
+ Summary: An empirical Python library for Mining Software Repositories (MSR) in Green IT research
5
5
  Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
6
6
  License: MIT
7
7
  Project-URL: Homepage, https://github.com/adam-bouafia/greenmining
@@ -9,7 +9,7 @@ Project-URL: Documentation, https://github.com/adam-bouafia/greenmining#readme
9
9
  Project-URL: Repository, https://github.com/adam-bouafia/greenmining
10
10
  Project-URL: Issues, https://github.com/adam-bouafia/greenmining/issues
11
11
  Project-URL: Changelog, https://github.com/adam-bouafia/greenmining/blob/main/CHANGELOG.md
12
- Keywords: green-software,gsf,sustainability,carbon-footprint,microservices,mining,repository-analysis,energy-efficiency,github-analysis
12
+ Keywords: green-software,gsf,msr,mining-software-repositories,green-it,sustainability,carbon-footprint,energy-efficiency,repository-analysis,github-analysis,pydriller,empirical-software-engineering
13
13
  Classifier: Development Status :: 3 - Alpha
14
14
  Classifier: Intended Audience :: Developers
15
15
  Classifier: Intended Audience :: Science/Research
@@ -45,6 +45,11 @@ Requires-Dist: ruff>=0.1.9; extra == "dev"
45
45
  Requires-Dist: mypy>=1.8.0; extra == "dev"
46
46
  Requires-Dist: build>=1.0.5; extra == "dev"
47
47
  Requires-Dist: twine>=4.0.2; extra == "dev"
48
+ Provides-Extra: energy
49
+ Requires-Dist: psutil>=5.9.0; extra == "energy"
50
+ Requires-Dist: codecarbon>=2.3.0; extra == "energy"
51
+ Provides-Extra: dashboard
52
+ Requires-Dist: flask>=3.0.0; extra == "dashboard"
48
53
  Provides-Extra: docs
49
54
  Requires-Dist: sphinx>=7.2.0; extra == "docs"
50
55
  Requires-Dist: sphinx-rtd-theme>=2.0.0; extra == "docs"
@@ -53,15 +58,30 @@ Dynamic: license-file
53
58
 
54
59
  # greenmining
55
60
 
56
- Green mining for microservices repositories.
61
+ An empirical Python library for Mining Software Repositories (MSR) in Green IT research.
57
62
 
58
63
  [![PyPI](https://img.shields.io/pypi/v/greenmining)](https://pypi.org/project/greenmining/)
59
64
  [![Python](https://img.shields.io/pypi/pyversions/greenmining)](https://pypi.org/project/greenmining/)
60
65
  [![License](https://img.shields.io/github/license/adam-bouafia/greenmining)](LICENSE)
66
+ [![Documentation](https://img.shields.io/badge/docs-readthedocs-blue)](https://greenmining.readthedocs.io/)
61
67
 
62
68
  ## Overview
63
69
 
64
- `greenmining` is a Python library for analyzing GitHub repositories to identify green software engineering practices and energy-efficient patterns. It detects sustainable software patterns across cloud, web, AI, database, networking, and general categories.
70
+ `greenmining` is a research-grade Python library designed for **empirical Mining Software Repositories (MSR)** studies in **Green IT**. It enables researchers and practitioners to:
71
+
72
+ - **Mine repositories at scale** - Fetch and analyze GitHub repositories via GraphQL API with configurable filters
73
+ - **Batch analysis with parallelism** - Analyze multiple repositories concurrently with configurable worker pools
74
+ - **Classify green commits** - Detect 122 sustainability patterns from the Green Software Foundation (GSF) catalog
75
+ - **Analyze any repository by URL** - Direct PyDriller-based analysis with support for private repositories
76
+ - **Measure energy consumption** - RAPL, CodeCarbon, and CPU Energy Meter backends for power profiling
77
+ - **Carbon footprint reporting** - CO2 emissions calculation with 20+ country profiles and cloud region support
78
+ - **Power regression detection** - Identify commits that increased energy consumption
79
+ - **Method-level analysis** - Per-method complexity and metrics via Lizard integration
80
+ - **Version power comparison** - Compare power consumption across software versions
81
+ - **Generate research datasets** - Statistical analysis, temporal trends, and publication-ready reports
82
+ - **Web dashboard** - Flask-based interactive visualization of analysis results
83
+
84
+ Whether you're conducting MSR studies, analyzing green software adoption, or measuring the energy footprint of codebases, `greenmining` provides the empirical toolkit you need.
65
85
 
66
86
  ## Installation
67
87
 
@@ -310,7 +330,137 @@ print(f"Top patterns: {stats['top_patterns'][:5]}")
310
330
  aggregator.export_to_csv(results, "output.csv")
311
331
  ```
312
332
 
313
- #### Batch Analysis
333
+ #### URL-Based Repository Analysis
334
+
335
+ ```python
336
+ from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
337
+
338
+ analyzer = LocalRepoAnalyzer(
339
+ max_commits=200,
340
+ cleanup_after=True,
341
+ )
342
+
343
+ result = analyzer.analyze_repository("https://github.com/pallets/flask")
344
+
345
+ print(f"Repository: {result.name}")
346
+ print(f"Commits analyzed: {result.total_commits}")
347
+ print(f"Green-aware: {result.green_commits} ({result.green_commit_rate:.1%})")
348
+
349
+ for commit in result.commits[:5]:
350
+     if commit.green_aware:
351
+         print(f" {commit.message[:60]}...")
352
+ ```
353
+
354
+ #### Batch Analysis with Parallelism
355
+
356
+ ```python
357
+ from greenmining import analyze_repositories
358
+
359
+ results = analyze_repositories(
360
+ urls=[
361
+ "https://github.com/kubernetes/kubernetes",
362
+ "https://github.com/istio/istio",
363
+ "https://github.com/envoyproxy/envoy",
364
+ ],
365
+ max_commits=100,
366
+ parallel_workers=3,
367
+ energy_tracking=True,
368
+ energy_backend="auto",
369
+ )
370
+
371
+ for result in results:
372
+     print(f"{result.name}: {result.green_commit_rate:.1%} green")
373
+ ```
374
+
375
+ #### Private Repository Analysis
376
+
377
+ ```python
378
+ from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
379
+
380
+ # HTTPS with token
381
+ analyzer = LocalRepoAnalyzer(github_token="ghp_xxxx")
382
+ result = analyzer.analyze_repository("https://github.com/company/private-repo")
383
+
384
+ # SSH with key
385
+ analyzer = LocalRepoAnalyzer(ssh_key_path="~/.ssh/id_rsa")
386
+ result = analyzer.analyze_repository("git@github.com:company/private-repo.git")
387
+ ```
388
+
389
+ #### Power Regression Detection
390
+
391
+ ```python
392
+ from greenmining.analyzers import PowerRegressionDetector
393
+
394
+ detector = PowerRegressionDetector(
395
+ test_command="pytest tests/ -x",
396
+ energy_backend="rapl",
397
+ threshold_percent=5.0,
398
+ iterations=5,
399
+ )
400
+
401
+ regressions = detector.detect(
402
+ repo_path="/path/to/repo",
403
+ baseline_commit="v1.0.0",
404
+ target_commit="HEAD",
405
+ )
406
+
407
+ for regression in regressions:
408
+     print(f"Commit {regression.sha[:8]}: +{regression.power_increase:.1f}%")
409
+ ```
410
+
411
+ #### Version Power Comparison
412
+
413
+ ```python
414
+ from greenmining.analyzers import VersionPowerAnalyzer
415
+
416
+ analyzer = VersionPowerAnalyzer(
417
+ test_command="pytest tests/",
418
+ energy_backend="rapl",
419
+ iterations=10,
420
+ warmup_iterations=2,
421
+ )
422
+
423
+ report = analyzer.analyze_versions(
424
+ repo_path="/path/to/repo",
425
+ versions=["v1.0", "v1.1", "v1.2", "v2.0"],
426
+ )
427
+
428
+ print(report.summary())
429
+ print(f"Trend: {report.trend}")
430
+ print(f"Most efficient: {report.most_efficient}")
431
+ ```
432
+
433
+ #### Metrics-to-Power Correlation
434
+
435
+ ```python
436
+ from greenmining.analyzers import MetricsPowerCorrelator
437
+
438
+ correlator = MetricsPowerCorrelator()
439
+ correlator.fit(
440
+ metrics=["complexity", "nloc", "code_churn"],
441
+ metrics_values={
442
+ "complexity": [10, 20, 30, 40],
443
+ "nloc": [100, 200, 300, 400],
444
+ "code_churn": [50, 100, 150, 200],
445
+ },
446
+ power_measurements=[5.0, 8.0, 12.0, 15.0],
447
+ )
448
+
449
+ print(f"Pearson: {correlator.pearson}")
450
+ print(f"Spearman: {correlator.spearman}")
451
+ print(f"Feature importance: {correlator.feature_importance}")
452
+ ```
453
+
454
+ #### Web Dashboard
455
+
456
+ ```python
457
+ from greenmining.dashboard import run_dashboard
458
+
459
+ # Launch interactive dashboard (requires pip install greenmining[dashboard])
460
+ run_dashboard(data_dir="./data", host="127.0.0.1", port=5000)
461
+ ```
462
+
463
+ #### Pipeline Batch Analysis
314
464
 
315
465
  ```python
316
466
  from greenmining.controllers.repository_controller import RepositoryController
@@ -531,17 +681,24 @@ config = Config(
531
681
 
532
682
  ### Core Capabilities
533
683
 
534
- - **Pattern Detection**: Automatically identifies 122 sustainability patterns across 15 categories
535
- - **Keyword Analysis**: Scans commit messages using 321 green software keywords
536
- - **Custom Repository Fetching**: Fetch repositories with custom search keywords (not limited to microservices)
537
- - **Repository Analysis**: Analyzes repositories from GitHub with flexible filtering
538
- - **Batch Processing**: Analyze hundreds of repositories and thousands of commits
539
- - **Multi-format Output**: Generates Markdown reports, CSV exports, and JSON data
540
- - **Statistical Analysis**: Calculates green-awareness metrics, pattern distribution, and trends
684
+ - **Pattern Detection**: 122 sustainability patterns across 15 categories from the GSF catalog
685
+ - **Keyword Analysis**: 321 green software detection keywords
686
+ - **Repository Fetching**: GraphQL API with date, star, and language filters
687
+ - **URL-Based Analysis**: Direct PyDriller analysis from GitHub URLs (HTTPS and SSH)
688
+ - **Batch Processing**: Parallel analysis of multiple repositories with configurable workers
689
+ - **Private Repository Support**: Authentication via SSH keys or GitHub tokens
690
+ - **Energy Measurement**: RAPL, CodeCarbon, and CPU Energy Meter backends
691
+ - **Carbon Footprint Reporting**: CO2 emissions with 20+ country profiles and cloud region support (AWS, GCP, Azure)
692
+ - **Power Regression Detection**: Identify commits that increased energy consumption
693
+ - **Metrics-to-Power Correlation**: Pearson and Spearman analysis between code metrics and power
694
+ - **Version Power Comparison**: Compare power consumption across software versions with trend detection
695
+ - **Method-Level Analysis**: Per-method complexity metrics via Lizard integration
696
+ - **Source Code Access**: Before/after source code for refactoring detection
697
+ - **Full Process Metrics**: All 8 PyDriller process metrics (ChangeSet, CodeChurn, CommitsCount, ContributorsCount, ContributorsExperience, HistoryComplexity, HunksCount, LinesCount)
698
+ - **Statistical Analysis**: Correlations, effect sizes, and temporal trends
699
+ - **Multi-format Output**: Markdown reports, CSV exports, JSON data
700
+ - **Web Dashboard**: Flask-based interactive visualization (`pip install greenmining[dashboard]`)
541
701
  - **Docker Support**: Pre-built images for containerized analysis
542
- - **Programmatic API**: Full Python API for custom workflows and integrations
543
- - **Clean Architecture**: Modular design with services layer (Fetcher, Extractor, Analyzer, Aggregator, Reports)
544
- - **Energy Measurement**: Real-time energy consumption tracking via RAPL (Linux) or CodeCarbon (cross-platform)
545
702
 
546
703
  ### Energy Measurement
547
704
 
@@ -553,38 +710,44 @@ greenmining includes built-in energy measurement capabilities for tracking the c
553
710
  |---------|----------|---------|--------------|
554
711
  | **RAPL** | Linux (Intel/AMD) | CPU/RAM energy (Joules) | `/sys/class/powercap/` access |
555
712
  | **CodeCarbon** | Cross-platform | Energy + Carbon emissions (gCO2) | `pip install codecarbon` |
713
+ | **CPU Meter** | All platforms | Estimated CPU energy (Joules) | Optional: `pip install psutil` |
714
+ | **Auto** | All platforms | Best available backend | Automatic detection |
556
715
 
557
716
  #### Python API
558
717
 
559
718
  ```python
560
- from greenmining.energy import RAPLEnergyMeter, CodeCarbonMeter
561
-
562
- # RAPL (Linux only)
563
- rapl = RAPLEnergyMeter()
564
- if rapl.is_available():
565
- rapl.start()
566
- # ... run analysis ...
567
- result = rapl.stop()
568
- print(f"Energy: {result.energy_joules:.2f} J")
569
-
570
- # CodeCarbon (cross-platform)
571
- cc = CodeCarbonMeter()
572
- if cc.is_available():
573
- cc.start()
574
- # ... run analysis ...
575
- result = cc.stop()
576
- print(f"Energy: {result.energy_joules:.2f} J")
577
- print(f"Carbon: {result.carbon_grams:.4f} gCO2")
719
+ from greenmining.energy import RAPLEnergyMeter, CPUEnergyMeter, get_energy_meter
720
+
721
+ # Auto-detect best backend
722
+ meter = get_energy_meter("auto")
723
+ meter.start()
724
+ # ... run analysis ...
725
+ result = meter.stop()
726
+ print(f"Energy: {result.joules:.2f} J")
727
+ print(f"Power: {result.watts_avg:.2f} W")
728
+
729
+ # Integrated energy tracking during analysis
730
+ from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
731
+
732
+ analyzer = LocalRepoAnalyzer(energy_tracking=True, energy_backend="auto")
733
+ result = analyzer.analyze_repository("https://github.com/pallets/flask")
734
+ print(f"Analysis energy: {result.energy_metrics['joules']:.2f} J")
578
735
  ```
579
736
 
580
- #### Experiment Results
737
+ #### Carbon Footprint Reporting
581
738
 
582
- CodeCarbon was verified with a real experiment:
583
- - **Repository**: flask (pallets/flask)
584
- - **Commits analyzed**: 10
585
- - **Energy measured**: 160.6 J
586
- - **Carbon emissions**: 0.0119 gCO2
587
- - **Duration**: 11.28 seconds
739
+ ```python
740
+ from greenmining.energy import CarbonReporter
741
+
742
+ reporter = CarbonReporter(
743
+ country_iso="USA",
744
+ cloud_provider="aws",
745
+ region="us-east-1",
746
+ )
747
+ report = reporter.generate_report(total_joules=3600.0)
748
+ print(f"CO2: {report.total_emissions_kg * 1000:.4f} grams")
749
+ print(f"Equivalent: {report.tree_months:.2f} tree-months to offset")
750
+ ```
588
751
 
589
752
  ### Pattern Database
590
753
 
@@ -687,8 +850,14 @@ ruff check greenmining/ tests/
687
850
  - PyGithub >= 2.1.1
688
851
  - PyDriller >= 2.5
689
852
  - pandas >= 2.2.0
690
- - click >= 8.1.7
691
- - codecarbon >= 2.0.0 (optional, for cross-platform energy measurement)
853
+
854
+ **Optional dependencies:**
855
+
856
+ ```bash
857
+ pip install greenmining[energy] # psutil, codecarbon (energy measurement)
858
+ pip install greenmining[dashboard] # flask (web dashboard)
859
+ pip install greenmining[dev] # pytest, black, ruff, mypy (development)
860
+ ```
692
861
 
693
862
  ## License
694
863
 
@@ -1,14 +1,29 @@
1
1
  # greenmining
2
2
 
3
- Green mining for microservices repositories.
3
+ An empirical Python library for Mining Software Repositories (MSR) in Green IT research.
4
4
 
5
5
  [![PyPI](https://img.shields.io/pypi/v/greenmining)](https://pypi.org/project/greenmining/)
6
6
  [![Python](https://img.shields.io/pypi/pyversions/greenmining)](https://pypi.org/project/greenmining/)
7
7
  [![License](https://img.shields.io/github/license/adam-bouafia/greenmining)](LICENSE)
8
+ [![Documentation](https://img.shields.io/badge/docs-readthedocs-blue)](https://greenmining.readthedocs.io/)
8
9
 
9
10
  ## Overview
10
11
 
11
- `greenmining` is a Python library for analyzing GitHub repositories to identify green software engineering practices and energy-efficient patterns. It detects sustainable software patterns across cloud, web, AI, database, networking, and general categories.
12
+ `greenmining` is a research-grade Python library designed for **empirical Mining Software Repositories (MSR)** studies in **Green IT**. It enables researchers and practitioners to:
13
+
14
+ - **Mine repositories at scale** - Fetch and analyze GitHub repositories via GraphQL API with configurable filters
15
+ - **Batch analysis with parallelism** - Analyze multiple repositories concurrently with configurable worker pools
16
+ - **Classify green commits** - Detect 122 sustainability patterns from the Green Software Foundation (GSF) catalog
17
+ - **Analyze any repository by URL** - Direct PyDriller-based analysis with support for private repositories
18
+ - **Measure energy consumption** - RAPL, CodeCarbon, and CPU Energy Meter backends for power profiling
19
+ - **Carbon footprint reporting** - CO2 emissions calculation with 20+ country profiles and cloud region support
20
+ - **Power regression detection** - Identify commits that increased energy consumption
21
+ - **Method-level analysis** - Per-method complexity and metrics via Lizard integration
22
+ - **Version power comparison** - Compare power consumption across software versions
23
+ - **Generate research datasets** - Statistical analysis, temporal trends, and publication-ready reports
24
+ - **Web dashboard** - Flask-based interactive visualization of analysis results
25
+
26
+ Whether you're conducting MSR studies, analyzing green software adoption, or measuring the energy footprint of codebases, `greenmining` provides the empirical toolkit you need.
12
27
 
13
28
  ## Installation
14
29
 
@@ -257,7 +272,137 @@ print(f"Top patterns: {stats['top_patterns'][:5]}")
257
272
  aggregator.export_to_csv(results, "output.csv")
258
273
  ```
259
274
 
260
- #### Batch Analysis
275
+ #### URL-Based Repository Analysis
276
+
277
+ ```python
278
+ from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
279
+
280
+ analyzer = LocalRepoAnalyzer(
281
+ max_commits=200,
282
+ cleanup_after=True,
283
+ )
284
+
285
+ result = analyzer.analyze_repository("https://github.com/pallets/flask")
286
+
287
+ print(f"Repository: {result.name}")
288
+ print(f"Commits analyzed: {result.total_commits}")
289
+ print(f"Green-aware: {result.green_commits} ({result.green_commit_rate:.1%})")
290
+
291
+ for commit in result.commits[:5]:
292
+     if commit.green_aware:
293
+         print(f" {commit.message[:60]}...")
294
+ ```
295
+
296
+ #### Batch Analysis with Parallelism
297
+
298
+ ```python
299
+ from greenmining import analyze_repositories
300
+
301
+ results = analyze_repositories(
302
+ urls=[
303
+ "https://github.com/kubernetes/kubernetes",
304
+ "https://github.com/istio/istio",
305
+ "https://github.com/envoyproxy/envoy",
306
+ ],
307
+ max_commits=100,
308
+ parallel_workers=3,
309
+ energy_tracking=True,
310
+ energy_backend="auto",
311
+ )
312
+
313
+ for result in results:
314
+     print(f"{result.name}: {result.green_commit_rate:.1%} green")
315
+ ```
316
+
317
+ #### Private Repository Analysis
318
+
319
+ ```python
320
+ from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
321
+
322
+ # HTTPS with token
323
+ analyzer = LocalRepoAnalyzer(github_token="ghp_xxxx")
324
+ result = analyzer.analyze_repository("https://github.com/company/private-repo")
325
+
326
+ # SSH with key
327
+ analyzer = LocalRepoAnalyzer(ssh_key_path="~/.ssh/id_rsa")
328
+ result = analyzer.analyze_repository("git@github.com:company/private-repo.git")
329
+ ```
330
+
331
+ #### Power Regression Detection
332
+
333
+ ```python
334
+ from greenmining.analyzers import PowerRegressionDetector
335
+
336
+ detector = PowerRegressionDetector(
337
+ test_command="pytest tests/ -x",
338
+ energy_backend="rapl",
339
+ threshold_percent=5.0,
340
+ iterations=5,
341
+ )
342
+
343
+ regressions = detector.detect(
344
+ repo_path="/path/to/repo",
345
+ baseline_commit="v1.0.0",
346
+ target_commit="HEAD",
347
+ )
348
+
349
+ for regression in regressions:
350
+     print(f"Commit {regression.sha[:8]}: +{regression.power_increase:.1f}%")
351
+ ```
352
+
353
+ #### Version Power Comparison
354
+
355
+ ```python
356
+ from greenmining.analyzers import VersionPowerAnalyzer
357
+
358
+ analyzer = VersionPowerAnalyzer(
359
+ test_command="pytest tests/",
360
+ energy_backend="rapl",
361
+ iterations=10,
362
+ warmup_iterations=2,
363
+ )
364
+
365
+ report = analyzer.analyze_versions(
366
+ repo_path="/path/to/repo",
367
+ versions=["v1.0", "v1.1", "v1.2", "v2.0"],
368
+ )
369
+
370
+ print(report.summary())
371
+ print(f"Trend: {report.trend}")
372
+ print(f"Most efficient: {report.most_efficient}")
373
+ ```
374
+
375
+ #### Metrics-to-Power Correlation
376
+
377
+ ```python
378
+ from greenmining.analyzers import MetricsPowerCorrelator
379
+
380
+ correlator = MetricsPowerCorrelator()
381
+ correlator.fit(
382
+ metrics=["complexity", "nloc", "code_churn"],
383
+ metrics_values={
384
+ "complexity": [10, 20, 30, 40],
385
+ "nloc": [100, 200, 300, 400],
386
+ "code_churn": [50, 100, 150, 200],
387
+ },
388
+ power_measurements=[5.0, 8.0, 12.0, 15.0],
389
+ )
390
+
391
+ print(f"Pearson: {correlator.pearson}")
392
+ print(f"Spearman: {correlator.spearman}")
393
+ print(f"Feature importance: {correlator.feature_importance}")
394
+ ```
395
+
396
+ #### Web Dashboard
397
+
398
+ ```python
399
+ from greenmining.dashboard import run_dashboard
400
+
401
+ # Launch interactive dashboard (requires pip install greenmining[dashboard])
402
+ run_dashboard(data_dir="./data", host="127.0.0.1", port=5000)
403
+ ```
404
+
405
+ #### Pipeline Batch Analysis
261
406
 
262
407
  ```python
263
408
  from greenmining.controllers.repository_controller import RepositoryController
@@ -478,17 +623,24 @@ config = Config(
478
623
 
479
624
  ### Core Capabilities
480
625
 
481
- - **Pattern Detection**: Automatically identifies 122 sustainability patterns across 15 categories
482
- - **Keyword Analysis**: Scans commit messages using 321 green software keywords
483
- - **Custom Repository Fetching**: Fetch repositories with custom search keywords (not limited to microservices)
484
- - **Repository Analysis**: Analyzes repositories from GitHub with flexible filtering
485
- - **Batch Processing**: Analyze hundreds of repositories and thousands of commits
486
- - **Multi-format Output**: Generates Markdown reports, CSV exports, and JSON data
487
- - **Statistical Analysis**: Calculates green-awareness metrics, pattern distribution, and trends
626
+ - **Pattern Detection**: 122 sustainability patterns across 15 categories from the GSF catalog
627
+ - **Keyword Analysis**: 321 green software detection keywords
628
+ - **Repository Fetching**: GraphQL API with date, star, and language filters
629
+ - **URL-Based Analysis**: Direct PyDriller analysis from GitHub URLs (HTTPS and SSH)
630
+ - **Batch Processing**: Parallel analysis of multiple repositories with configurable workers
631
+ - **Private Repository Support**: Authentication via SSH keys or GitHub tokens
632
+ - **Energy Measurement**: RAPL, CodeCarbon, and CPU Energy Meter backends
633
+ - **Carbon Footprint Reporting**: CO2 emissions with 20+ country profiles and cloud region support (AWS, GCP, Azure)
634
+ - **Power Regression Detection**: Identify commits that increased energy consumption
635
+ - **Metrics-to-Power Correlation**: Pearson and Spearman analysis between code metrics and power
636
+ - **Version Power Comparison**: Compare power consumption across software versions with trend detection
637
+ - **Method-Level Analysis**: Per-method complexity metrics via Lizard integration
638
+ - **Source Code Access**: Before/after source code for refactoring detection
639
+ - **Full Process Metrics**: All 8 PyDriller process metrics (ChangeSet, CodeChurn, CommitsCount, ContributorsCount, ContributorsExperience, HistoryComplexity, HunksCount, LinesCount)
640
+ - **Statistical Analysis**: Correlations, effect sizes, and temporal trends
641
+ - **Multi-format Output**: Markdown reports, CSV exports, JSON data
642
+ - **Web Dashboard**: Flask-based interactive visualization (`pip install greenmining[dashboard]`)
488
643
  - **Docker Support**: Pre-built images for containerized analysis
489
- - **Programmatic API**: Full Python API for custom workflows and integrations
490
- - **Clean Architecture**: Modular design with services layer (Fetcher, Extractor, Analyzer, Aggregator, Reports)
491
- - **Energy Measurement**: Real-time energy consumption tracking via RAPL (Linux) or CodeCarbon (cross-platform)
492
644
 
493
645
  ### Energy Measurement
494
646
 
@@ -500,38 +652,44 @@ greenmining includes built-in energy measurement capabilities for tracking the c
500
652
  |---------|----------|---------|--------------|
501
653
  | **RAPL** | Linux (Intel/AMD) | CPU/RAM energy (Joules) | `/sys/class/powercap/` access |
502
654
  | **CodeCarbon** | Cross-platform | Energy + Carbon emissions (gCO2) | `pip install codecarbon` |
655
+ | **CPU Meter** | All platforms | Estimated CPU energy (Joules) | Optional: `pip install psutil` |
656
+ | **Auto** | All platforms | Best available backend | Automatic detection |
503
657
 
504
658
  #### Python API
505
659
 
506
660
  ```python
507
- from greenmining.energy import RAPLEnergyMeter, CodeCarbonMeter
508
-
509
- # RAPL (Linux only)
510
- rapl = RAPLEnergyMeter()
511
- if rapl.is_available():
512
- rapl.start()
513
- # ... run analysis ...
514
- result = rapl.stop()
515
- print(f"Energy: {result.energy_joules:.2f} J")
516
-
517
- # CodeCarbon (cross-platform)
518
- cc = CodeCarbonMeter()
519
- if cc.is_available():
520
- cc.start()
521
- # ... run analysis ...
522
- result = cc.stop()
523
- print(f"Energy: {result.energy_joules:.2f} J")
524
- print(f"Carbon: {result.carbon_grams:.4f} gCO2")
661
+ from greenmining.energy import RAPLEnergyMeter, CPUEnergyMeter, get_energy_meter
662
+
663
+ # Auto-detect best backend
664
+ meter = get_energy_meter("auto")
665
+ meter.start()
666
+ # ... run analysis ...
667
+ result = meter.stop()
668
+ print(f"Energy: {result.joules:.2f} J")
669
+ print(f"Power: {result.watts_avg:.2f} W")
670
+
671
+ # Integrated energy tracking during analysis
672
+ from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
673
+
674
+ analyzer = LocalRepoAnalyzer(energy_tracking=True, energy_backend="auto")
675
+ result = analyzer.analyze_repository("https://github.com/pallets/flask")
676
+ print(f"Analysis energy: {result.energy_metrics['joules']:.2f} J")
525
677
  ```
526
678
 
527
- #### Experiment Results
679
+ #### Carbon Footprint Reporting
528
680
 
529
- CodeCarbon was verified with a real experiment:
530
- - **Repository**: flask (pallets/flask)
531
- - **Commits analyzed**: 10
532
- - **Energy measured**: 160.6 J
533
- - **Carbon emissions**: 0.0119 gCO2
534
- - **Duration**: 11.28 seconds
681
+ ```python
682
+ from greenmining.energy import CarbonReporter
683
+
684
+ reporter = CarbonReporter(
685
+ country_iso="USA",
686
+ cloud_provider="aws",
687
+ region="us-east-1",
688
+ )
689
+ report = reporter.generate_report(total_joules=3600.0)
690
+ print(f"CO2: {report.total_emissions_kg * 1000:.4f} grams")
691
+ print(f"Equivalent: {report.tree_months:.2f} tree-months to offset")
692
+ ```
535
693
 
536
694
  ### Pattern Database
537
695
 
@@ -634,8 +792,14 @@ ruff check greenmining/ tests/
634
792
  - PyGithub >= 2.1.1
635
793
  - PyDriller >= 2.5
636
794
  - pandas >= 2.2.0
637
- - click >= 8.1.7
638
- - codecarbon >= 2.0.0 (optional, for cross-platform energy measurement)
795
+
796
+ **Optional dependencies:**
797
+
798
+ ```bash
799
+ pip install greenmining[energy] # psutil, codecarbon (energy measurement)
800
+ pip install greenmining[dashboard] # flask (web dashboard)
801
+ pip install greenmining[dev] # pytest, black, ruff, mypy (development)
802
+ ```
639
803
 
640
804
  ## License
641
805