greenmining 1.0.4__tar.gz → 1.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. {greenmining-1.0.4 → greenmining-1.0.6}/CHANGELOG.md +0 -1
  2. {greenmining-1.0.4/greenmining.egg-info → greenmining-1.0.6}/PKG-INFO +65 -54
  3. {greenmining-1.0.4 → greenmining-1.0.6}/README.md +56 -50
  4. greenmining-1.0.6/greenmining/__init__.py +87 -0
  5. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/__version__.py +1 -1
  6. greenmining-1.0.6/greenmining/analyzers/__init__.py +22 -0
  7. greenmining-1.0.6/greenmining/analyzers/metrics_power_correlator.py +165 -0
  8. greenmining-1.0.6/greenmining/analyzers/power_regression.py +212 -0
  9. greenmining-1.0.6/greenmining/analyzers/version_power_analyzer.py +246 -0
  10. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/config.py +46 -34
  11. greenmining-1.0.6/greenmining/dashboard/__init__.py +5 -0
  12. greenmining-1.0.6/greenmining/dashboard/app.py +200 -0
  13. greenmining-1.0.6/greenmining/energy/__init__.py +20 -0
  14. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/energy/base.py +45 -35
  15. greenmining-1.0.6/greenmining/energy/carbon_reporter.py +242 -0
  16. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/energy/codecarbon_meter.py +25 -24
  17. greenmining-1.0.6/greenmining/energy/cpu_meter.py +144 -0
  18. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/energy/rapl.py +30 -36
  19. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/services/__init__.py +13 -3
  20. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/services/commit_extractor.py +9 -5
  21. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/services/github_fetcher.py +16 -18
  22. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/services/github_graphql_fetcher.py +45 -55
  23. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/services/local_repo_analyzer.py +325 -63
  24. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/services/reports.py +5 -8
  25. {greenmining-1.0.4 → greenmining-1.0.6/greenmining.egg-info}/PKG-INFO +65 -54
  26. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining.egg-info/SOURCES.txt +14 -0
  27. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining.egg-info/requires.txt +8 -1
  28. {greenmining-1.0.4 → greenmining-1.0.6}/pyproject.toml +19 -7
  29. {greenmining-1.0.4 → greenmining-1.0.6}/setup.py +1 -1
  30. greenmining-1.0.4/greenmining/__init__.py +0 -43
  31. greenmining-1.0.4/greenmining/analyzers/__init__.py +0 -13
  32. greenmining-1.0.4/greenmining/energy/__init__.py +0 -13
  33. {greenmining-1.0.4 → greenmining-1.0.6}/LICENSE +0 -0
  34. {greenmining-1.0.4 → greenmining-1.0.6}/MANIFEST.in +0 -0
  35. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/__main__.py +0 -0
  36. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/analyzers/code_diff_analyzer.py +0 -0
  37. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/analyzers/qualitative_analyzer.py +0 -0
  38. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/analyzers/statistical_analyzer.py +0 -0
  39. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/analyzers/temporal_analyzer.py +0 -0
  40. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/controllers/__init__.py +0 -0
  41. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/controllers/repository_controller.py +0 -0
  42. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/gsf_patterns.py +0 -0
  43. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/models/__init__.py +0 -0
  44. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/models/aggregated_stats.py +0 -0
  45. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/models/analysis_result.py +0 -0
  46. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/models/commit.py +0 -0
  47. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/models/repository.py +0 -0
  48. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/presenters/__init__.py +0 -0
  49. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/presenters/console_presenter.py +0 -0
  50. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/services/data_aggregator.py +0 -0
  51. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/services/data_analyzer.py +0 -0
  52. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining/utils.py +0 -0
  53. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining.egg-info/dependency_links.txt +0 -0
  54. {greenmining-1.0.4 → greenmining-1.0.6}/greenmining.egg-info/top_level.txt +0 -0
  55. {greenmining-1.0.4 → greenmining-1.0.6}/setup.cfg +0 -0
@@ -84,7 +84,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
84
84
  - Pattern matching engine
85
85
  - Green awareness detection
86
86
  - Data analysis and reporting
87
- - CLI interface with Click
88
87
  - Docker support with multi-stage builds
89
88
  - GitHub Actions CI/CD pipeline
90
89
  - PyPI publishing workflow
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: greenmining
3
- Version: 1.0.4
4
- Summary: Analyze GitHub repositories to identify green software engineering patterns and energy-efficient practices
3
+ Version: 1.0.6
4
+ Summary: An empirical Python library for Mining Software Repositories (MSR) in Green IT research
5
5
  Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
6
6
  License: MIT
7
7
  Project-URL: Homepage, https://github.com/adam-bouafia/greenmining
@@ -9,7 +9,7 @@ Project-URL: Documentation, https://github.com/adam-bouafia/greenmining#readme
9
9
  Project-URL: Repository, https://github.com/adam-bouafia/greenmining
10
10
  Project-URL: Issues, https://github.com/adam-bouafia/greenmining/issues
11
11
  Project-URL: Changelog, https://github.com/adam-bouafia/greenmining/blob/main/CHANGELOG.md
12
- Keywords: green-software,gsf,sustainability,carbon-footprint,microservices,mining,repository-analysis,energy-efficiency,github-analysis
12
+ Keywords: green-software,gsf,msr,mining-software-repositories,green-it,sustainability,carbon-footprint,energy-efficiency,repository-analysis,github-analysis,pydriller,empirical-software-engineering
13
13
  Classifier: Development Status :: 3 - Alpha
14
14
  Classifier: Intended Audience :: Developers
15
15
  Classifier: Intended Audience :: Science/Research
@@ -43,8 +43,13 @@ Requires-Dist: pytest-mock>=3.12.0; extra == "dev"
43
43
  Requires-Dist: black>=23.12.0; extra == "dev"
44
44
  Requires-Dist: ruff>=0.1.9; extra == "dev"
45
45
  Requires-Dist: mypy>=1.8.0; extra == "dev"
46
- Requires-Dist: build>=1.0.4; extra == "dev"
46
+ Requires-Dist: build>=1.0.5; extra == "dev"
47
47
  Requires-Dist: twine>=4.0.2; extra == "dev"
48
+ Provides-Extra: energy
49
+ Requires-Dist: psutil>=5.9.0; extra == "energy"
50
+ Requires-Dist: codecarbon>=2.3.0; extra == "energy"
51
+ Provides-Extra: dashboard
52
+ Requires-Dist: flask>=3.0.0; extra == "dashboard"
48
53
  Provides-Extra: docs
49
54
  Requires-Dist: sphinx>=7.2.0; extra == "docs"
50
55
  Requires-Dist: sphinx-rtd-theme>=2.0.0; extra == "docs"
@@ -53,15 +58,30 @@ Dynamic: license-file
53
58
 
54
59
  # greenmining
55
60
 
56
- Green mining for microservices repositories.
61
+ An empirical Python library for Mining Software Repositories (MSR) in Green IT research.
57
62
 
58
63
  [![PyPI](https://img.shields.io/pypi/v/greenmining)](https://pypi.org/project/greenmining/)
59
64
  [![Python](https://img.shields.io/pypi/pyversions/greenmining)](https://pypi.org/project/greenmining/)
60
65
  [![License](https://img.shields.io/github/license/adam-bouafia/greenmining)](LICENSE)
66
+ [![Documentation](https://img.shields.io/badge/docs-readthedocs-blue)](https://greenmining.readthedocs.io/)
61
67
 
62
68
  ## Overview
63
69
 
64
- `greenmining` is a Python library and CLI tool for analyzing GitHub repositories to identify green software engineering practices and energy-efficient patterns. It detects sustainable software patterns across cloud, web, AI, database, networking, and general categories.
70
+ `greenmining` is a research-grade Python library designed for **empirical Mining Software Repositories (MSR)** studies in **Green IT**. It enables researchers and practitioners to:
71
+
72
+ - **Mine repositories at scale** - Fetch and analyze GitHub repositories via GraphQL API with configurable filters
73
+ - **Batch analysis with parallelism** - Analyze multiple repositories concurrently with configurable worker pools
74
+ - **Classify green commits** - Detect 122 sustainability patterns from the Green Software Foundation (GSF) catalog
75
+ - **Analyze any repository by URL** - Direct PyDriller-based analysis with support for private repositories
76
+ - **Measure energy consumption** - RAPL, CodeCarbon, and CPU Energy Meter backends for power profiling
77
+ - **Carbon footprint reporting** - CO2 emissions calculation with 20+ country profiles and cloud region support
78
+ - **Power regression detection** - Identify commits that increased energy consumption
79
+ - **Method-level analysis** - Per-method complexity and metrics via Lizard integration
80
+ - **Version power comparison** - Compare power consumption across software versions
81
+ - **Generate research datasets** - Statistical analysis, temporal trends, and publication-ready reports
82
+ - **Web dashboard** - Flask-based interactive visualization of analysis results
83
+
84
+ Whether you're conducting MSR research, analyzing green software adoption, or measuring the energy footprint of codebases, GreenMining provides the empirical toolkit you need.
65
85
 
66
86
  ## Installation
67
87
 
@@ -105,7 +125,7 @@ if is_green_aware(commit_msg):
105
125
  # Output: ['Cache Static Data', 'Use Efficient Cache Strategies']
106
126
  ```
107
127
 
108
- #### Fetch Repositories with Custom Keywords (NEW)
128
+ #### Fetch Repositories with Custom Keywords
109
129
 
110
130
  ```python
111
131
  from greenmining import fetch_repositories
@@ -144,8 +164,6 @@ for repo in repos[:5]:
144
164
  ```python
145
165
  from greenmining.services.commit_extractor import CommitExtractor
146
166
  from greenmining.services.data_analyzer import DataAnalyzer
147
- from greenmining.analyzers.nlp_analyzer import NLPAnalyzer
148
- from greenmining.analyzers.ml_feature_extractor import MLFeatureExtractor
149
167
  from greenmining import fetch_repositories
150
168
 
151
169
  # Fetch repositories with custom keywords
@@ -195,18 +213,6 @@ for commit in commits:
195
213
  results.append(result)
196
214
  print(f"Green commit found: {commit.message[:50]}...")
197
215
  print(f" Patterns: {result['known_pattern']}")
198
-
199
- # Access NLP analysis results (NEW)
200
- if 'nlp_analysis' in result:
201
- nlp = result['nlp_analysis']
202
- print(f" NLP: {nlp['morphological_count']} morphological matches, "
203
- f"{nlp['semantic_count']} semantic matches")
204
-
205
- # Access ML features (NEW)
206
- if 'ml_features' in result:
207
- ml = result['ml_features']['text']
208
- print(f" ML Features: {ml['word_count']} words, "
209
- f"keyword density: {ml['keyword_density']:.2f}")
210
216
  ```
211
217
 
212
218
  #### Access Sustainability Patterns Data
@@ -242,7 +248,7 @@ print(f"Available categories: {sorted(categories)}")
242
248
  # 'monitoring', 'network', 'networking', 'resource', 'web']
243
249
  ```
244
250
 
245
- #### Advanced Analysis: Temporal Trends (NEW)
251
+ #### Advanced Analysis: Temporal Trends
246
252
 
247
253
  ```python
248
254
  from greenmining.services.data_aggregator import DataAggregator
@@ -374,7 +380,7 @@ repositories = fetch_repositories(
374
380
  min_stars=10,
375
381
  keywords="software engineering",
376
382
  )
377
- print(f"Fetched {len(repositories)} repositories")
383
+ print(f"Fetched {len(repositories)} repositories")
378
384
 
379
385
  # STAGE 2: Extract Commits
380
386
  print("\nExtracting commits...")
@@ -386,7 +392,7 @@ extractor = CommitExtractor(
386
392
  timeout=120,
387
393
  )
388
394
  all_commits = extractor.extract_from_repositories(repositories)
389
- print(f"Extracted {len(all_commits)} commits")
395
+ print(f"Extracted {len(all_commits)} commits")
390
396
 
391
397
  # Save commits
392
398
  extractor.save_results(
@@ -405,8 +411,8 @@ analyzed_commits = analyzer.analyze_commits(all_commits)
405
411
  # Count green-aware commits
406
412
  green_count = sum(1 for c in analyzed_commits if c.get("green_aware", False))
407
413
  green_percentage = (green_count / len(analyzed_commits) * 100) if analyzed_commits else 0
408
- print(f"Analyzed {len(analyzed_commits)} commits")
409
- print(f"Green-aware: {green_count} ({green_percentage:.1f}%)")
414
+ print(f"Analyzed {len(analyzed_commits)} commits")
415
+ print(f"Green-aware: {green_count} ({green_percentage:.1f}%)")
410
416
 
411
417
  # Save analysis
412
418
  analyzer.save_results(analyzed_commits, output_dir / "analyzed.json")
@@ -434,7 +440,7 @@ print("\n" + "="*80)
434
440
  print("ANALYSIS COMPLETE")
435
441
  print("="*80)
436
442
  aggregator.print_summary(results)
437
- print(f"\n📁 Results saved in: {output_dir.absolute()}")
443
+ print(f"\nResults saved in: {output_dir.absolute()}")
438
444
  ```
439
445
 
440
446
  **What this example does:**
@@ -567,38 +573,44 @@ greenmining includes built-in energy measurement capabilities for tracking the c
567
573
  |---------|----------|---------|--------------|
568
574
  | **RAPL** | Linux (Intel/AMD) | CPU/RAM energy (Joules) | `/sys/class/powercap/` access |
569
575
  | **CodeCarbon** | Cross-platform | Energy + Carbon emissions (gCO2) | `pip install codecarbon` |
576
+ | **CPU Meter** | All platforms | Estimated CPU energy (Joules) | Optional: `pip install psutil` |
577
+ | **Auto** | All platforms | Best available backend | Automatic detection |
570
578
 
571
579
  #### Python API
572
580
 
573
581
  ```python
574
- from greenmining.energy import RAPLEnergyMeter, CodeCarbonMeter
575
-
576
- # RAPL (Linux only)
577
- rapl = RAPLEnergyMeter()
578
- if rapl.is_available():
579
- rapl.start()
580
- # ... run analysis ...
581
- result = rapl.stop()
582
- print(f"Energy: {result.energy_joules:.2f} J")
583
-
584
- # CodeCarbon (cross-platform)
585
- cc = CodeCarbonMeter()
586
- if cc.is_available():
587
- cc.start()
588
- # ... run analysis ...
589
- result = cc.stop()
590
- print(f"Energy: {result.energy_joules:.2f} J")
591
- print(f"Carbon: {result.carbon_grams:.4f} gCO2")
582
+ from greenmining.energy import RAPLEnergyMeter, CPUEnergyMeter, get_energy_meter
583
+
584
+ # Auto-detect best backend
585
+ meter = get_energy_meter("auto")
586
+ meter.start()
587
+ # ... run analysis ...
588
+ result = meter.stop()
589
+ print(f"Energy: {result.joules:.2f} J")
590
+ print(f"Power: {result.watts_avg:.2f} W")
591
+
592
+ # Integrated energy tracking during analysis
593
+ from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
594
+
595
+ analyzer = LocalRepoAnalyzer(energy_tracking=True, energy_backend="auto")
596
+ result = analyzer.analyze_repository("https://github.com/pallets/flask")
597
+ print(f"Analysis energy: {result.energy_metrics['joules']:.2f} J")
592
598
  ```
593
599
 
594
- #### Experiment Results
600
+ #### Carbon Footprint Reporting
601
+
602
+ ```python
603
+ from greenmining.energy import CarbonReporter
595
604
 
596
- CodeCarbon was verified with a real experiment:
597
- - **Repository**: flask (pallets/flask)
598
- - **Commits analyzed**: 10
599
- - **Energy measured**: 160.6 J
600
- - **Carbon emissions**: 0.0119 gCO2
601
- - **Duration**: 11.28 seconds
605
+ reporter = CarbonReporter(
606
+ country_iso="USA",
607
+ cloud_provider="aws",
608
+ region="us-east-1",
609
+ )
610
+ report = reporter.generate_report(total_joules=3600.0)
611
+ print(f"CO2: {report.total_emissions_kg * 1000:.4f} grams")
612
+ print(f"Equivalent: {report.tree_months:.2f} tree-months to offset")
613
+ ```
602
614
 
603
615
  ### Pattern Database
604
616
 
@@ -700,8 +712,7 @@ ruff check greenmining/ tests/
700
712
  - Python 3.9+
701
713
  - PyGithub >= 2.1.1
702
714
  - PyDriller >= 2.5
703
- - pandas >= 2.2.0
704
- - click >= 8.1.7
715
+ - pandas >= 2.2.0
705
716
  - codecarbon >= 2.0.0 (optional, for cross-platform energy measurement)
706
717
 
707
718
  ## License
@@ -1,14 +1,29 @@
1
1
  # greenmining
2
2
 
3
- Green mining for microservices repositories.
3
+ An empirical Python library for Mining Software Repositories (MSR) in Green IT research.
4
4
 
5
5
  [![PyPI](https://img.shields.io/pypi/v/greenmining)](https://pypi.org/project/greenmining/)
6
6
  [![Python](https://img.shields.io/pypi/pyversions/greenmining)](https://pypi.org/project/greenmining/)
7
7
  [![License](https://img.shields.io/github/license/adam-bouafia/greenmining)](LICENSE)
8
+ [![Documentation](https://img.shields.io/badge/docs-readthedocs-blue)](https://greenmining.readthedocs.io/)
8
9
 
9
10
  ## Overview
10
11
 
11
- `greenmining` is a Python library and CLI tool for analyzing GitHub repositories to identify green software engineering practices and energy-efficient patterns. It detects sustainable software patterns across cloud, web, AI, database, networking, and general categories.
12
+ `greenmining` is a research-grade Python library designed for **empirical Mining Software Repositories (MSR)** studies in **Green IT**. It enables researchers and practitioners to:
13
+
14
+ - **Mine repositories at scale** - Fetch and analyze GitHub repositories via GraphQL API with configurable filters
15
+ - **Batch analysis with parallelism** - Analyze multiple repositories concurrently with configurable worker pools
16
+ - **Classify green commits** - Detect 122 sustainability patterns from the Green Software Foundation (GSF) catalog
17
+ - **Analyze any repository by URL** - Direct PyDriller-based analysis with support for private repositories
18
+ - **Measure energy consumption** - RAPL, CodeCarbon, and CPU Energy Meter backends for power profiling
19
+ - **Carbon footprint reporting** - CO2 emissions calculation with 20+ country profiles and cloud region support
20
+ - **Power regression detection** - Identify commits that increased energy consumption
21
+ - **Method-level analysis** - Per-method complexity and metrics via Lizard integration
22
+ - **Version power comparison** - Compare power consumption across software versions
23
+ - **Generate research datasets** - Statistical analysis, temporal trends, and publication-ready reports
24
+ - **Web dashboard** - Flask-based interactive visualization of analysis results
25
+
26
+ Whether you're conducting MSR research, analyzing green software adoption, or measuring the energy footprint of codebases, GreenMining provides the empirical toolkit you need.
12
27
 
13
28
  ## Installation
14
29
 
@@ -52,7 +67,7 @@ if is_green_aware(commit_msg):
52
67
  # Output: ['Cache Static Data', 'Use Efficient Cache Strategies']
53
68
  ```
54
69
 
55
- #### Fetch Repositories with Custom Keywords (NEW)
70
+ #### Fetch Repositories with Custom Keywords
56
71
 
57
72
  ```python
58
73
  from greenmining import fetch_repositories
@@ -91,8 +106,6 @@ for repo in repos[:5]:
91
106
  ```python
92
107
  from greenmining.services.commit_extractor import CommitExtractor
93
108
  from greenmining.services.data_analyzer import DataAnalyzer
94
- from greenmining.analyzers.nlp_analyzer import NLPAnalyzer
95
- from greenmining.analyzers.ml_feature_extractor import MLFeatureExtractor
96
109
  from greenmining import fetch_repositories
97
110
 
98
111
  # Fetch repositories with custom keywords
@@ -142,18 +155,6 @@ for commit in commits:
142
155
  results.append(result)
143
156
  print(f"Green commit found: {commit.message[:50]}...")
144
157
  print(f" Patterns: {result['known_pattern']}")
145
-
146
- # Access NLP analysis results (NEW)
147
- if 'nlp_analysis' in result:
148
- nlp = result['nlp_analysis']
149
- print(f" NLP: {nlp['morphological_count']} morphological matches, "
150
- f"{nlp['semantic_count']} semantic matches")
151
-
152
- # Access ML features (NEW)
153
- if 'ml_features' in result:
154
- ml = result['ml_features']['text']
155
- print(f" ML Features: {ml['word_count']} words, "
156
- f"keyword density: {ml['keyword_density']:.2f}")
157
158
  ```
158
159
 
159
160
  #### Access Sustainability Patterns Data
@@ -189,7 +190,7 @@ print(f"Available categories: {sorted(categories)}")
189
190
  # 'monitoring', 'network', 'networking', 'resource', 'web']
190
191
  ```
191
192
 
192
- #### Advanced Analysis: Temporal Trends (NEW)
193
+ #### Advanced Analysis: Temporal Trends
193
194
 
194
195
  ```python
195
196
  from greenmining.services.data_aggregator import DataAggregator
@@ -321,7 +322,7 @@ repositories = fetch_repositories(
321
322
  min_stars=10,
322
323
  keywords="software engineering",
323
324
  )
324
- print(f"Fetched {len(repositories)} repositories")
325
+ print(f"Fetched {len(repositories)} repositories")
325
326
 
326
327
  # STAGE 2: Extract Commits
327
328
  print("\nExtracting commits...")
@@ -333,7 +334,7 @@ extractor = CommitExtractor(
333
334
  timeout=120,
334
335
  )
335
336
  all_commits = extractor.extract_from_repositories(repositories)
336
- print(f"Extracted {len(all_commits)} commits")
337
+ print(f"Extracted {len(all_commits)} commits")
337
338
 
338
339
  # Save commits
339
340
  extractor.save_results(
@@ -352,8 +353,8 @@ analyzed_commits = analyzer.analyze_commits(all_commits)
352
353
  # Count green-aware commits
353
354
  green_count = sum(1 for c in analyzed_commits if c.get("green_aware", False))
354
355
  green_percentage = (green_count / len(analyzed_commits) * 100) if analyzed_commits else 0
355
- print(f"Analyzed {len(analyzed_commits)} commits")
356
- print(f"Green-aware: {green_count} ({green_percentage:.1f}%)")
356
+ print(f"Analyzed {len(analyzed_commits)} commits")
357
+ print(f"Green-aware: {green_count} ({green_percentage:.1f}%)")
357
358
 
358
359
  # Save analysis
359
360
  analyzer.save_results(analyzed_commits, output_dir / "analyzed.json")
@@ -381,7 +382,7 @@ print("\n" + "="*80)
381
382
  print("ANALYSIS COMPLETE")
382
383
  print("="*80)
383
384
  aggregator.print_summary(results)
384
- print(f"\n📁 Results saved in: {output_dir.absolute()}")
385
+ print(f"\nResults saved in: {output_dir.absolute()}")
385
386
  ```
386
387
 
387
388
  **What this example does:**
@@ -514,38 +515,44 @@ greenmining includes built-in energy measurement capabilities for tracking the c
514
515
  |---------|----------|---------|--------------|
515
516
  | **RAPL** | Linux (Intel/AMD) | CPU/RAM energy (Joules) | `/sys/class/powercap/` access |
516
517
  | **CodeCarbon** | Cross-platform | Energy + Carbon emissions (gCO2) | `pip install codecarbon` |
518
+ | **CPU Meter** | All platforms | Estimated CPU energy (Joules) | Optional: `pip install psutil` |
519
+ | **Auto** | All platforms | Best available backend | Automatic detection |
517
520
 
518
521
  #### Python API
519
522
 
520
523
  ```python
521
- from greenmining.energy import RAPLEnergyMeter, CodeCarbonMeter
522
-
523
- # RAPL (Linux only)
524
- rapl = RAPLEnergyMeter()
525
- if rapl.is_available():
526
- rapl.start()
527
- # ... run analysis ...
528
- result = rapl.stop()
529
- print(f"Energy: {result.energy_joules:.2f} J")
530
-
531
- # CodeCarbon (cross-platform)
532
- cc = CodeCarbonMeter()
533
- if cc.is_available():
534
- cc.start()
535
- # ... run analysis ...
536
- result = cc.stop()
537
- print(f"Energy: {result.energy_joules:.2f} J")
538
- print(f"Carbon: {result.carbon_grams:.4f} gCO2")
524
+ from greenmining.energy import RAPLEnergyMeter, CPUEnergyMeter, get_energy_meter
525
+
526
+ # Auto-detect best backend
527
+ meter = get_energy_meter("auto")
528
+ meter.start()
529
+ # ... run analysis ...
530
+ result = meter.stop()
531
+ print(f"Energy: {result.joules:.2f} J")
532
+ print(f"Power: {result.watts_avg:.2f} W")
533
+
534
+ # Integrated energy tracking during analysis
535
+ from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
536
+
537
+ analyzer = LocalRepoAnalyzer(energy_tracking=True, energy_backend="auto")
538
+ result = analyzer.analyze_repository("https://github.com/pallets/flask")
539
+ print(f"Analysis energy: {result.energy_metrics['joules']:.2f} J")
539
540
  ```
540
541
 
541
- #### Experiment Results
542
+ #### Carbon Footprint Reporting
543
+
544
+ ```python
545
+ from greenmining.energy import CarbonReporter
542
546
 
543
- CodeCarbon was verified with a real experiment:
544
- - **Repository**: flask (pallets/flask)
545
- - **Commits analyzed**: 10
546
- - **Energy measured**: 160.6 J
547
- - **Carbon emissions**: 0.0119 gCO2
548
- - **Duration**: 11.28 seconds
547
+ reporter = CarbonReporter(
548
+ country_iso="USA",
549
+ cloud_provider="aws",
550
+ region="us-east-1",
551
+ )
552
+ report = reporter.generate_report(total_joules=3600.0)
553
+ print(f"CO2: {report.total_emissions_kg * 1000:.4f} grams")
554
+ print(f"Equivalent: {report.tree_months:.2f} tree-months to offset")
555
+ ```
549
556
 
550
557
  ### Pattern Database
551
558
 
@@ -647,8 +654,7 @@ ruff check greenmining/ tests/
647
654
  - Python 3.9+
648
655
  - PyGithub >= 2.1.1
649
656
  - PyDriller >= 2.5
650
- - pandas >= 2.2.0
651
- - click >= 8.1.7
657
+ - pandas >= 2.2.0
652
658
  - codecarbon >= 2.0.0 (optional, for cross-platform energy measurement)
653
659
 
654
660
  ## License
@@ -0,0 +1,87 @@
1
+ # Green Microservices Mining - GSF Pattern Analysis Tool.
2
+
3
+ from greenmining.config import Config
4
+ from greenmining.controllers.repository_controller import RepositoryController
5
+ from greenmining.gsf_patterns import (
6
+ GREEN_KEYWORDS,
7
+ GSF_PATTERNS,
8
+ get_pattern_by_keywords,
9
+ is_green_aware,
10
+ )
11
+
12
+ __version__ = "1.0.6"
13
+
14
+
15
+ def fetch_repositories(
16
+ github_token: str,
17
+ max_repos: int = None,
18
+ min_stars: int = None,
19
+ languages: list = None,
20
+ keywords: str = None,
21
+ ):
22
+ # Fetch repositories from GitHub with custom search keywords.
23
+ config = Config()
24
+ config.GITHUB_TOKEN = github_token
25
+ controller = RepositoryController(config)
26
+
27
+ return controller.fetch_repositories(
28
+ max_repos=max_repos,
29
+ min_stars=min_stars,
30
+ languages=languages,
31
+ keywords=keywords,
32
+ )
33
+
34
+
35
+ def analyze_repositories(
36
+ urls: list,
37
+ max_commits: int = 500,
38
+ parallel_workers: int = 1,
39
+ output_format: str = "dict",
40
+ energy_tracking: bool = False,
41
+ energy_backend: str = "rapl",
42
+ method_level_analysis: bool = False,
43
+ include_source_code: bool = False,
44
+ ssh_key_path: str = None,
45
+ github_token: str = None,
46
+ ):
47
+ # Analyze multiple repositories from URLs.
48
+ # Args:
49
+ # urls: List of GitHub repository URLs
50
+ # max_commits: Maximum commits to analyze per repository
51
+ # parallel_workers: Number of parallel analysis workers (1=sequential)
52
+ # output_format: Output format (dict, json, csv)
53
+ # energy_tracking: Enable automatic energy measurement during analysis
54
+ # energy_backend: Energy backend (rapl, codecarbon, cpu_meter, auto)
55
+ # method_level_analysis: Include per-method metrics via Lizard
56
+ # include_source_code: Include source code before/after in results
57
+ # ssh_key_path: SSH key path for private repositories
58
+ # github_token: GitHub token for private HTTPS repositories
59
+ from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
60
+
61
+ analyzer = LocalRepoAnalyzer(
62
+ max_commits=max_commits,
63
+ energy_tracking=energy_tracking,
64
+ energy_backend=energy_backend,
65
+ method_level_analysis=method_level_analysis,
66
+ include_source_code=include_source_code,
67
+ ssh_key_path=ssh_key_path,
68
+ github_token=github_token,
69
+ )
70
+
71
+ return analyzer.analyze_repositories(
72
+ urls=urls,
73
+ parallel_workers=parallel_workers,
74
+ output_format=output_format,
75
+ )
76
+
77
+
78
+ __all__ = [
79
+ "Config",
80
+ "GSF_PATTERNS",
81
+ "GREEN_KEYWORDS",
82
+ "is_green_aware",
83
+ "get_pattern_by_keywords",
84
+ "fetch_repositories",
85
+ "analyze_repositories",
86
+ "__version__",
87
+ ]
@@ -1,3 +1,3 @@
1
1
  # Version information for greenmining.
2
2
 
3
- __version__ = "1.0.4"
3
+ __version__ = "1.0.5"
@@ -0,0 +1,22 @@
1
+ # Analyzers for GreenMining framework.
2
+
3
+ from .code_diff_analyzer import CodeDiffAnalyzer
4
+ from .statistical_analyzer import StatisticalAnalyzer
5
+ from .temporal_analyzer import TemporalAnalyzer
6
+ from .qualitative_analyzer import QualitativeAnalyzer
7
+ from .power_regression import PowerRegressionDetector, PowerRegression
8
+ from .metrics_power_correlator import MetricsPowerCorrelator, CorrelationResult
9
+ from .version_power_analyzer import VersionPowerAnalyzer, VersionPowerReport
10
+
11
+ __all__ = [
12
+ "CodeDiffAnalyzer",
13
+ "StatisticalAnalyzer",
14
+ "TemporalAnalyzer",
15
+ "QualitativeAnalyzer",
16
+ "PowerRegressionDetector",
17
+ "PowerRegression",
18
+ "MetricsPowerCorrelator",
19
+ "CorrelationResult",
20
+ "VersionPowerAnalyzer",
21
+ "VersionPowerReport",
22
+ ]