greenmining 1.1.8__tar.gz → 1.1.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {greenmining-1.1.8 → greenmining-1.1.9}/CHANGELOG.md +1 -1
  2. {greenmining-1.1.8/greenmining.egg-info → greenmining-1.1.9}/PKG-INFO +8 -8
  3. {greenmining-1.1.8 → greenmining-1.1.9}/README.md +7 -7
  4. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/__init__.py +1 -1
  5. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/controllers/repository_controller.py +77 -14
  6. {greenmining-1.1.8 → greenmining-1.1.9/greenmining.egg-info}/PKG-INFO +8 -8
  7. {greenmining-1.1.8 → greenmining-1.1.9}/pyproject.toml +1 -1
  8. {greenmining-1.1.8 → greenmining-1.1.9}/LICENSE +0 -0
  9. {greenmining-1.1.8 → greenmining-1.1.9}/MANIFEST.in +0 -0
  10. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/__main__.py +0 -0
  11. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/analyzers/__init__.py +0 -0
  12. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/analyzers/code_diff_analyzer.py +0 -0
  13. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/analyzers/metrics_power_correlator.py +0 -0
  14. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/analyzers/power_regression.py +0 -0
  15. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/analyzers/qualitative_analyzer.py +0 -0
  16. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/analyzers/statistical_analyzer.py +0 -0
  17. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/analyzers/temporal_analyzer.py +0 -0
  18. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/analyzers/version_power_analyzer.py +0 -0
  19. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/config.py +0 -0
  20. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/controllers/__init__.py +0 -0
  21. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/energy/__init__.py +0 -0
  22. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/energy/base.py +0 -0
  23. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/energy/carbon_reporter.py +0 -0
  24. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/energy/codecarbon_meter.py +0 -0
  25. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/energy/cpu_meter.py +0 -0
  26. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/energy/rapl.py +0 -0
  27. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/gsf_patterns.py +0 -0
  28. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/models/__init__.py +0 -0
  29. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/models/aggregated_stats.py +0 -0
  30. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/models/analysis_result.py +0 -0
  31. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/models/commit.py +0 -0
  32. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/models/repository.py +0 -0
  33. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/presenters/__init__.py +0 -0
  34. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/presenters/console_presenter.py +0 -0
  35. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/services/__init__.py +0 -0
  36. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/services/commit_extractor.py +0 -0
  37. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/services/data_aggregator.py +0 -0
  38. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/services/data_analyzer.py +0 -0
  39. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/services/github_graphql_fetcher.py +0 -0
  40. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/services/local_repo_analyzer.py +0 -0
  41. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/services/reports.py +0 -0
  42. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining/utils.py +0 -0
  43. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining.egg-info/SOURCES.txt +0 -0
  44. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining.egg-info/dependency_links.txt +0 -0
  45. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining.egg-info/requires.txt +0 -0
  46. {greenmining-1.1.8 → greenmining-1.1.9}/greenmining.egg-info/top_level.txt +0 -0
  47. {greenmining-1.1.8 → greenmining-1.1.9}/setup.cfg +0 -0
  48. {greenmining-1.1.8 → greenmining-1.1.9}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  # Changelog
2
2
 
3
- ## [1.1.8] - 2026-01-31
3
+ ## [1.1.9] - 2026-01-31
4
4
 
5
5
  ### Removed
6
6
  - Web dashboard module (`greenmining/dashboard/`) and Flask dependency
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: greenmining
3
- Version: 1.1.8
3
+ Version: 1.1.9
4
4
  Summary: An empirical Python library for Mining Software Repositories (MSR) in Green IT research
5
5
  Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
6
6
  License: MIT
@@ -70,7 +70,7 @@ An empirical Python library for Mining Software Repositories (MSR) in Green IT r
70
70
 
71
71
  - **Mine repositories at scale** - Search, Fetch and analyze GitHub repositories via GraphQL API with configurable filters
72
72
 
73
- - **Classify green commits** - Detect 122 sustainability patterns from the Green Software Foundation (GSF) catalog
73
+ - **Classify green commits** - Detect 124 sustainability patterns from the Green Software Foundation (GSF) catalog
74
74
  - **Analyze any repository by URL** - Direct Git-based analysis with support for private repositories
75
75
  - **Measure energy consumption** - RAPL, CodeCarbon, and CPU Energy Meter backends for power profiling
76
76
  - **Carbon footprint reporting** - CO2 emissions calculation with 20+ country profiles and cloud region support
@@ -113,7 +113,7 @@ docker pull adambouafia/greenmining:latest
113
113
  from greenmining import GSF_PATTERNS, is_green_aware, get_pattern_by_keywords
114
114
 
115
115
  # Check available patterns
116
- print(f"Total patterns: {len(GSF_PATTERNS)}") # 122 patterns across 15 categories
116
+ print(f"Total patterns: {len(GSF_PATTERNS)}") # 124 patterns across 15 categories
117
117
 
118
118
  # Detect green awareness in commit messages
119
119
  commit_msg = "Optimize Redis caching to reduce energy consumption"
@@ -670,8 +670,8 @@ config = Config(
670
670
 
671
671
  ### Core Capabilities
672
672
 
673
- - **Pattern Detection**: 122 sustainability patterns across 15 categories from the GSF catalog
674
- - **Keyword Analysis**: 321 green software detection keywords
673
+ - **Pattern Detection**: 124 sustainability patterns across 15 categories from the GSF catalog
674
+ - **Keyword Analysis**: 332 green software detection keywords
675
675
  - **Repository Fetching**: GraphQL API with date, star, and language filters
676
676
  - **URL-Based Analysis**: Direct Git-based analysis from GitHub URLs (HTTPS and SSH)
677
677
  - **Batch Processing**: Parallel analysis of multiple repositories with configurable workers
@@ -739,7 +739,7 @@ print(f"Equivalent: {report.tree_months:.2f} tree-months to offset")
739
739
 
740
740
  ### Pattern Database
741
741
 
742
- **122 green software patterns based on:**
742
+ **124 green software patterns based on:**
743
743
  - Green Software Foundation (GSF) Patterns Catalog
744
744
  - VU Amsterdam 2024 research on ML system sustainability
745
745
  - ICSE 2024 conference papers on sustainable software
@@ -749,11 +749,11 @@ print(f"Equivalent: {report.tree_months:.2f} tree-months to offset")
749
749
  - **Coverage**: 67% of patterns actively detect in real-world commits
750
750
  - **Accuracy**: 100% true positive rate for green-aware commits
751
751
  - **Categories**: 15 distinct sustainability domains covered
752
- - **Keywords**: 321 detection terms across all patterns
752
+ - **Keywords**: 332 detection terms across all patterns
753
753
 
754
754
  ## GSF Pattern Categories
755
755
 
756
- **122 patterns across 15 categories:**
756
+ **124 patterns across 15 categories:**
757
757
 
758
758
  ### 1. Cloud (40 patterns)
759
759
  Auto-scaling, serverless computing, right-sizing instances, region selection for renewable energy, spot instances, idle resource detection, cloud-native architectures
@@ -13,7 +13,7 @@ An empirical Python library for Mining Software Repositories (MSR) in Green IT r
13
13
 
14
14
  - **Mine repositories at scale** - Search, Fetch and analyze GitHub repositories via GraphQL API with configurable filters
15
15
 
16
- - **Classify green commits** - Detect 122 sustainability patterns from the Green Software Foundation (GSF) catalog
16
+ - **Classify green commits** - Detect 124 sustainability patterns from the Green Software Foundation (GSF) catalog
17
17
  - **Analyze any repository by URL** - Direct Git-based analysis with support for private repositories
18
18
  - **Measure energy consumption** - RAPL, CodeCarbon, and CPU Energy Meter backends for power profiling
19
19
  - **Carbon footprint reporting** - CO2 emissions calculation with 20+ country profiles and cloud region support
@@ -56,7 +56,7 @@ docker pull adambouafia/greenmining:latest
56
56
  from greenmining import GSF_PATTERNS, is_green_aware, get_pattern_by_keywords
57
57
 
58
58
  # Check available patterns
59
- print(f"Total patterns: {len(GSF_PATTERNS)}") # 122 patterns across 15 categories
59
+ print(f"Total patterns: {len(GSF_PATTERNS)}") # 124 patterns across 15 categories
60
60
 
61
61
  # Detect green awareness in commit messages
62
62
  commit_msg = "Optimize Redis caching to reduce energy consumption"
@@ -613,8 +613,8 @@ config = Config(
613
613
 
614
614
  ### Core Capabilities
615
615
 
616
- - **Pattern Detection**: 122 sustainability patterns across 15 categories from the GSF catalog
617
- - **Keyword Analysis**: 321 green software detection keywords
616
+ - **Pattern Detection**: 124 sustainability patterns across 15 categories from the GSF catalog
617
+ - **Keyword Analysis**: 332 green software detection keywords
618
618
  - **Repository Fetching**: GraphQL API with date, star, and language filters
619
619
  - **URL-Based Analysis**: Direct Git-based analysis from GitHub URLs (HTTPS and SSH)
620
620
  - **Batch Processing**: Parallel analysis of multiple repositories with configurable workers
@@ -682,7 +682,7 @@ print(f"Equivalent: {report.tree_months:.2f} tree-months to offset")
682
682
 
683
683
  ### Pattern Database
684
684
 
685
- **122 green software patterns based on:**
685
+ **124 green software patterns based on:**
686
686
  - Green Software Foundation (GSF) Patterns Catalog
687
687
  - VU Amsterdam 2024 research on ML system sustainability
688
688
  - ICSE 2024 conference papers on sustainable software
@@ -692,11 +692,11 @@ print(f"Equivalent: {report.tree_months:.2f} tree-months to offset")
692
692
  - **Coverage**: 67% of patterns actively detect in real-world commits
693
693
  - **Accuracy**: 100% true positive rate for green-aware commits
694
694
  - **Categories**: 15 distinct sustainability domains covered
695
- - **Keywords**: 321 detection terms across all patterns
695
+ - **Keywords**: 332 detection terms across all patterns
696
696
 
697
697
  ## GSF Pattern Categories
698
698
 
699
- **122 patterns across 15 categories:**
699
+ **124 patterns across 15 categories:**
700
700
 
701
701
  ### 1. Cloud (40 patterns)
702
702
  Auto-scaling, serverless computing, right-sizing instances, region selection for renewable energy, spot instances, idle resource detection, cloud-native architectures
@@ -9,7 +9,7 @@ from greenmining.gsf_patterns import (
9
9
  is_green_aware,
10
10
  )
11
11
 
12
- __version__ = "1.1.8"
12
+ __version__ = "1.1.9"
13
13
 
14
14
 
15
15
  def fetch_repositories(
@@ -1,4 +1,9 @@
1
- # Repository Controller - Handles repository fetching operations.
1
+ # Repository Controller - Handles repository fetching + cloning operations.
2
+ import os
3
+ import re
4
+ import shutil
5
+ from pathlib import Path
6
+ from typing import List, Dict
2
7
 
3
8
  from greenmining.config import Config
4
9
  from greenmining.models.repository import Repository
@@ -13,23 +18,81 @@ class RepositoryController:
13
18
  # Initialize controller with configuration.
14
19
  self.config = config
15
20
  self.graphql_fetcher = GitHubGraphQLFetcher(config.GITHUB_TOKEN)
16
-
17
- def fetch_repositories(
18
- self,
19
- max_repos: int = None,
20
- min_stars: int = None,
21
- languages: list[str] = None,
22
- keywords: str = None,
23
- created_after: str = None,
24
- created_before: str = None,
25
- pushed_after: str = None,
26
- pushed_before: str = None,
27
- ) -> list[Repository]:
21
+ self.repos_dir = Path.cwd() / "greenmining_repos"
22
+
23
+ def _sanitize_repo_name(self, repo: Repository, index: int = 0) -> str:
24
+ """Safe unique dir name: owner_repo[_index]. Handles case collisions."""
25
+ base = re.sub(r'[^a-z0-9-]', '_', repo.full_name.replace('/', '_').lower())
26
+ name = f"{base}_{index}" if index else base
27
+ path = self.repos_dir / name
28
+ counter = 1
29
+ while path.exists():
30
+ name = f"{base}_{counter}"
31
+ path = self.repos_dir / name
32
+ counter += 1
33
+ return name
34
+
35
def clone_repositories(
    self,
    repositories: List[Repository],
    github_token: str = None,
    cleanup: bool = True,
    depth: int = 1,  # Shallow clone
) -> List[Dict]:
    """Clone each repository into ``self.repos_dir`` under a unique sanitized name.

    Args:
        repositories: Repositories to clone; ``full_name`` is read from each,
            and ``url`` is used as the clone URL when no token is given.
        github_token: Optional token. When set, repositories are cloned from
            ``https://<token>@github.com/<full_name>.git``.
        cleanup: When true, ``self.repos_dir`` is deleted and recreated
            before cloning.
        depth: Value passed to ``git clone --depth``.

    Returns:
        One dict per repository with ``full_name``, ``local_path`` and
        ``success``; failed clones also carry an ``error`` string. The same
        list is written to ``clone_results.json`` inside ``self.repos_dir``.
    """
    # Function-scope import so the block does not depend on a file-level import.
    import subprocess

    # Wipe first, then create once (previously the directory was created,
    # removed and created again).
    if cleanup:
        shutil.rmtree(self.repos_dir, ignore_errors=True)
    self.repos_dir.mkdir(parents=True, exist_ok=True)
    if cleanup:
        colored_print(f"Cleaned {self.repos_dir}", "yellow")

    total = len(repositories)
    results = []
    for i, repo in enumerate(repositories, 1):
        safe_name = self._sanitize_repo_name(repo, i)
        clone_path = self.repos_dir / safe_name

        colored_print(f"[{i}/{total}] Cloning {repo.full_name} → {safe_name}", "cyan")

        # NOTE(review): with a token, git stores this URL (token included) as
        # the clone's "origin" remote in .git/config - confirm that is acceptable.
        if github_token:
            url = f"https://{github_token}@github.com/{repo.full_name}.git"
        else:
            url = repo.url
        cmd = ["git", "clone", f"--depth={depth}", "-v", url, str(clone_path)]

        entry = {
            "full_name": repo.full_name,
            "local_path": str(clone_path),
            "success": True,
        }
        try:
            subprocess.check_call(cmd, cwd=self.repos_dir.parent)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing/unrunnable git binary, so one failure
            # is recorded instead of aborting the batch before results are saved.
            error = str(e)
            # str(CalledProcessError) embeds the full command line, which
            # contains the token-bearing URL; never print or persist it.
            if github_token:
                error = error.replace(github_token, "***")
            colored_print(f"{safe_name}: {error}", "red")
            entry["success"] = False
            entry["error"] = error
        else:
            colored_print(f"{safe_name}", "green")
        results.append(entry)

    # Save map for analyze_repositories
    save_json_file(results, self.repos_dir / "clone_results.json")
    # Guard the empty case: no repositories means 0%, not ZeroDivisionError.
    if results:
        success_rate = sum(1 for r in results if r["success"]) / len(results) * 100
    else:
        success_rate = 0.0
    colored_print(f"Cloned: {success_rate:.1f}% ({self.repos_dir}/clone_results.json)", "green")
    return results
82
+
83
+
84
+
85
+
86
+
87
+ def fetch_repositories(self, max_repos: int = None, min_stars: int = None,
88
+ languages: list[str] = None, keywords: str = None,
89
+ created_after: str = None, created_before: str = None,
90
+ pushed_after: str = None, pushed_before: str = None) -> list[Repository]:
28
91
  # Fetch repositories from GitHub using GraphQL API.
29
92
  max_repos = max_repos or self.config.MAX_REPOS
30
93
  min_stars = min_stars or self.config.MIN_STARS
31
94
  languages = languages or self.config.SUPPORTED_LANGUAGES
32
- keywords = keywords or "microservices"
95
+ keywords = keywords
33
96
 
34
97
  colored_print(f"Fetching up to {max_repos} repositories...", "cyan")
35
98
  colored_print(f" Keywords: {keywords}", "cyan")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: greenmining
3
- Version: 1.1.8
3
+ Version: 1.1.9
4
4
  Summary: An empirical Python library for Mining Software Repositories (MSR) in Green IT research
5
5
  Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
6
6
  License: MIT
@@ -70,7 +70,7 @@ An empirical Python library for Mining Software Repositories (MSR) in Green IT r
70
70
 
71
71
  - **Mine repositories at scale** - Search, Fetch and analyze GitHub repositories via GraphQL API with configurable filters
72
72
 
73
- - **Classify green commits** - Detect 122 sustainability patterns from the Green Software Foundation (GSF) catalog
73
+ - **Classify green commits** - Detect 124 sustainability patterns from the Green Software Foundation (GSF) catalog
74
74
  - **Analyze any repository by URL** - Direct Git-based analysis with support for private repositories
75
75
  - **Measure energy consumption** - RAPL, CodeCarbon, and CPU Energy Meter backends for power profiling
76
76
  - **Carbon footprint reporting** - CO2 emissions calculation with 20+ country profiles and cloud region support
@@ -113,7 +113,7 @@ docker pull adambouafia/greenmining:latest
113
113
  from greenmining import GSF_PATTERNS, is_green_aware, get_pattern_by_keywords
114
114
 
115
115
  # Check available patterns
116
- print(f"Total patterns: {len(GSF_PATTERNS)}") # 122 patterns across 15 categories
116
+ print(f"Total patterns: {len(GSF_PATTERNS)}") # 124 patterns across 15 categories
117
117
 
118
118
  # Detect green awareness in commit messages
119
119
  commit_msg = "Optimize Redis caching to reduce energy consumption"
@@ -670,8 +670,8 @@ config = Config(
670
670
 
671
671
  ### Core Capabilities
672
672
 
673
- - **Pattern Detection**: 122 sustainability patterns across 15 categories from the GSF catalog
674
- - **Keyword Analysis**: 321 green software detection keywords
673
+ - **Pattern Detection**: 124 sustainability patterns across 15 categories from the GSF catalog
674
+ - **Keyword Analysis**: 332 green software detection keywords
675
675
  - **Repository Fetching**: GraphQL API with date, star, and language filters
676
676
  - **URL-Based Analysis**: Direct Git-based analysis from GitHub URLs (HTTPS and SSH)
677
677
  - **Batch Processing**: Parallel analysis of multiple repositories with configurable workers
@@ -739,7 +739,7 @@ print(f"Equivalent: {report.tree_months:.2f} tree-months to offset")
739
739
 
740
740
  ### Pattern Database
741
741
 
742
- **122 green software patterns based on:**
742
+ **124 green software patterns based on:**
743
743
  - Green Software Foundation (GSF) Patterns Catalog
744
744
  - VU Amsterdam 2024 research on ML system sustainability
745
745
  - ICSE 2024 conference papers on sustainable software
@@ -749,11 +749,11 @@ print(f"Equivalent: {report.tree_months:.2f} tree-months to offset")
749
749
  - **Coverage**: 67% of patterns actively detect in real-world commits
750
750
  - **Accuracy**: 100% true positive rate for green-aware commits
751
751
  - **Categories**: 15 distinct sustainability domains covered
752
- - **Keywords**: 321 detection terms across all patterns
752
+ - **Keywords**: 332 detection terms across all patterns
753
753
 
754
754
  ## GSF Pattern Categories
755
755
 
756
- **122 patterns across 15 categories:**
756
+ **124 patterns across 15 categories:**
757
757
 
758
758
  ### 1. Cloud (40 patterns)
759
759
  Auto-scaling, serverless computing, right-sizing instances, region selection for renewable energy, spot instances, idle resource detection, cloud-native architectures
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "greenmining"
7
- version = "1.1.8"
7
+ version = "1.1.9"
8
8
  description = "An empirical Python library for Mining Software Repositories (MSR) in Green IT research"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
File without changes
File without changes
File without changes
File without changes