greenmining 1.2.5.tar.gz → 1.2.6.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {greenmining-1.2.5 → greenmining-1.2.6}/CHANGELOG.md +17 -0
- {greenmining-1.2.5/greenmining.egg-info → greenmining-1.2.6}/PKG-INFO +1 -1
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/__init__.py +7 -1
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/services/local_repo_analyzer.py +38 -4
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/services/reports.py +21 -21
- {greenmining-1.2.5 → greenmining-1.2.6/greenmining.egg-info}/PKG-INFO +1 -1
- {greenmining-1.2.5 → greenmining-1.2.6}/pyproject.toml +1 -1
- {greenmining-1.2.5 → greenmining-1.2.6}/LICENSE +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/MANIFEST.in +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/README.md +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/__main__.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/analyzers/__init__.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/analyzers/code_diff_analyzer.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/analyzers/metrics_power_correlator.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/analyzers/statistical_analyzer.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/analyzers/temporal_analyzer.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/controllers/__init__.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/controllers/repository_controller.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/energy/__init__.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/energy/base.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/energy/carbon_reporter.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/energy/codecarbon_meter.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/energy/cpu_meter.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/energy/rapl.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/gsf_patterns.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/models/__init__.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/models/aggregated_stats.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/models/analysis_result.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/models/commit.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/models/repository.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/services/__init__.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/services/commit_extractor.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/services/data_aggregator.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/services/data_analyzer.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/services/github_graphql_fetcher.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining/utils.py +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining.egg-info/SOURCES.txt +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining.egg-info/dependency_links.txt +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining.egg-info/requires.txt +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/greenmining.egg-info/top_level.txt +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/setup.cfg +0 -0
- {greenmining-1.2.5 → greenmining-1.2.6}/setup.py +0 -0
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [1.2.5] - 2026-02-02
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- `shallow_clone` parameter (default: True) to significantly reduce repository download size
|
|
7
|
+
- `clone_depth` parameter for custom clone depth (auto-calculated as `max(50, max_commits * 3)` if None)
|
|
8
|
+
- Manual git shallow cloning before PyDriller analysis for dramatic performance improvement
|
|
9
|
+
|
|
10
|
+
### Changed
|
|
11
|
+
- Repository cloning now uses `git clone --depth=N` by default, reducing download times by ~90%
|
|
12
|
+
- Clone depth automatically calculated based on max_commits to ensure sufficient history
|
|
13
|
+
|
|
14
|
+
### Performance
|
|
15
|
+
- Reduced clone size from 528 MB to ~50 MB for typical 10-repo analysis (90% reduction)
|
|
16
|
+
- Example: ant-design repo reduced from 184 MB (full) to 14 MB (depth=50) - 92% smaller
|
|
17
|
+
- Analysis time for small experiments reduced from 6+ minutes to under 1 minute
|
|
18
|
+
|
|
3
19
|
## [1.2.4] - 2026-02-01
|
|
4
20
|
|
|
5
21
|
### Added
|
|
@@ -116,6 +132,7 @@
|
|
|
116
132
|
- Green awareness analysis
|
|
117
133
|
- Docker containerization
|
|
118
134
|
|
|
135
|
+
[1.2.5]: https://github.com/adam-bouafia/greenmining/compare/v1.2.4...v1.2.5
|
|
119
136
|
[1.2.4]: https://github.com/adam-bouafia/greenmining/compare/v1.2.3...v1.2.4
|
|
120
137
|
[1.2.3]: https://github.com/adam-bouafia/greenmining/compare/v1.2.1...v1.2.3
|
|
121
138
|
[1.2.1]: https://github.com/adam-bouafia/greenmining/compare/v1.2.0...v1.2.1
|
|
@@ -8,7 +8,7 @@ from greenmining.gsf_patterns import (
|
|
|
8
8
|
is_green_aware,
|
|
9
9
|
)
|
|
10
10
|
|
|
11
|
-
__version__ = "1.2.5"
|
|
11
|
+
__version__ = "1.2.6"
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
def fetch_repositories(
|
|
@@ -75,6 +75,8 @@ def analyze_repositories(
|
|
|
75
75
|
cleanup_after: bool = True,
|
|
76
76
|
skip_merges: bool = True,
|
|
77
77
|
commit_order: str = "newest_first",
|
|
78
|
+
shallow_clone: bool = True,
|
|
79
|
+
clone_depth: int = None,
|
|
78
80
|
):
|
|
79
81
|
# Analyze multiple repositories from URLs.
|
|
80
82
|
# Args:
|
|
@@ -93,6 +95,8 @@ def analyze_repositories(
|
|
|
93
95
|
# cleanup_after: Remove cloned repos after analysis (default True)
|
|
94
96
|
# skip_merges: Skip merge commits (default True)
|
|
95
97
|
# commit_order: "newest_first" (default) or "oldest_first"
|
|
98
|
+
# shallow_clone: Use shallow cloning to reduce download size (default True)
|
|
99
|
+
# clone_depth: Git clone depth (auto-calculated from max_commits if None)
|
|
96
100
|
from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
|
|
97
101
|
|
|
98
102
|
kwargs = {}
|
|
@@ -116,6 +120,8 @@ def analyze_repositories(
|
|
|
116
120
|
cleanup_after=cleanup_after,
|
|
117
121
|
skip_merges=skip_merges,
|
|
118
122
|
commit_order=commit_order,
|
|
123
|
+
shallow_clone=shallow_clone,
|
|
124
|
+
clone_depth=clone_depth,
|
|
119
125
|
**kwargs,
|
|
120
126
|
)
|
|
121
127
|
|
|
@@ -205,6 +205,8 @@ class LocalRepoAnalyzer:
|
|
|
205
205
|
since_date: datetime | None = None,
|
|
206
206
|
to_date: datetime | None = None,
|
|
207
207
|
commit_order: str = "newest_first",
|
|
208
|
+
shallow_clone: bool = True,
|
|
209
|
+
clone_depth: int | None = None,
|
|
208
210
|
):
|
|
209
211
|
# Initialize the local repository analyzer.
|
|
210
212
|
# Args:
|
|
@@ -222,6 +224,8 @@ class LocalRepoAnalyzer:
|
|
|
222
224
|
# include_source_code: Include source code before/after in results
|
|
223
225
|
# process_metrics: "standard" or "full" PyDriller process metrics
|
|
224
226
|
# commit_order: "newest_first" (default) or "oldest_first"
|
|
227
|
+
# shallow_clone: Use shallow cloning to reduce download size (default True)
|
|
228
|
+
# clone_depth: Git clone depth (auto-calculated from max_commits if None)
|
|
225
229
|
self.clone_path = clone_path or Path.cwd() / "greenmining_repos"
|
|
226
230
|
self.clone_path.mkdir(parents=True, exist_ok=True)
|
|
227
231
|
self.max_commits = max_commits
|
|
@@ -232,6 +236,9 @@ class LocalRepoAnalyzer:
|
|
|
232
236
|
self.compute_process_metrics = compute_process_metrics
|
|
233
237
|
self.cleanup_after = cleanup_after
|
|
234
238
|
self.commit_order = commit_order
|
|
239
|
+
self.shallow_clone = shallow_clone
|
|
240
|
+
# Auto-calculate clone depth: max_commits * 3 to account for merges/skipped commits
|
|
241
|
+
self.clone_depth = clone_depth if clone_depth else max(50, max_commits * 3)
|
|
235
242
|
self.gsf_patterns = GSF_PATTERNS
|
|
236
243
|
|
|
237
244
|
# Phase 1.3: Private repository support
|
|
@@ -472,12 +479,39 @@ class LocalRepoAnalyzer:
|
|
|
472
479
|
clone_parent.mkdir(parents=True, exist_ok=True)
|
|
473
480
|
local_path = clone_parent / repo_name
|
|
474
481
|
|
|
475
|
-
|
|
476
|
-
|
|
482
|
+
# Perform shallow clone manually before PyDriller (much faster!)
|
|
483
|
+
if not local_path.exists():
|
|
484
|
+
import subprocess
|
|
477
485
|
|
|
478
|
-
|
|
486
|
+
clone_cmd = ["git", "clone"]
|
|
487
|
+
if self.shallow_clone:
|
|
488
|
+
clone_cmd.extend(["--depth", str(self.clone_depth)])
|
|
489
|
+
clone_cmd.extend([auth_url, str(local_path)])
|
|
479
490
|
|
|
480
|
-
|
|
491
|
+
colored_print(
|
|
492
|
+
f" Cloning to: {local_path} (depth={self.clone_depth if self.shallow_clone else 'full'})",
|
|
493
|
+
"cyan",
|
|
494
|
+
)
|
|
495
|
+
|
|
496
|
+
try:
|
|
497
|
+
subprocess.run(
|
|
498
|
+
clone_cmd,
|
|
499
|
+
capture_output=True,
|
|
500
|
+
text=True,
|
|
501
|
+
check=True,
|
|
502
|
+
timeout=180,
|
|
503
|
+
)
|
|
504
|
+
except subprocess.TimeoutExpired:
|
|
505
|
+
colored_print(" Clone timeout after 180s", "yellow")
|
|
506
|
+
raise
|
|
507
|
+
except subprocess.CalledProcessError as e:
|
|
508
|
+
colored_print(f" Clone failed: {e.stderr}", "red")
|
|
509
|
+
raise
|
|
510
|
+
else:
|
|
511
|
+
colored_print(f" Using existing clone: {local_path}", "cyan")
|
|
512
|
+
|
|
513
|
+
# PyDriller will analyze the already-cloned repo
|
|
514
|
+
repo_config["path_to_repo"] = str(local_path)
|
|
481
515
|
|
|
482
516
|
# Phase 2.2: Start energy measurement if enabled (fresh meter per repo)
|
|
483
517
|
energy_result = None
|
|
@@ -76,12 +76,12 @@ class ReportGenerator:
|
|
|
76
76
|
|
|
77
77
|
return f"""### Executive Summary
|
|
78
78
|
|
|
79
|
-
This report presents findings from analyzing **{format_number(summary[
|
|
79
|
+
This report presents findings from analyzing **{format_number(summary["total_commits"])} commits** across **{format_number(summary["total_repos"])} microservice-based repositories** to identify green software engineering practices.
|
|
80
80
|
|
|
81
81
|
**Key Findings:**
|
|
82
82
|
|
|
83
|
-
- **{format_percentage(summary[
|
|
84
|
-
- **{format_number(summary[
|
|
83
|
+
- **{format_percentage(summary["green_aware_percentage"])}** of commits ({format_number(summary["green_aware_count"])}) explicitly mention energy efficiency, performance optimization, or sustainability concerns
|
|
84
|
+
- **{format_number(summary["repos_with_green_commits"])}** out of {format_number(summary["total_repos"])} repositories contain at least one green-aware commit
|
|
85
85
|
- {pattern_text if pattern_text else "Various green software patterns were detected across the analyzed commits."}
|
|
86
86
|
|
|
87
87
|
**Implications:**
|
|
@@ -106,15 +106,15 @@ Repositories were selected from GitHub based on the following criteria:
|
|
|
106
106
|
|
|
107
107
|
- **Keywords:** {search_keywords}
|
|
108
108
|
- **Programming Languages:** {languages}
|
|
109
|
-
- **Minimum Stars:** {metadata.get(
|
|
109
|
+
- **Minimum Stars:** {metadata.get("min_stars", 100)} (to ensure established projects)
|
|
110
110
|
- **Sort Order:** Stars (descending)
|
|
111
|
-
- **Total Repositories:** {metadata.get(
|
|
111
|
+
- **Total Repositories:** {metadata.get("total_repos", 0)}
|
|
112
112
|
|
|
113
113
|
#### 1.2 Data Extraction Approach
|
|
114
114
|
|
|
115
115
|
Commit data was extracted using PyDriller library:
|
|
116
116
|
|
|
117
|
-
- **Commits Analyzed:** {analysis_metadata.get(
|
|
117
|
+
- **Commits Analyzed:** {analysis_metadata.get("total_commits_analyzed", 0)}
|
|
118
118
|
- **Time Window:** Last 2 years (730 days)
|
|
119
119
|
- **Merge Commits:** Excluded
|
|
120
120
|
- **Minimum Commit Message Length:** 10 characters
|
|
@@ -192,8 +192,8 @@ Commits were analyzed using a keyword and heuristic-based classification framewo
|
|
|
192
192
|
|
|
193
193
|
return f"""#### 2.1 Green Awareness in Commits
|
|
194
194
|
|
|
195
|
-
**Total commits analyzed:** {format_number(summary[
|
|
196
|
-
**Commits with explicit green mention:** {format_number(summary[
|
|
195
|
+
**Total commits analyzed:** {format_number(summary["total_commits"])}
|
|
196
|
+
**Commits with explicit green mention:** {format_number(summary["green_aware_count"])} ({format_percentage(summary["green_aware_percentage"])})
|
|
197
197
|
|
|
198
198
|
**Table: Top 10 Repositories with Highest Green Awareness**
|
|
199
199
|
|
|
@@ -224,10 +224,10 @@ Commits were analyzed using a keyword and heuristic-based classification framewo
|
|
|
224
224
|
# Pattern descriptions
|
|
225
225
|
pattern_details = []
|
|
226
226
|
for i, pattern in enumerate(patterns[:10], 1):
|
|
227
|
-
pattern_details.append(f"""**{i}. {pattern[
|
|
228
|
-
- Frequency: {format_number(pattern[
|
|
229
|
-
- Confidence Distribution: HIGH={conf[
|
|
230
|
-
- Example Commits: {
|
|
227
|
+
pattern_details.append(f"""**{i}. {pattern["pattern_name"]}**
|
|
228
|
+
- Frequency: {format_number(pattern["count"])} commits ({format_percentage(pattern["percentage"])})
|
|
229
|
+
- Confidence Distribution: HIGH={conf["HIGH"]}, MEDIUM={conf["MEDIUM"]}, LOW={conf["LOW"]}
|
|
230
|
+
- Example Commits: {", ".join([c[:8] for c in pattern["example_commits"][:3]])}""")
|
|
231
231
|
|
|
232
232
|
return f"""#### 2.2 Known Green Patterns & Tactics Applied
|
|
233
233
|
|
|
@@ -252,10 +252,10 @@ No novel microservice-specific green practices were automatically detected. Manu
|
|
|
252
252
|
|
|
253
253
|
pattern_list = []
|
|
254
254
|
for pattern in emergent:
|
|
255
|
-
pattern_list.append(f"""**Pattern:** {pattern[
|
|
256
|
-
- Occurrences: {pattern[
|
|
257
|
-
- Description: {pattern[
|
|
258
|
-
- Example Commits: {
|
|
255
|
+
pattern_list.append(f"""**Pattern:** {pattern["pattern_name"]}
|
|
256
|
+
- Occurrences: {pattern["count"]}
|
|
257
|
+
- Description: {pattern["description"]}
|
|
258
|
+
- Example Commits: {", ".join([c[:8] for c in pattern["example_commits"][:3]])}""")
|
|
259
259
|
|
|
260
260
|
return f"""#### 2.3 Emerging Practices Discovered
|
|
261
261
|
|
|
@@ -299,7 +299,7 @@ No novel microservice-specific green practices were automatically detected. Manu
|
|
|
299
299
|
if "error" in stats:
|
|
300
300
|
return f"""#### 2.5 Statistical Analysis
|
|
301
301
|
|
|
302
|
-
**Note:** Statistical analysis encountered an error: {stats[
|
|
302
|
+
**Note:** Statistical analysis encountered an error: {stats["error"]}
|
|
303
303
|
"""
|
|
304
304
|
|
|
305
305
|
sections = []
|
|
@@ -495,16 +495,16 @@ Based on the detected patterns, microservice developers primarily focus on:
|
|
|
495
495
|
|
|
496
496
|
#### 5.1 Summary of Key Findings
|
|
497
497
|
|
|
498
|
-
This study analyzed {format_number(summary[
|
|
498
|
+
This study analyzed {format_number(summary["total_commits"])} commits from {format_number(summary["total_repos"])} microservice repositories and found:
|
|
499
499
|
|
|
500
|
-
1. **{format_percentage(summary[
|
|
501
|
-
2. **{format_number(summary[
|
|
500
|
+
1. **{format_percentage(summary["green_aware_percentage"])}** of commits explicitly address energy/sustainability concerns
|
|
501
|
+
2. **{format_number(summary["repos_with_green_commits"])}** repositories demonstrate some level of green awareness
|
|
502
502
|
3. Common green patterns include: {patterns_text}
|
|
503
503
|
|
|
504
504
|
#### 5.2 Answers to Research Questions
|
|
505
505
|
|
|
506
506
|
**RQ1: What percentage of microservice commits explicitly mention energy efficiency?**
|
|
507
|
-
Answer: {format_percentage(summary[
|
|
507
|
+
Answer: {format_percentage(summary["green_aware_percentage"])} of analyzed commits contain explicit mentions.
|
|
508
508
|
|
|
509
509
|
**RQ2: Which green software tactics are developers applying in practice?**
|
|
510
510
|
Answer: Developers primarily apply caching strategies, resource pooling, database optimization, and asynchronous processing patterns.
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "greenmining"
|
|
7
|
-
version = "1.2.5"
|
|
7
|
+
version = "1.2.6"
|
|
8
8
|
description = "An empirical Python library for Mining Software Repositories (MSR) in Green IT research"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|