greenmining 1.2.5__py3-none-any.whl → 1.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- greenmining/__init__.py +7 -1
- greenmining/services/local_repo_analyzer.py +62 -4
- greenmining/services/reports.py +21 -21
- {greenmining-1.2.5.dist-info → greenmining-1.2.7.dist-info}/METADATA +1 -1
- {greenmining-1.2.5.dist-info → greenmining-1.2.7.dist-info}/RECORD +8 -8
- {greenmining-1.2.5.dist-info → greenmining-1.2.7.dist-info}/WHEEL +0 -0
- {greenmining-1.2.5.dist-info → greenmining-1.2.7.dist-info}/licenses/LICENSE +0 -0
- {greenmining-1.2.5.dist-info → greenmining-1.2.7.dist-info}/top_level.txt +0 -0
greenmining/__init__.py
CHANGED
|
@@ -8,7 +8,7 @@ from greenmining.gsf_patterns import (
|
|
|
8
8
|
is_green_aware,
|
|
9
9
|
)
|
|
10
10
|
|
|
11
|
-
__version__ = "1.2.5"
|
|
11
|
+
__version__ = "1.2.7"
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
def fetch_repositories(
|
|
@@ -75,6 +75,8 @@ def analyze_repositories(
|
|
|
75
75
|
cleanup_after: bool = True,
|
|
76
76
|
skip_merges: bool = True,
|
|
77
77
|
commit_order: str = "newest_first",
|
|
78
|
+
shallow_clone: bool = True,
|
|
79
|
+
clone_depth: int = None,
|
|
78
80
|
):
|
|
79
81
|
# Analyze multiple repositories from URLs.
|
|
80
82
|
# Args:
|
|
@@ -93,6 +95,8 @@ def analyze_repositories(
|
|
|
93
95
|
# cleanup_after: Remove cloned repos after analysis (default True)
|
|
94
96
|
# skip_merges: Skip merge commits (default True)
|
|
95
97
|
# commit_order: "newest_first" (default) or "oldest_first"
|
|
98
|
+
# shallow_clone: Use shallow cloning to reduce download size (default True)
|
|
99
|
+
# clone_depth: Git clone depth (auto-calculated from max_commits if None)
|
|
96
100
|
from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
|
|
97
101
|
|
|
98
102
|
kwargs = {}
|
|
@@ -116,6 +120,8 @@ def analyze_repositories(
|
|
|
116
120
|
cleanup_after=cleanup_after,
|
|
117
121
|
skip_merges=skip_merges,
|
|
118
122
|
commit_order=commit_order,
|
|
123
|
+
shallow_clone=shallow_clone,
|
|
124
|
+
clone_depth=clone_depth,
|
|
119
125
|
**kwargs,
|
|
120
126
|
)
|
|
121
127
|
|
|
@@ -205,6 +205,8 @@ class LocalRepoAnalyzer:
|
|
|
205
205
|
since_date: datetime | None = None,
|
|
206
206
|
to_date: datetime | None = None,
|
|
207
207
|
commit_order: str = "newest_first",
|
|
208
|
+
shallow_clone: bool = True,
|
|
209
|
+
clone_depth: int | None = None,
|
|
208
210
|
):
|
|
209
211
|
# Initialize the local repository analyzer.
|
|
210
212
|
# Args:
|
|
@@ -222,6 +224,8 @@ class LocalRepoAnalyzer:
|
|
|
222
224
|
# include_source_code: Include source code before/after in results
|
|
223
225
|
# process_metrics: "standard" or "full" PyDriller process metrics
|
|
224
226
|
# commit_order: "newest_first" (default) or "oldest_first"
|
|
227
|
+
# shallow_clone: Use shallow cloning to reduce download size (default True)
|
|
228
|
+
# clone_depth: Git clone depth (auto-calculated from max_commits if None)
|
|
225
229
|
self.clone_path = clone_path or Path.cwd() / "greenmining_repos"
|
|
226
230
|
self.clone_path.mkdir(parents=True, exist_ok=True)
|
|
227
231
|
self.max_commits = max_commits
|
|
@@ -232,6 +236,10 @@ class LocalRepoAnalyzer:
|
|
|
232
236
|
self.compute_process_metrics = compute_process_metrics
|
|
233
237
|
self.cleanup_after = cleanup_after
|
|
234
238
|
self.commit_order = commit_order
|
|
239
|
+
self.shallow_clone = shallow_clone
|
|
240
|
+
# Auto-calculate clone depth: max_commits * 5 to account for merges/skipped commits
|
|
241
|
+
# and avoid boundary errors where parent commits fall outside the shallow window
|
|
242
|
+
self.clone_depth = clone_depth if clone_depth else max(100, max_commits * 5)
|
|
235
243
|
self.gsf_patterns = GSF_PATTERNS
|
|
236
244
|
|
|
237
245
|
# Phase 1.3: Private repository support
|
|
@@ -472,12 +480,39 @@ class LocalRepoAnalyzer:
|
|
|
472
480
|
clone_parent.mkdir(parents=True, exist_ok=True)
|
|
473
481
|
local_path = clone_parent / repo_name
|
|
474
482
|
|
|
475
|
-
|
|
476
|
-
|
|
483
|
+
# Perform shallow clone manually before PyDriller (much faster!)
|
|
484
|
+
if not local_path.exists():
|
|
485
|
+
import subprocess
|
|
477
486
|
|
|
478
|
-
|
|
487
|
+
clone_cmd = ["git", "clone"]
|
|
488
|
+
if self.shallow_clone:
|
|
489
|
+
clone_cmd.extend(["--depth", str(self.clone_depth)])
|
|
490
|
+
clone_cmd.extend([auth_url, str(local_path)])
|
|
479
491
|
|
|
480
|
-
|
|
492
|
+
colored_print(
|
|
493
|
+
f" Cloning to: {local_path} (depth={self.clone_depth if self.shallow_clone else 'full'})",
|
|
494
|
+
"cyan",
|
|
495
|
+
)
|
|
496
|
+
|
|
497
|
+
try:
|
|
498
|
+
subprocess.run(
|
|
499
|
+
clone_cmd,
|
|
500
|
+
capture_output=True,
|
|
501
|
+
text=True,
|
|
502
|
+
check=True,
|
|
503
|
+
timeout=180,
|
|
504
|
+
)
|
|
505
|
+
except subprocess.TimeoutExpired:
|
|
506
|
+
colored_print(" Clone timeout after 180s", "yellow")
|
|
507
|
+
raise
|
|
508
|
+
except subprocess.CalledProcessError as e:
|
|
509
|
+
colored_print(f" Clone failed: {e.stderr}", "red")
|
|
510
|
+
raise
|
|
511
|
+
else:
|
|
512
|
+
colored_print(f" Using existing clone: {local_path}", "cyan")
|
|
513
|
+
|
|
514
|
+
# PyDriller will analyze the already-cloned repo
|
|
515
|
+
repo_config["path_to_repo"] = str(local_path)
|
|
481
516
|
|
|
482
517
|
# Phase 2.2: Start energy measurement if enabled (fresh meter per repo)
|
|
483
518
|
energy_result = None
|
|
@@ -525,6 +560,29 @@ class LocalRepoAnalyzer:
|
|
|
525
560
|
# Compute process metrics if enabled
|
|
526
561
|
process_metrics = {}
|
|
527
562
|
if self.compute_process_metrics and local_path.exists():
|
|
563
|
+
# Unshallow the repo before process metrics — they need full history
|
|
564
|
+
# for metrics like CommitsCount, ContributorsExperience, HistoryComplexity
|
|
565
|
+
if self.shallow_clone:
|
|
566
|
+
colored_print(" Deepening clone for process metrics...", "cyan")
|
|
567
|
+
try:
|
|
568
|
+
import subprocess
|
|
569
|
+
|
|
570
|
+
subprocess.run(
|
|
571
|
+
["git", "fetch", "--unshallow"],
|
|
572
|
+
cwd=str(local_path),
|
|
573
|
+
capture_output=True,
|
|
574
|
+
text=True,
|
|
575
|
+
check=True,
|
|
576
|
+
timeout=120,
|
|
577
|
+
)
|
|
578
|
+
except subprocess.CalledProcessError:
|
|
579
|
+
# Already unshallowed or not a shallow repo — safe to ignore
|
|
580
|
+
pass
|
|
581
|
+
except subprocess.TimeoutExpired:
|
|
582
|
+
colored_print(
|
|
583
|
+
" Warning: Unshallow timed out, process metrics may be incomplete",
|
|
584
|
+
"yellow",
|
|
585
|
+
)
|
|
528
586
|
colored_print(" Computing process metrics...", "cyan")
|
|
529
587
|
process_metrics = self._compute_process_metrics(str(local_path))
|
|
530
588
|
|
greenmining/services/reports.py
CHANGED
|
@@ -76,12 +76,12 @@ class ReportGenerator:
|
|
|
76
76
|
|
|
77
77
|
return f"""### Executive Summary
|
|
78
78
|
|
|
79
|
-
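This report presents findings from analyzing **{format_number(summary['total_commits'])} commits** across **{format_number(summary['total_repos'])} microservice-based repositories** to identify green software engineering practices.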
|
|
79
|
+
This report presents findings from analyzing **{format_number(summary["total_commits"])} commits** across **{format_number(summary["total_repos"])} microservice-based repositories** to identify green software engineering practices.
|
|
80
80
|
|
|
81
81
|
**Key Findings:**
|
|
82
82
|
|
|
83
|
-
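- **{format_percentage(summary['green_aware_percentage'])}** of commits ({format_number(summary['green_aware_count'])}) explicitly mention energy efficiency, performance optimization, or sustainability concerns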
|
|
84
|
-
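- **{format_number(summary['repos_with_green_commits'])}** out of {format_number(summary['total_repos'])} repositories contain at least one green-aware commit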
|
|
83
|
+
- **{format_percentage(summary["green_aware_percentage"])}** of commits ({format_number(summary["green_aware_count"])}) explicitly mention energy efficiency, performance optimization, or sustainability concerns
|
|
84
|
+
- **{format_number(summary["repos_with_green_commits"])}** out of {format_number(summary["total_repos"])} repositories contain at least one green-aware commit
|
|
85
85
|
- {pattern_text if pattern_text else "Various green software patterns were detected across the analyzed commits."}
|
|
86
86
|
|
|
87
87
|
**Implications:**
|
|
@@ -106,15 +106,15 @@ Repositories were selected from GitHub based on the following criteria:
|
|
|
106
106
|
|
|
107
107
|
- **Keywords:** {search_keywords}
|
|
108
108
|
- **Programming Languages:** {languages}
|
|
109
|
-
- **Minimum Stars:** {metadata.get('min_stars', 100)} (to ensure established projects)
|
|
109
|
+
- **Minimum Stars:** {metadata.get("min_stars", 100)} (to ensure established projects)
|
|
110
110
|
- **Sort Order:** Stars (descending)
|
|
111
|
-
- **Total Repositories:** {metadata.get('total_repos', 0)}
|
|
111
|
+
- **Total Repositories:** {metadata.get("total_repos", 0)}
|
|
112
112
|
|
|
113
113
|
#### 1.2 Data Extraction Approach
|
|
114
114
|
|
|
115
115
|
Commit data was extracted using PyDriller library:
|
|
116
116
|
|
|
117
|
-
- **Commits Analyzed:** {analysis_metadata.get('total_commits_analyzed', 0)}
|
|
117
|
+
- **Commits Analyzed:** {analysis_metadata.get("total_commits_analyzed", 0)}
|
|
118
118
|
- **Time Window:** Last 2 years (730 days)
|
|
119
119
|
- **Merge Commits:** Excluded
|
|
120
120
|
- **Minimum Commit Message Length:** 10 characters
|
|
@@ -192,8 +192,8 @@ Commits were analyzed using a keyword and heuristic-based classification framewo
|
|
|
192
192
|
|
|
193
193
|
return f"""#### 2.1 Green Awareness in Commits
|
|
194
194
|
|
|
195
|
-
**Total commits analyzed:** {format_number(summary['total_commits'])}
|
|
196
|
-
**Commits with explicit green mention:** {format_number(summary['green_aware_count'])} ({format_percentage(summary['green_aware_percentage'])})
|
|
195
|
+
**Total commits analyzed:** {format_number(summary["total_commits"])}
|
|
196
|
+
**Commits with explicit green mention:** {format_number(summary["green_aware_count"])} ({format_percentage(summary["green_aware_percentage"])})
|
|
197
197
|
|
|
198
198
|
**Table: Top 10 Repositories with Highest Green Awareness**
|
|
199
199
|
|
|
@@ -224,10 +224,10 @@ Commits were analyzed using a keyword and heuristic-based classification framewo
|
|
|
224
224
|
# Pattern descriptions
|
|
225
225
|
pattern_details = []
|
|
226
226
|
for i, pattern in enumerate(patterns[:10], 1):
|
|
227
|
-
pattern_details.append(f"""**{i}. {pattern['pattern_name']}**
|
|
228
|
-
- Frequency: {format_number(pattern['count'])} commits ({format_percentage(pattern['percentage'])})
|
|
229
|
-
- Confidence Distribution: HIGH={conf['HIGH']}, MEDIUM={conf['MEDIUM']}, LOW={conf['LOW']}
|
|
230
|
-
- Example Commits: {', '.join([c[:8] for c in pattern['example_commits'][:3]])}""")
|
|
227
|
+
pattern_details.append(f"""**{i}. {pattern["pattern_name"]}**
|
|
228
|
+
- Frequency: {format_number(pattern["count"])} commits ({format_percentage(pattern["percentage"])})
|
|
229
|
+
- Confidence Distribution: HIGH={conf["HIGH"]}, MEDIUM={conf["MEDIUM"]}, LOW={conf["LOW"]}
|
|
230
|
+
- Example Commits: {", ".join([c[:8] for c in pattern["example_commits"][:3]])}""")
|
|
231
231
|
|
|
232
232
|
return f"""#### 2.2 Known Green Patterns & Tactics Applied
|
|
233
233
|
|
|
@@ -252,10 +252,10 @@ No novel microservice-specific green practices were automatically detected. Manu
|
|
|
252
252
|
|
|
253
253
|
pattern_list = []
|
|
254
254
|
for pattern in emergent:
|
|
255
|
-
pattern_list.append(f"""**Pattern:** {pattern['pattern_name']}
|
|
256
|
-
- Occurrences: {pattern['count']}
|
|
257
|
-
- Description: {pattern['description']}
|
|
258
|
-
- Example Commits: {', '.join([c[:8] for c in pattern['example_commits'][:3]])}""")
|
|
255
|
+
pattern_list.append(f"""**Pattern:** {pattern["pattern_name"]}
|
|
256
|
+
- Occurrences: {pattern["count"]}
|
|
257
|
+
- Description: {pattern["description"]}
|
|
258
|
+
- Example Commits: {", ".join([c[:8] for c in pattern["example_commits"][:3]])}""")
|
|
259
259
|
|
|
260
260
|
return f"""#### 2.3 Emerging Practices Discovered
|
|
261
261
|
|
|
@@ -299,7 +299,7 @@ No novel microservice-specific green practices were automatically detected. Manu
|
|
|
299
299
|
if "error" in stats:
|
|
300
300
|
return f"""#### 2.5 Statistical Analysis
|
|
301
301
|
|
|
302
|
-
**Note:** Statistical analysis encountered an error: {stats['error']}
|
|
302
|
+
**Note:** Statistical analysis encountered an error: {stats["error"]}
|
|
303
303
|
"""
|
|
304
304
|
|
|
305
305
|
sections = []
|
|
@@ -495,16 +495,16 @@ Based on the detected patterns, microservice developers primarily focus on:
|
|
|
495
495
|
|
|
496
496
|
#### 5.1 Summary of Key Findings
|
|
497
497
|
|
|
498
|
-
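This study analyzed {format_number(summary['total_commits'])} commits from {format_number(summary['total_repos'])} microservice repositories and found: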
|
|
498
|
+
This study analyzed {format_number(summary["total_commits"])} commits from {format_number(summary["total_repos"])} microservice repositories and found:
|
|
499
499
|
|
|
500
|
-
1. **{format_percentage(summary['green_aware_percentage'])}** of commits explicitly address energy/sustainability concerns
|
|
501
|
-
2. **{format_number(summary['repos_with_green_commits'])}** repositories demonstrate some level of green awareness
|
|
500
|
+
1. **{format_percentage(summary["green_aware_percentage"])}** of commits explicitly address energy/sustainability concerns
|
|
501
|
+
2. **{format_number(summary["repos_with_green_commits"])}** repositories demonstrate some level of green awareness
|
|
502
502
|
3. Common green patterns include: {patterns_text}
|
|
503
503
|
|
|
504
504
|
#### 5.2 Answers to Research Questions
|
|
505
505
|
|
|
506
506
|
**RQ1: What percentage of microservice commits explicitly mention energy efficiency?**
|
|
507
|
-
Answer: {format_percentage(summary['green_aware_percentage'])} of analyzed commits contain explicit mentions.
|
|
507
|
+
Answer: {format_percentage(summary["green_aware_percentage"])} of analyzed commits contain explicit mentions.
|
|
508
508
|
|
|
509
509
|
**RQ2: Which green software tactics are developers applying in practice?**
|
|
510
510
|
Answer: Developers primarily apply caching strategies, resource pooling, database optimization, and asynchronous processing patterns.
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
greenmining/__init__.py,sha256=
|
|
1
|
+
greenmining/__init__.py,sha256=r5vl2wG-r-A7WKeiQ_V_2PghJYc8cAledUXlS2Le-Cg,4789
|
|
2
2
|
greenmining/__main__.py,sha256=NYOVS7D4w2XDLn6SyXHXPKE5GrNGOeoWSTb_KazgK5c,590
|
|
3
3
|
greenmining/gsf_patterns.py,sha256=UvNJPY3HlAx1SicwUqci40TlLg8lCL0tszSOH4haxQs,55921
|
|
4
4
|
greenmining/utils.py,sha256=-dnLUw9taCzvQ2dk6uc66GAohOFiXJFKs9TLSEPk5kM,2893
|
|
@@ -25,10 +25,10 @@ greenmining/services/commit_extractor.py,sha256=qBM9QpGzPZRmGMFufJ6gP8eWIuufTowL
|
|
|
25
25
|
greenmining/services/data_aggregator.py,sha256=BU_HUb-8c0n0sa_7VZRB8jIVnaVhRLf-E6KA4ASh-08,19427
|
|
26
26
|
greenmining/services/data_analyzer.py,sha256=0XqW-slrnt7RotrHDweOqKtoN8XIA7y6p7s2Jau6cMg,7431
|
|
27
27
|
greenmining/services/github_graphql_fetcher.py,sha256=WhSbQGMdkb0D4uLcMKW6xZK77c5AkW-nZf718issap4,11527
|
|
28
|
-
greenmining/services/local_repo_analyzer.py,sha256=
|
|
29
|
-
greenmining/services/reports.py,sha256=
|
|
30
|
-
greenmining-1.2.5.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
|
|
31
|
-
greenmining-1.2.
|
|
32
|
-
greenmining-1.2.5.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
33
|
-
greenmining-1.2.5.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
|
|
34
|
-
greenmining-1.2.5.dist-info/RECORD,,
|
|
28
|
+
greenmining/services/local_repo_analyzer.py,sha256=wJWQx-UnUhrmtte0b3shtuNUzvS5-nQlpMKItYVrKNo,28642
|
|
29
|
+
greenmining/services/reports.py,sha256=QCJZhET3hRkH83htxLkbEP58dE3-7jIZh82Pp60hQcc,23218
|
|
30
|
+
greenmining-1.2.7.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
|
|
31
|
+
greenmining-1.2.7.dist-info/METADATA,sha256=QasuhsVc6c-uXlMdaKFkQMHOEcnmTkA1U49WGLdJsnY,10522
|
|
32
|
+
greenmining-1.2.7.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
33
|
+
greenmining-1.2.7.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
|
|
34
|
+
greenmining-1.2.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|