greenmining 1.2.5__py3-none-any.whl → 1.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
greenmining/__init__.py CHANGED
@@ -8,7 +8,7 @@ from greenmining.gsf_patterns import (
8
8
  is_green_aware,
9
9
  )
10
10
 
11
- __version__ = "1.2.5"
11
+ __version__ = "1.2.6"
12
12
 
13
13
 
14
14
  def fetch_repositories(
@@ -75,6 +75,8 @@ def analyze_repositories(
75
75
  cleanup_after: bool = True,
76
76
  skip_merges: bool = True,
77
77
  commit_order: str = "newest_first",
78
+ shallow_clone: bool = True,
79
+ clone_depth: int = None,
78
80
  ):
79
81
  # Analyze multiple repositories from URLs.
80
82
  # Args:
@@ -93,6 +95,8 @@ def analyze_repositories(
93
95
  # cleanup_after: Remove cloned repos after analysis (default True)
94
96
  # skip_merges: Skip merge commits (default True)
95
97
  # commit_order: "newest_first" (default) or "oldest_first"
98
+ # shallow_clone: Use shallow cloning to reduce download size (default True)
99
+ # clone_depth: Git clone depth (auto-calculated from max_commits if None)
96
100
  from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
97
101
 
98
102
  kwargs = {}
@@ -116,6 +120,8 @@ def analyze_repositories(
116
120
  cleanup_after=cleanup_after,
117
121
  skip_merges=skip_merges,
118
122
  commit_order=commit_order,
123
+ shallow_clone=shallow_clone,
124
+ clone_depth=clone_depth,
119
125
  **kwargs,
120
126
  )
121
127
 
@@ -205,6 +205,8 @@ class LocalRepoAnalyzer:
205
205
  since_date: datetime | None = None,
206
206
  to_date: datetime | None = None,
207
207
  commit_order: str = "newest_first",
208
+ shallow_clone: bool = True,
209
+ clone_depth: int | None = None,
208
210
  ):
209
211
  # Initialize the local repository analyzer.
210
212
  # Args:
@@ -222,6 +224,8 @@ class LocalRepoAnalyzer:
222
224
  # include_source_code: Include source code before/after in results
223
225
  # process_metrics: "standard" or "full" PyDriller process metrics
224
226
  # commit_order: "newest_first" (default) or "oldest_first"
227
+ # shallow_clone: Use shallow cloning to reduce download size (default True)
228
+ # clone_depth: Git clone depth (auto-calculated from max_commits if None)
225
229
  self.clone_path = clone_path or Path.cwd() / "greenmining_repos"
226
230
  self.clone_path.mkdir(parents=True, exist_ok=True)
227
231
  self.max_commits = max_commits
@@ -232,6 +236,9 @@ class LocalRepoAnalyzer:
232
236
  self.compute_process_metrics = compute_process_metrics
233
237
  self.cleanup_after = cleanup_after
234
238
  self.commit_order = commit_order
239
+ self.shallow_clone = shallow_clone
240
+ # Auto-calculate clone depth: max_commits * 3 to account for merges/skipped commits
241
+ self.clone_depth = clone_depth if clone_depth else max(50, max_commits * 3)
235
242
  self.gsf_patterns = GSF_PATTERNS
236
243
 
237
244
  # Phase 1.3: Private repository support
@@ -472,12 +479,39 @@ class LocalRepoAnalyzer:
472
479
  clone_parent.mkdir(parents=True, exist_ok=True)
473
480
  local_path = clone_parent / repo_name
474
481
 
475
- if local_path.exists():
476
- shutil.rmtree(local_path)
482
+ # Perform shallow clone manually before PyDriller (much faster!)
483
+ if not local_path.exists():
484
+ import subprocess
477
485
 
478
- repo_config["clone_repo_to"] = str(clone_parent)
486
+ clone_cmd = ["git", "clone"]
487
+ if self.shallow_clone:
488
+ clone_cmd.extend(["--depth", str(self.clone_depth)])
489
+ clone_cmd.extend([auth_url, str(local_path)])
479
490
 
480
- colored_print(f" Cloning to: {local_path}", "cyan")
491
+ colored_print(
492
+ f" Cloning to: {local_path} (depth={self.clone_depth if self.shallow_clone else 'full'})",
493
+ "cyan",
494
+ )
495
+
496
+ try:
497
+ subprocess.run(
498
+ clone_cmd,
499
+ capture_output=True,
500
+ text=True,
501
+ check=True,
502
+ timeout=180,
503
+ )
504
+ except subprocess.TimeoutExpired:
505
+ colored_print(" Clone timeout after 180s", "yellow")
506
+ raise
507
+ except subprocess.CalledProcessError as e:
508
+ colored_print(f" Clone failed: {e.stderr}", "red")
509
+ raise
510
+ else:
511
+ colored_print(f" Using existing clone: {local_path}", "cyan")
512
+
513
+ # PyDriller will analyze the already-cloned repo
514
+ repo_config["path_to_repo"] = str(local_path)
481
515
 
482
516
  # Phase 2.2: Start energy measurement if enabled (fresh meter per repo)
483
517
  energy_result = None
@@ -76,12 +76,12 @@ class ReportGenerator:
76
76
 
77
77
  return f"""### Executive Summary
78
78
 
79
- This report presents findings from analyzing **{format_number(summary['total_commits'])} commits** across **{format_number(summary['total_repos'])} microservice-based repositories** to identify green software engineering practices.
79
+ This report presents findings from analyzing **{format_number(summary["total_commits"])} commits** across **{format_number(summary["total_repos"])} microservice-based repositories** to identify green software engineering practices.
80
80
 
81
81
  **Key Findings:**
82
82
 
83
- - **{format_percentage(summary['green_aware_percentage'])}** of commits ({format_number(summary['green_aware_count'])}) explicitly mention energy efficiency, performance optimization, or sustainability concerns
84
- - **{format_number(summary['repos_with_green_commits'])}** out of {format_number(summary['total_repos'])} repositories contain at least one green-aware commit
83
+ - **{format_percentage(summary["green_aware_percentage"])}** of commits ({format_number(summary["green_aware_count"])}) explicitly mention energy efficiency, performance optimization, or sustainability concerns
84
+ - **{format_number(summary["repos_with_green_commits"])}** out of {format_number(summary["total_repos"])} repositories contain at least one green-aware commit
85
85
  - {pattern_text if pattern_text else "Various green software patterns were detected across the analyzed commits."}
86
86
 
87
87
  **Implications:**
@@ -106,15 +106,15 @@ Repositories were selected from GitHub based on the following criteria:
106
106
 
107
107
  - **Keywords:** {search_keywords}
108
108
  - **Programming Languages:** {languages}
109
- - **Minimum Stars:** {metadata.get('min_stars', 100)} (to ensure established projects)
109
+ - **Minimum Stars:** {metadata.get("min_stars", 100)} (to ensure established projects)
110
110
  - **Sort Order:** Stars (descending)
111
- - **Total Repositories:** {metadata.get('total_repos', 0)}
111
+ - **Total Repositories:** {metadata.get("total_repos", 0)}
112
112
 
113
113
  #### 1.2 Data Extraction Approach
114
114
 
115
115
  Commit data was extracted using PyDriller library:
116
116
 
117
- - **Commits Analyzed:** {analysis_metadata.get('total_commits_analyzed', 0)}
117
+ - **Commits Analyzed:** {analysis_metadata.get("total_commits_analyzed", 0)}
118
118
  - **Time Window:** Last 2 years (730 days)
119
119
  - **Merge Commits:** Excluded
120
120
  - **Minimum Commit Message Length:** 10 characters
@@ -192,8 +192,8 @@ Commits were analyzed using a keyword and heuristic-based classification framewo
192
192
 
193
193
  return f"""#### 2.1 Green Awareness in Commits
194
194
 
195
- **Total commits analyzed:** {format_number(summary['total_commits'])}
196
- **Commits with explicit green mention:** {format_number(summary['green_aware_count'])} ({format_percentage(summary['green_aware_percentage'])})
195
+ **Total commits analyzed:** {format_number(summary["total_commits"])}
196
+ **Commits with explicit green mention:** {format_number(summary["green_aware_count"])} ({format_percentage(summary["green_aware_percentage"])})
197
197
 
198
198
  **Table: Top 10 Repositories with Highest Green Awareness**
199
199
 
@@ -224,10 +224,10 @@ Commits were analyzed using a keyword and heuristic-based classification framewo
224
224
  # Pattern descriptions
225
225
  pattern_details = []
226
226
  for i, pattern in enumerate(patterns[:10], 1):
227
- pattern_details.append(f"""**{i}. {pattern['pattern_name']}**
228
- - Frequency: {format_number(pattern['count'])} commits ({format_percentage(pattern['percentage'])})
229
- - Confidence Distribution: HIGH={conf['HIGH']}, MEDIUM={conf['MEDIUM']}, LOW={conf['LOW']}
230
- - Example Commits: {', '.join([c[:8] for c in pattern['example_commits'][:3]])}""")
227
+ pattern_details.append(f"""**{i}. {pattern["pattern_name"]}**
228
+ - Frequency: {format_number(pattern["count"])} commits ({format_percentage(pattern["percentage"])})
229
+ - Confidence Distribution: HIGH={conf["HIGH"]}, MEDIUM={conf["MEDIUM"]}, LOW={conf["LOW"]}
230
+ - Example Commits: {", ".join([c[:8] for c in pattern["example_commits"][:3]])}""")
231
231
 
232
232
  return f"""#### 2.2 Known Green Patterns & Tactics Applied
233
233
 
@@ -252,10 +252,10 @@ No novel microservice-specific green practices were automatically detected. Manu
252
252
 
253
253
  pattern_list = []
254
254
  for pattern in emergent:
255
- pattern_list.append(f"""**Pattern:** {pattern['pattern_name']}
256
- - Occurrences: {pattern['count']}
257
- - Description: {pattern['description']}
258
- - Example Commits: {', '.join([c[:8] for c in pattern['example_commits'][:3]])}""")
255
+ pattern_list.append(f"""**Pattern:** {pattern["pattern_name"]}
256
+ - Occurrences: {pattern["count"]}
257
+ - Description: {pattern["description"]}
258
+ - Example Commits: {", ".join([c[:8] for c in pattern["example_commits"][:3]])}""")
259
259
 
260
260
  return f"""#### 2.3 Emerging Practices Discovered
261
261
 
@@ -299,7 +299,7 @@ No novel microservice-specific green practices were automatically detected. Manu
299
299
  if "error" in stats:
300
300
  return f"""#### 2.5 Statistical Analysis
301
301
 
302
- **Note:** Statistical analysis encountered an error: {stats['error']}
302
+ **Note:** Statistical analysis encountered an error: {stats["error"]}
303
303
  """
304
304
 
305
305
  sections = []
@@ -495,16 +495,16 @@ Based on the detected patterns, microservice developers primarily focus on:
495
495
 
496
496
  #### 5.1 Summary of Key Findings
497
497
 
498
- This study analyzed {format_number(summary['total_commits'])} commits from {format_number(summary['total_repos'])} microservice repositories and found:
498
+ This study analyzed {format_number(summary["total_commits"])} commits from {format_number(summary["total_repos"])} microservice repositories and found:
499
499
 
500
- 1. **{format_percentage(summary['green_aware_percentage'])}** of commits explicitly address energy/sustainability concerns
501
- 2. **{format_number(summary['repos_with_green_commits'])}** repositories demonstrate some level of green awareness
500
+ 1. **{format_percentage(summary["green_aware_percentage"])}** of commits explicitly address energy/sustainability concerns
501
+ 2. **{format_number(summary["repos_with_green_commits"])}** repositories demonstrate some level of green awareness
502
502
  3. Common green patterns include: {patterns_text}
503
503
 
504
504
  #### 5.2 Answers to Research Questions
505
505
 
506
506
  **RQ1: What percentage of microservice commits explicitly mention energy efficiency?**
507
- Answer: {format_percentage(summary['green_aware_percentage'])} of analyzed commits contain explicit mentions.
507
+ Answer: {format_percentage(summary["green_aware_percentage"])} of analyzed commits contain explicit mentions.
508
508
 
509
509
  **RQ2: Which green software tactics are developers applying in practice?**
510
510
  Answer: Developers primarily apply caching strategies, resource pooling, database optimization, and asynchronous processing patterns.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: greenmining
3
- Version: 1.2.5
3
+ Version: 1.2.6
4
4
  Summary: An empirical Python library for Mining Software Repositories (MSR) in Green IT research
5
5
  Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
6
6
  License: MIT
@@ -1,4 +1,4 @@
1
- greenmining/__init__.py,sha256=jKktkjxKacTGmUoXMwfuW7DcyUKepUkTCNLjTmU2Hvc,4496
1
+ greenmining/__init__.py,sha256=FvURFMzA2M-JtfP92RiAaCOVvJjN2qNUuEAQr0SPD4o,4789
2
2
  greenmining/__main__.py,sha256=NYOVS7D4w2XDLn6SyXHXPKE5GrNGOeoWSTb_KazgK5c,590
3
3
  greenmining/gsf_patterns.py,sha256=UvNJPY3HlAx1SicwUqci40TlLg8lCL0tszSOH4haxQs,55921
4
4
  greenmining/utils.py,sha256=-dnLUw9taCzvQ2dk6uc66GAohOFiXJFKs9TLSEPk5kM,2893
@@ -25,10 +25,10 @@ greenmining/services/commit_extractor.py,sha256=qBM9QpGzPZRmGMFufJ6gP8eWIuufTowL
25
25
  greenmining/services/data_aggregator.py,sha256=BU_HUb-8c0n0sa_7VZRB8jIVnaVhRLf-E6KA4ASh-08,19427
26
26
  greenmining/services/data_analyzer.py,sha256=0XqW-slrnt7RotrHDweOqKtoN8XIA7y6p7s2Jau6cMg,7431
27
27
  greenmining/services/github_graphql_fetcher.py,sha256=WhSbQGMdkb0D4uLcMKW6xZK77c5AkW-nZf718issap4,11527
28
- greenmining/services/local_repo_analyzer.py,sha256=Cq6NixciZmqDuWpU5976TuhGiaGFNnvvz4Rs11bq2Ug,25891
29
- greenmining/services/reports.py,sha256=HQo52mdhwXGZgRV1BWaIA4WSs6N3Q2_Wgdsbb2RSQZU,23218
30
- greenmining-1.2.5.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
31
- greenmining-1.2.5.dist-info/METADATA,sha256=WVoukYReWst87-OVgNEsi3vXaI51Dsh4h54jq3gieYI,10522
32
- greenmining-1.2.5.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
33
- greenmining-1.2.5.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
34
- greenmining-1.2.5.dist-info/RECORD,,
28
+ greenmining/services/local_repo_analyzer.py,sha256=Ju3UA9LQc2LpIatvpDiKg9aLhQp5HsldsrdWJDU5Rwo,27406
29
+ greenmining/services/reports.py,sha256=QCJZhET3hRkH83htxLkbEP58dE3-7jIZh82Pp60hQcc,23218
30
+ greenmining-1.2.6.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
31
+ greenmining-1.2.6.dist-info/METADATA,sha256=M7yBMY3OIVcZnq3JxIvRe6ncQnnK78_U90ezvN1WyVA,10522
32
+ greenmining-1.2.6.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
33
+ greenmining-1.2.6.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
34
+ greenmining-1.2.6.dist-info/RECORD,,