PyPI - greenmining - Versions diffs - 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl - Mend

greenmining-1.0.1-py3-none-any.whl → greenmining-1.0.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

greenmining/__init__.py CHANGED Viewed

@@ -9,7 +9,7 @@ from greenmining.gsf_patterns import (
     is_green_aware,
 )
-__version__ = "0.1.12"
+__version__ = "1.0.3"
 def fetch_repositories(

greenmining/__version__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """Version information for greenmining."""
-__version__ = "1.0.1"
+__version__ = "1.0.2"

greenmining/services/commit_extractor.py CHANGED Viewed

@@ -12,6 +12,7 @@ from github import Github
 from tqdm import tqdm
 from greenmining.config import get_config
+from greenmining.models.repository import Repository
 from greenmining.utils import (
     colored_print,
     format_timestamp,
@@ -49,11 +50,11 @@ class CommitExtractor:
         self.github = Github(github_token) if github_token else None
         self.timeout = timeout
-    def extract_from_repositories(self, repositories: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    def extract_from_repositories(self, repositories: list[dict[str, Any] | Repository]) -> list[dict[str, Any]]:
         """Extract commits from list of repositories.
         Args:
-            repositories: List of repository metadata
+            repositories: List of repository metadata (dicts or Repository objects)
         Returns:
             List of commit data dictionaries
@@ -89,15 +90,17 @@ class CommitExtractor:
                     pbar.update(1)
                 except TimeoutError:
                     signal.alarm(0)  # Cancel alarm
+                    repo_name = repo.full_name if isinstance(repo, Repository) else repo["full_name"]
                     colored_print(
-                        f"\nTimeout processing {repo['full_name']} (>{self.timeout}s)", "yellow"
+                        f"\nTimeout processing {repo_name} (>{self.timeout}s)", "yellow"
                     )
-                    failed_repos.append(repo["full_name"])
+                    failed_repos.append(repo_name)
                     pbar.update(1)
                 except Exception as e:
                     signal.alarm(0)  # Cancel alarm
-                    colored_print(f"\nError processing {repo['full_name']}: {e}", "yellow")
-                    failed_repos.append(repo["full_name"])
+                    repo_name = repo.full_name if isinstance(repo, Repository) else repo["full_name"]
+                    colored_print(f"\nError processing {repo_name}: {e}", "yellow")
+                    failed_repos.append(repo_name)
                     pbar.update(1)
         if failed_repos:
@@ -114,13 +117,14 @@ class CommitExtractor:
         """Extract commits from a single repository using GitHub API.
         Args:
-            repo: Repository metadata dictionary
+            repo: Repository metadata (dict or Repository object)
         Returns:
             List of commit dictionaries
         """
         commits = []
-        repo_name = repo["full_name"]
+        # Handle both Repository objects and dicts
+        repo_name = repo.full_name if isinstance(repo, Repository) else repo["full_name"]
         try:
             # Get repository from GitHub API

greenmining/services/data_aggregator.py CHANGED Viewed

@@ -16,6 +16,7 @@ from greenmining.analyzers import (
     QualitativeAnalyzer,
 )
 from greenmining.config import get_config
+from greenmining.models.repository import Repository
 from greenmining.utils import (
     colored_print,
     format_number,
@@ -270,8 +271,13 @@ class DataAggregator:
         self, results: list[dict[str, Any]], repos: list[dict[str, Any]]
     ) -> list[dict[str, Any]]:
         """Generate per-language statistics."""
-        # Create repo name to language mapping
-        repo_language_map = {repo["full_name"]: repo.get("language", "Unknown") for repo in repos}
+        # Create repo name to language mapping (handle both Repository objects and dicts)
+        repo_language_map = {}
+        for repo in repos:
+            if isinstance(repo, Repository):
+                repo_language_map[repo.full_name] = repo.language or "Unknown"
+            else:
+                repo_language_map[repo["full_name"]] = repo.get("language", "Unknown")
         # Group commits by language
         language_commits = defaultdict(list)

{greenmining-1.0.1.dist-info → greenmining-1.0.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: greenmining
-Version: 1.0.1
+Version: 1.0.3
 Summary: Analyze GitHub repositories to identify green software engineering patterns and energy-efficient practices
 Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
 License: MIT
@@ -397,6 +397,119 @@ controller.generate_report()
 print("Analysis complete! Check data/ directory for results.")
 ```
+#### Complete Working Example: Full Pipeline
+This is a complete, production-ready example that demonstrates the entire analysis pipeline. This example successfully analyzed 100 repositories with 30,543 commits in our testing.
+```python
+import os
+from pathlib import Path
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
+# Import from greenmining package
+from greenmining import fetch_repositories
+from greenmining.services.commit_extractor import CommitExtractor
+from greenmining.services.data_analyzer import DataAnalyzer
+from greenmining.services.data_aggregator import DataAggregator
+# Configuration
+token = os.getenv("GITHUB_TOKEN")
+output_dir = Path("results")
+output_dir.mkdir(exist_ok=True)
+# STAGE 1: Fetch Repositories
+print("Fetching repositories...")
+repositories = fetch_repositories(
+    github_token=token,
+    max_repos=100,
+    min_stars=10,
+    keywords="software engineering",
+)
+print(f"✓ Fetched {len(repositories)} repositories")
+# STAGE 2: Extract Commits
+print("\nExtracting commits...")
+extractor = CommitExtractor(
+    github_token=token,
+    max_commits=1000,
+    skip_merges=True,
+    days_back=730,
+    timeout=120,
+)
+all_commits = extractor.extract_from_repositories(repositories)
+print(f"✓ Extracted {len(all_commits)} commits")
+# Save commits
+extractor.save_results(
+    all_commits,
+    output_dir / "commits.json",
+    len(repositories)
+)
+# STAGE 3: Analyze Commits
+print("\nAnalyzing commits...")
+analyzer = DataAnalyzer(
+    enable_nlp=True,
+    enable_ml_features=True,
+    enable_diff_analysis=False,  # Set to True for detailed code analysis (slower)
+)
+analyzed_commits = analyzer.analyze_commits(all_commits)
+# Count green-aware commits
+green_count = sum(1 for c in analyzed_commits if c.get("green_aware", False))
+green_percentage = (green_count / len(analyzed_commits) * 100) if analyzed_commits else 0
+print(f"✓ Analyzed {len(analyzed_commits)} commits")
+print(f"✓ Green-aware: {green_count} ({green_percentage:.1f}%)")
+# Save analysis
+analyzer.save_results(analyzed_commits, output_dir / "analyzed.json")
+# STAGE 4: Aggregate Results
+print("\nAggregating results...")
+aggregator = DataAggregator(
+    enable_enhanced_stats=True,
+    enable_temporal=True,
+    temporal_granularity="quarter",
+)
+results = aggregator.aggregate(analyzed_commits, repositories)
+# STAGE 5: Save Results
+print("\nSaving results...")
+aggregator.save_results(
+    results,
+    output_dir / "aggregated.json",
+    output_dir / "aggregated.csv",
+    analyzed_commits
+)
+# Print summary
+print("\n" + "="*80)
+print("ANALYSIS COMPLETE")
+print("="*80)
+aggregator.print_summary(results)
+print(f"\n📁 Results saved in: {output_dir.absolute()}")
+```
+**What this example does:**
+1. **Fetches repositories** from GitHub based on keywords and filters
+2. **Extracts commits** from each repository (up to 1000 per repo)
+3. **Analyzes commits** for green software patterns using NLP and ML
+4. **Aggregates results** with temporal analysis and enhanced statistics
+5. **Saves results** to JSON and CSV files for further analysis
+**Expected output files:**
+- `commits.json` - All extracted commits with metadata
+- `analyzed.json` - Commits analyzed for green patterns
+- `aggregated.json` - Summary statistics and pattern distributions
+- `aggregated.csv` - Tabular format for spreadsheet analysis
+- `metadata.json` - Experiment configuration and timing
+**Performance:** This pipeline successfully processed 100 repositories (30,543 commits) in approximately 6.4 hours, identifying 7,600 green-aware commits (24.9%).
 ### Docker Usage
 ```bash

{greenmining-1.0.1.dist-info → greenmining-1.0.3.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
-greenmining/__init__.py,sha256=c_Vaq_WW6-SkI_es4cQKXpdEtXdfVEnGjIDxACF6bzk,1764
+greenmining/__init__.py,sha256=p_pk0TmyP34o97wTYVTHkXe7qpGtH43GUVD_iCadrYY,1763
 greenmining/__main__.py,sha256=1RwcSXcwdza6xJX5fRT8-HhZjlnKbkmGY_uxTm-NYZ4,138
-greenmining/__version__.py,sha256=1jisBTMaMLSYH7jCobgcRNneQXzskU1YHLC5Za-5YsQ,66
+greenmining/__version__.py,sha256=3OgUZ5K2OXa9_-2kjlgye1N6G_QeQDeex2uw33Ja6Cs,66
 greenmining/cli.py,sha256=40eKDEZHNeDVb91xKBG70VfPk45mwb4YjuVCC2efVPA,17458
 greenmining/config.py,sha256=1_puT52zNS589hTxEZ3UCqRC_Qw5Jw2UupUPNbNz_hs,5195
 greenmining/gsf_patterns.py,sha256=Prsk_stnQrfOsk0x0zn-zdevbueAnPfGDM4XNA9PbdA,54664
@@ -23,14 +23,14 @@ greenmining/models/repository.py,sha256=k1X9UYZYLl0RznohOHx_Y5wur-ZBvLcNyc9vPVAr
 greenmining/presenters/__init__.py,sha256=-ukAvhNuTvy1Xpknps0faDZ78HKdPHPySzFpQHABzKM,203
 greenmining/presenters/console_presenter.py,sha256=ykJ9Hgors2dRTqQNaqCTxH4fd49F0AslQTgUOr_csI0,5347
 greenmining/services/__init__.py,sha256=7CJDjHMTrY0bBoqzx22AUzIwEvby0FbAUUKYbjSlNPQ,460
-greenmining/services/commit_extractor.py,sha256=ldwfb6pNMPqaAXEYMIGYyo5yFx-tYcLlAiMpJdKc8Ek,12738
-greenmining/services/data_aggregator.py,sha256=WRYmVoscX0kMyI0CRnYFPhYwOBVI73o573LhibZTcPA,23770
+greenmining/services/commit_extractor.py,sha256=FSgoHpMvoqjZ6b1UQYtwfUaLVX_GDfiR0BVd51y-gYk,13126
+greenmining/services/data_aggregator.py,sha256=OqJvQZp9xaZaSmbwWoiHAHECAghd8agbhVmStDvebOU,24054
 greenmining/services/data_analyzer.py,sha256=HZDQLFZDCwCUGIzRjypyXC09Fl_-zaxhly74n3siwQc,16325
 greenmining/services/github_fetcher.py,sha256=J47-plM_NKXwHDSWNBuSUZMnZnGP6wXiJyrVfeWT9ug,11360
 greenmining/services/reports.py,sha256=NCNI9SCTnSLeAO8WmkNIdkB0hr-XyVpuzV0sovOoUOM,27107
-greenmining-1.0.1.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
-greenmining-1.0.1.dist-info/METADATA,sha256=BxsHBbcm6_287X-8Cq2Phi-twkwXN7E8OrDDdYemhUw,25694
-greenmining-1.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-greenmining-1.0.1.dist-info/entry_points.txt,sha256=oHvTWMzNFGf2W3CFEKVVPsG4exeMv0MaQu9YsUoQ9lw,53
-greenmining-1.0.1.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
-greenmining-1.0.1.dist-info/RECORD,,
+greenmining-1.0.3.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
+greenmining-1.0.3.dist-info/METADATA,sha256=0Hj5qXVUkuJhIUBZBRhSysc3zx6L3py0HpZg9vKcl7Y,29260
+greenmining-1.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+greenmining-1.0.3.dist-info/entry_points.txt,sha256=oHvTWMzNFGf2W3CFEKVVPsG4exeMv0MaQu9YsUoQ9lw,53
+greenmining-1.0.3.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
+greenmining-1.0.3.dist-info/RECORD,,

{greenmining-1.0.1.dist-info → greenmining-1.0.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{greenmining-1.0.1.dist-info → greenmining-1.0.3.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{greenmining-1.0.1.dist-info → greenmining-1.0.3.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{greenmining-1.0.1.dist-info → greenmining-1.0.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

greenmining 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

greenmining-1.0.1-py3-none-any.whl → greenmining-1.0.3-py3-none-any.whl