greenmining 1.0.4.tar.gz → 1.0.5.tar.gz
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- {greenmining-1.0.4/greenmining.egg-info → greenmining-1.0.5}/PKG-INFO +10 -24
- {greenmining-1.0.4 → greenmining-1.0.5}/README.md +8 -22
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/__init__.py +1 -1
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/__version__.py +1 -1
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/services/github_fetcher.py +16 -18
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/services/github_graphql_fetcher.py +45 -55
- {greenmining-1.0.4 → greenmining-1.0.5/greenmining.egg-info}/PKG-INFO +10 -24
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining.egg-info/requires.txt +1 -1
- {greenmining-1.0.4 → greenmining-1.0.5}/pyproject.toml +2 -2
- {greenmining-1.0.4 → greenmining-1.0.5}/setup.py +1 -1
- {greenmining-1.0.4 → greenmining-1.0.5}/CHANGELOG.md +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/LICENSE +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/MANIFEST.in +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/__main__.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/analyzers/__init__.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/analyzers/code_diff_analyzer.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/analyzers/qualitative_analyzer.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/analyzers/statistical_analyzer.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/analyzers/temporal_analyzer.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/config.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/controllers/__init__.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/controllers/repository_controller.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/energy/__init__.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/energy/base.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/energy/codecarbon_meter.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/energy/rapl.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/gsf_patterns.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/models/__init__.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/models/aggregated_stats.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/models/analysis_result.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/models/commit.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/models/repository.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/presenters/__init__.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/presenters/console_presenter.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/services/__init__.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/services/commit_extractor.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/services/data_aggregator.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/services/data_analyzer.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/services/local_repo_analyzer.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/services/reports.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining/utils.py +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining.egg-info/SOURCES.txt +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining.egg-info/dependency_links.txt +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/greenmining.egg-info/top_level.txt +0 -0
- {greenmining-1.0.4 → greenmining-1.0.5}/setup.cfg +0 -0
````diff
--- greenmining-1.0.4/greenmining.egg-info/PKG-INFO
+++ greenmining-1.0.5/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: greenmining
-Version: 1.0.4
+Version: 1.0.5
 Summary: Analyze GitHub repositories to identify green software engineering patterns and energy-efficient practices
 Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
 License: MIT
@@ -43,7 +43,7 @@ Requires-Dist: pytest-mock>=3.12.0; extra == "dev"
 Requires-Dist: black>=23.12.0; extra == "dev"
 Requires-Dist: ruff>=0.1.9; extra == "dev"
 Requires-Dist: mypy>=1.8.0; extra == "dev"
-Requires-Dist: build>=1.0.
+Requires-Dist: build>=1.0.5; extra == "dev"
 Requires-Dist: twine>=4.0.2; extra == "dev"
 Provides-Extra: docs
 Requires-Dist: sphinx>=7.2.0; extra == "docs"
@@ -61,7 +61,7 @@ Green mining for microservices repositories.
 
 ## Overview
 
-`greenmining` is a Python library
+`greenmining` is a Python library for analyzing GitHub repositories to identify green software engineering practices and energy-efficient patterns. It detects sustainable software patterns across cloud, web, AI, database, networking, and general categories.
 
 ## Installation
 
@@ -105,7 +105,7 @@ if is_green_aware(commit_msg):
 # Output: ['Cache Static Data', 'Use Efficient Cache Strategies']
 ```
 
-#### Fetch Repositories with Custom Keywords
+#### Fetch Repositories with Custom Keywords
 
 ```python
 from greenmining import fetch_repositories
@@ -144,8 +144,6 @@ for repo in repos[:5]:
 ```python
 from greenmining.services.commit_extractor import CommitExtractor
 from greenmining.services.data_analyzer import DataAnalyzer
-from greenmining.analyzers.nlp_analyzer import NLPAnalyzer
-from greenmining.analyzers.ml_feature_extractor import MLFeatureExtractor
 from greenmining import fetch_repositories
 
 # Fetch repositories with custom keywords
@@ -195,18 +193,6 @@ for commit in commits:
         results.append(result)
         print(f"Green commit found: {commit.message[:50]}...")
         print(f" Patterns: {result['known_pattern']}")
-
-        # Access NLP analysis results (NEW)
-        if 'nlp_analysis' in result:
-            nlp = result['nlp_analysis']
-            print(f" NLP: {nlp['morphological_count']} morphological matches, "
-                  f"{nlp['semantic_count']} semantic matches")
-
-        # Access ML features (NEW)
-        if 'ml_features' in result:
-            ml = result['ml_features']['text']
-            print(f" ML Features: {ml['word_count']} words, "
-                  f"keyword density: {ml['keyword_density']:.2f}")
 ```
 
 #### Access Sustainability Patterns Data
@@ -242,7 +228,7 @@ print(f"Available categories: {sorted(categories)}")
 # 'monitoring', 'network', 'networking', 'resource', 'web']
 ```
 
-#### Advanced Analysis: Temporal Trends
+#### Advanced Analysis: Temporal Trends
 
 ```python
 from greenmining.services.data_aggregator import DataAggregator
@@ -374,7 +360,7 @@ repositories = fetch_repositories(
     min_stars=10,
     keywords="software engineering",
 )
-print(f"
+print(f"Fetched {len(repositories)} repositories")
 
 # STAGE 2: Extract Commits
 print("\nExtracting commits...")
@@ -386,7 +372,7 @@ extractor = CommitExtractor(
     timeout=120,
 )
 all_commits = extractor.extract_from_repositories(repositories)
-print(f"
+print(f"Extracted {len(all_commits)} commits")
 
 # Save commits
 extractor.save_results(
@@ -405,8 +391,8 @@ analyzed_commits = analyzer.analyze_commits(all_commits)
 # Count green-aware commits
 green_count = sum(1 for c in analyzed_commits if c.get("green_aware", False))
 green_percentage = (green_count / len(analyzed_commits) * 100) if analyzed_commits else 0
-print(f"
-print(f"
+print(f"Analyzed {len(analyzed_commits)} commits")
+print(f"Green-aware: {green_count} ({green_percentage:.1f}%)")
 
 # Save analysis
 analyzer.save_results(analyzed_commits, output_dir / "analyzed.json")
@@ -434,7 +420,7 @@ print("\n" + "="*80)
 print("ANALYSIS COMPLETE")
 print("="*80)
 aggregator.print_summary(results)
-print(f"\
+print(f"\nResults saved in: {output_dir.absolute()}")
 ```
 
 **What this example does:**
````
````diff
--- greenmining-1.0.4/README.md
+++ greenmining-1.0.5/README.md
@@ -8,7 +8,7 @@ Green mining for microservices repositories.
 
 ## Overview
 
-`greenmining` is a Python library
+`greenmining` is a Python library for analyzing GitHub repositories to identify green software engineering practices and energy-efficient patterns. It detects sustainable software patterns across cloud, web, AI, database, networking, and general categories.
 
 ## Installation
 
@@ -52,7 +52,7 @@ if is_green_aware(commit_msg):
 # Output: ['Cache Static Data', 'Use Efficient Cache Strategies']
 ```
 
-#### Fetch Repositories with Custom Keywords
+#### Fetch Repositories with Custom Keywords
 
 ```python
 from greenmining import fetch_repositories
@@ -91,8 +91,6 @@ for repo in repos[:5]:
 ```python
 from greenmining.services.commit_extractor import CommitExtractor
 from greenmining.services.data_analyzer import DataAnalyzer
-from greenmining.analyzers.nlp_analyzer import NLPAnalyzer
-from greenmining.analyzers.ml_feature_extractor import MLFeatureExtractor
 from greenmining import fetch_repositories
 
 # Fetch repositories with custom keywords
@@ -142,18 +140,6 @@ for commit in commits:
         results.append(result)
         print(f"Green commit found: {commit.message[:50]}...")
         print(f" Patterns: {result['known_pattern']}")
-
-        # Access NLP analysis results (NEW)
-        if 'nlp_analysis' in result:
-            nlp = result['nlp_analysis']
-            print(f" NLP: {nlp['morphological_count']} morphological matches, "
-                  f"{nlp['semantic_count']} semantic matches")
-
-        # Access ML features (NEW)
-        if 'ml_features' in result:
-            ml = result['ml_features']['text']
-            print(f" ML Features: {ml['word_count']} words, "
-                  f"keyword density: {ml['keyword_density']:.2f}")
 ```
 
 #### Access Sustainability Patterns Data
@@ -189,7 +175,7 @@ print(f"Available categories: {sorted(categories)}")
 # 'monitoring', 'network', 'networking', 'resource', 'web']
 ```
 
-#### Advanced Analysis: Temporal Trends
+#### Advanced Analysis: Temporal Trends
 
 ```python
 from greenmining.services.data_aggregator import DataAggregator
@@ -321,7 +307,7 @@ repositories = fetch_repositories(
     min_stars=10,
     keywords="software engineering",
 )
-print(f"
+print(f"Fetched {len(repositories)} repositories")
 
 # STAGE 2: Extract Commits
 print("\nExtracting commits...")
@@ -333,7 +319,7 @@ extractor = CommitExtractor(
     timeout=120,
 )
 all_commits = extractor.extract_from_repositories(repositories)
-print(f"
+print(f"Extracted {len(all_commits)} commits")
 
 # Save commits
 extractor.save_results(
@@ -352,8 +338,8 @@ analyzed_commits = analyzer.analyze_commits(all_commits)
 # Count green-aware commits
 green_count = sum(1 for c in analyzed_commits if c.get("green_aware", False))
 green_percentage = (green_count / len(analyzed_commits) * 100) if analyzed_commits else 0
-print(f"
-print(f"
+print(f"Analyzed {len(analyzed_commits)} commits")
+print(f"Green-aware: {green_count} ({green_percentage:.1f}%)")
 
 # Save analysis
 analyzer.save_results(analyzed_commits, output_dir / "analyzed.json")
@@ -381,7 +367,7 @@ print("\n" + "="*80)
 print("ANALYSIS COMPLETE")
 print("="*80)
 aggregator.print_summary(results)
-print(f"\
+print(f"\nResults saved in: {output_dir.absolute()}")
 ```
 
 **What this example does:**
````
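The README changes are easiest to sanity-check interactively. A minimal sketch, assuming `fetch_repositories` and `is_green_aware` are importable from the top-level package as the hunks above suggest, and using only the parameter names they show:

```python
# Minimal sketch based on the README hunks above; the top-level exports and
# parameter defaults are assumptions, not a confirmed 1.0.5 API surface.
from greenmining import fetch_repositories, is_green_aware

commit_msg = "perf: cache static data to reduce energy consumption"
if is_green_aware(commit_msg):
    print("Commit is green-aware")

repositories = fetch_repositories(min_stars=10, keywords="software engineering")
print(f"Fetched {len(repositories)} repositories")
```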
```diff
--- greenmining-1.0.4/greenmining/services/github_fetcher.py
+++ greenmining-1.0.5/greenmining/services/github_fetcher.py
@@ -1,21 +1,19 @@
-"""
-================================================================================
-DEADCODE - OLD REST API IMPLEMENTATION
-================================================================================
-
-This file contains the OLD GitHub REST API implementation.
-It has been REPLACED by GitHubGraphQLFetcher for better performance.
-
-Performance comparison:
-REST API: 10+ requests for 100 repos, ~2 minutes
-GraphQL API: 1-2 requests for 100 repos, ~15 seconds (10x faster!)
-
-USE INSTEAD: greenmining.services.github_graphql_fetcher.GitHubGraphQLFetcher
-
-This file is kept for reference only. Do not use in production.
-
-================================================================================
-"""
+# ================================================================================
+# DEADCODE - OLD REST API IMPLEMENTATION
+# ================================================================================
+#
+# This file contains the OLD GitHub REST API implementation.
+# It has been REPLACED by GitHubGraphQLFetcher for better performance.
+#
+# Performance comparison:
+# REST API: 10+ requests for 100 repos, ~2 minutes
+# GraphQL API: 1-2 requests for 100 repos, ~15 seconds (10x faster!)
+#
+# USE INSTEAD: greenmining.services.github_graphql_fetcher.GitHubGraphQLFetcher
+#
+# This file is kept for reference only. Do not use in production.
+#
+# ================================================================================
 
 # GitHub repository fetcher for green microservices mining.
 
```
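Since `github_fetcher.py` is now explicitly dead code, callers should move to the GraphQL fetcher it points at. A hedged migration sketch, using only the constructor and the search parameters visible in the `github_graphql_fetcher.py` diff below; the method name `search_repositories` is inferred from its docstring, and the argument values are illustrative:

```python
# Migration sketch: replace the deprecated REST fetcher with the GraphQL one.
# Only names visible in this diff are used; values are illustrative.
import os

from greenmining.services.github_graphql_fetcher import GitHubGraphQLFetcher

fetcher = GitHubGraphQLFetcher(token=os.environ["GITHUB_TOKEN"])
repositories = fetcher.search_repositories(
    keywords="microservices",
    max_repos=100,               # assumed caller-supplied cap
    min_stars=10,
    created_after="2020-01-01",  # YYYY-MM-DD, per the Args comments
)
fetcher.save_results(repositories, "repositories.json")
```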
```diff
--- greenmining-1.0.4/greenmining/services/github_graphql_fetcher.py
+++ greenmining-1.0.5/greenmining/services/github_graphql_fetcher.py
@@ -1,9 +1,7 @@
-"""
-GitHub GraphQL API fetcher for faster and more efficient repository fetching.
-
-GraphQL allows fetching exactly the data you need in a single request,
-reducing API calls and improving rate limit efficiency.
-"""
+# GitHub GraphQL API fetcher for faster and more efficient repository fetching.
+#
+# GraphQL allows fetching exactly the data you need in a single request,
+# reducing API calls and improving rate limit efficiency.
 
 import json
 import time
@@ -15,25 +13,21 @@ from greenmining.models.repository import Repository
 
 
 class GitHubGraphQLFetcher:
-    """
-    Fetch GitHub repositories using GraphQL API v4.
-
-    Benefits over REST API:
-    - Fetch repos + commits in 1 request instead of 100+ REST calls
-    - Get exactly the fields you need (no over-fetching)
-    - Better rate limit efficiency (5000 points/hour vs 5000 requests/hour)
-    - More powerful search capabilities
-    """
+    # Fetch GitHub repositories using GraphQL API v4.
+    #
+    # Benefits over REST API:
+    # - Fetch repos + commits in 1 request instead of 100+ REST calls
+    # - Get exactly the fields you need (no over-fetching)
+    # - Better rate limit efficiency (5000 points/hour vs 5000 requests/hour)
+    # - More powerful search capabilities
 
     GRAPHQL_ENDPOINT = "https://api.github.com/graphql"
 
     def __init__(self, token: str):
-        """
-        Initialize GraphQL fetcher.
-
-        Args:
-            token: GitHub personal access token
-        """
+        # Initialize GraphQL fetcher.
+        #
+        # Args:
+        #     token: GitHub personal access token
         self.token = token
         self.headers = {
             "Authorization": f"Bearer {token}",
@@ -51,22 +45,20 @@ class GitHubGraphQLFetcher:
         pushed_after: Optional[str] = None,
         pushed_before: Optional[str] = None,
     ) -> List[Repository]:
-        """
-        Search GitHub repositories using GraphQL.
-
-        Args:
-            keywords: Search keywords
-            max_repos: Maximum number of repositories to fetch
-            min_stars: Minimum star count
-            languages: Programming languages to filter
-            created_after: Created after date (YYYY-MM-DD)
-            created_before: Created before date (YYYY-MM-DD)
-            pushed_after: Pushed after date (YYYY-MM-DD)
-            pushed_before: Pushed before date (YYYY-MM-DD)
-
-        Returns:
-            List of Repository objects
-        """
+        # Search GitHub repositories using GraphQL.
+        #
+        # Args:
+        #     keywords: Search keywords
+        #     max_repos: Maximum number of repositories to fetch
+        #     min_stars: Minimum star count
+        #     languages: Programming languages to filter
+        #     created_after: Created after date (YYYY-MM-DD)
+        #     created_before: Created before date (YYYY-MM-DD)
+        #     pushed_after: Pushed after date (YYYY-MM-DD)
+        #     pushed_before: Pushed before date (YYYY-MM-DD)
+        #
+        # Returns:
+        #     List of Repository objects
         # Build search query
         search_query = self._build_search_query(
             keywords,
@@ -195,7 +187,7 @@ class GitHubGraphQLFetcher:
         pushed_after: Optional[str],
         pushed_before: Optional[str],
     ) -> str:
-        """Build GitHub search query string."""
+        # Build GitHub search query string.
         query_parts = [keywords]
 
         # Star count
@@ -219,7 +211,7 @@ class GitHubGraphQLFetcher:
         return " ".join(query_parts)
 
     def _execute_query(self, query: str, variables: Dict[str, Any]) -> Dict[str, Any]:
-        """Execute GraphQL query."""
+        # Execute GraphQL query.
         payload = {"query": query, "variables": variables}
 
         response = requests.post(
@@ -230,7 +222,7 @@ class GitHubGraphQLFetcher:
         return response.json()
 
     def _parse_repository(self, node: Dict[str, Any]) -> Repository:
-        """Parse GraphQL repository node to Repository object."""
+        # Parse GraphQL repository node to Repository object.
         # Extract languages
         languages = []
         if node.get("languages") and node["languages"].get("nodes"):
@@ -265,20 +257,18 @@ class GitHubGraphQLFetcher:
     def get_repository_commits(
         self, owner: str, name: str, max_commits: int = 100
     ) -> List[Dict[str, Any]]:
-        """
-        Fetch commits for a specific repository using GraphQL.
-
-        This is much faster than REST API as it gets all commits in 1-2 requests
-        instead of paginating through 100 individual REST calls.
-
-        Args:
-            owner: Repository owner
-            name: Repository name
-            max_commits: Maximum commits to fetch
-
-        Returns:
-            List of commit dictionaries
-        """
+        # Fetch commits for a specific repository using GraphQL.
+        #
+        # This is much faster than REST API as it gets all commits in 1-2 requests
+        # instead of paginating through 100 individual REST calls.
+        #
+        # Args:
+        #     owner: Repository owner
+        #     name: Repository name
+        #     max_commits: Maximum commits to fetch
+        #
+        # Returns:
+        #     List of commit dictionaries
         query = """
         query($owner: String!, $name: String!, $first: Int!) {
           repository(owner: $owner, name: $name) {
@@ -359,7 +349,7 @@ class GitHubGraphQLFetcher:
         return commits
 
     def save_results(self, repositories: List[Repository], output_file: str):
-        """Save repositories to JSON file."""
+        # Save repositories to JSON file.
         data = {
             "total_repositories": len(repositories),
             "repositories": [repo.to_dict() for repo in repositories],
```
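`get_repository_commits` is the piece that replaces per-repository REST pagination: one or two GraphQL requests per repository instead of up to 100 individual calls. Continuing the sketch above; how `Repository` exposes its owner and name is an assumption here:

```python
# Continuing the sketch: fetch commits for each repository found above.
# repo.to_dict() exists per save_results(); the "full_name" key and its
# "owner/name" format are hypothetical.
for repo in repositories:
    owner, name = repo.to_dict()["full_name"].split("/", 1)
    commits = fetcher.get_repository_commits(owner=owner, name=name, max_commits=100)
    print(f"{owner}/{name}: {len(commits)} commits")
```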
````diff
--- greenmining-1.0.4/PKG-INFO
+++ greenmining-1.0.5/greenmining.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: greenmining
-Version: 1.0.4
+Version: 1.0.5
 Summary: Analyze GitHub repositories to identify green software engineering patterns and energy-efficient practices
 Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
 License: MIT
@@ -43,7 +43,7 @@ Requires-Dist: pytest-mock>=3.12.0; extra == "dev"
 Requires-Dist: black>=23.12.0; extra == "dev"
 Requires-Dist: ruff>=0.1.9; extra == "dev"
 Requires-Dist: mypy>=1.8.0; extra == "dev"
-Requires-Dist: build>=1.0.
+Requires-Dist: build>=1.0.5; extra == "dev"
 Requires-Dist: twine>=4.0.2; extra == "dev"
 Provides-Extra: docs
 Requires-Dist: sphinx>=7.2.0; extra == "docs"
````

The remaining hunks in this copy of PKG-INFO are identical to the README.md changes shown above, since the package metadata embeds the README as its long description.
```diff
--- greenmining-1.0.4/pyproject.toml
+++ greenmining-1.0.5/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "greenmining"
-version = "1.0.4"
+version = "1.0.5"
 description = "Analyze GitHub repositories to identify green software engineering patterns and energy-efficient practices"
 readme = "README.md"
 requires-python = ">=3.9"
@@ -60,7 +60,7 @@ dev = [
     "black>=23.12.0",
     "ruff>=0.1.9",
     "mypy>=1.8.0",
-    "build>=1.0.
+    "build>=1.0.5",
     "twine>=4.0.2"
 ]
 
```
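With the metadata, `greenmining/__init__.py`, and `greenmining/__version__.py` all bumped in lockstep, the upgrade can be verified from an interpreter; that the version string is re-exported as `greenmining.__version__` is an assumption suggested by the one-line `__init__.py` change:

```python
# Post-upgrade check; assumes the package re-exports its version string,
# which the one-line __init__.py/__version__.py bumps in this diff suggest.
import greenmining

print(greenmining.__version__)  # expected: 1.0.5
```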
All other files are unchanged between 1.0.4 and 1.0.5.