rust-crate-pipeline 1.2.6-py3-none-any.whl → 1.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rust_crate_pipeline/__init__.py +25 -25
- rust_crate_pipeline/__main__.py +1 -0
- rust_crate_pipeline/ai_processing.py +309 -200
- rust_crate_pipeline/analysis.py +304 -368
- rust_crate_pipeline/azure_ai_processing.py +453 -0
- rust_crate_pipeline/config.py +57 -19
- rust_crate_pipeline/core/__init__.py +19 -0
- rust_crate_pipeline/core/canon_registry.py +133 -0
- rust_crate_pipeline/core/irl_engine.py +256 -0
- rust_crate_pipeline/core/sacred_chain.py +117 -0
- rust_crate_pipeline/crate_analysis.py +54 -0
- rust_crate_pipeline/crate_list.txt +424 -0
- rust_crate_pipeline/github_token_checker.py +42 -36
- rust_crate_pipeline/main.py +386 -102
- rust_crate_pipeline/network.py +153 -133
- rust_crate_pipeline/pipeline.py +340 -264
- rust_crate_pipeline/production_config.py +35 -32
- rust_crate_pipeline/scraping/__init__.py +13 -0
- rust_crate_pipeline/scraping/unified_scraper.py +259 -0
- rust_crate_pipeline/unified_llm_processor.py +637 -0
- rust_crate_pipeline/unified_pipeline.py +548 -0
- rust_crate_pipeline/utils/file_utils.py +45 -14
- rust_crate_pipeline/utils/logging_utils.py +34 -17
- rust_crate_pipeline/version.py +47 -2
- rust_crate_pipeline-1.3.0.dist-info/METADATA +331 -0
- rust_crate_pipeline-1.3.0.dist-info/RECORD +30 -0
- rust_crate_pipeline-1.2.6.dist-info/METADATA +0 -573
- rust_crate_pipeline-1.2.6.dist-info/RECORD +0 -19
- {rust_crate_pipeline-1.2.6.dist-info → rust_crate_pipeline-1.3.0.dist-info}/WHEEL +0 -0
- {rust_crate_pipeline-1.2.6.dist-info → rust_crate_pipeline-1.3.0.dist-info}/entry_points.txt +0 -0
- {rust_crate_pipeline-1.2.6.dist-info → rust_crate_pipeline-1.3.0.dist-info}/licenses/LICENSE +0 -0
- {rust_crate_pipeline-1.2.6.dist-info → rust_crate_pipeline-1.3.0.dist-info}/top_level.txt +0 -0
rust_crate_pipeline/analysis.py
CHANGED
@@ -1,436 +1,372 @@
 # analysis.py
-import os
-import re
 import io
-import
-import time
+import re
 import tarfile
+import requests
+import logging
 import tempfile
+from typing import Any
+import os
+import sys
+import time
 import subprocess
-
-from datetime import datetime
-from dateutil.relativedelta import relativedelta
-from bs4 import BeautifulSoup
-from typing import Dict, Optional, List
+
 from .config import EnrichedCrate
 
+# Add the project root to the path to ensure utils can be imported
+# This is a common pattern in scripts to handle execution from different directories
+project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+if project_root not in sys.path:
+    sys.path.insert(0, project_root)
+
+try:
+    from utils.rust_code_analyzer import RustCodeAnalyzer  # type: ignore
+except ImportError as e:
+    logging.error(
+        f"Failed to import RustCodeAnalyzer: {e}. "
+        f"Ensure the utils directory is in the Python path."
+    )
+    # Provide a non-functional fallback to avoid crashing the entire application
+    # if the import fails, but ensure it logs the error.
+
+    class RustCodeAnalyzer:  # type: ignore
+        def __init__(self, code_content: str) -> None:
+            logging.error(
+                "Using fallback RustCodeAnalyzer. Analysis will be incomplete."
+            )
+            self.code_content = code_content
+
+        def analyze(self) -> dict[str, Any]:
+            return {
+                "functions": [],
+                "structs": [],
+                "enums": [],
+                "traits": [],
+                "complexity": 0,
+                "lines_of_code": len(self.code_content.split("\n")),
+            }
+
+        @staticmethod
+        def create_empty_metrics() -> dict[str, Any]:
+            return {}
+
+        @staticmethod
+        def detect_project_structure(files: list[str]) -> dict[str, bool]:
+            return {}
+
+        @staticmethod
+        def analyze_rust_content(content: str) -> dict[str, Any]:
+            return {}
+
+        @staticmethod
+        def aggregate_metrics(
+            metrics: dict[str, Any],
+            content_analysis: dict[str, Any],
+            structure: dict[str, bool],
+        ) -> dict[str, Any]:
+            return metrics
+
+
+# Constants for URLs and paths
+CRATES_IO_API_URL = "https://crates.io/api/v1/crates"
+GITHUB_API_URL = "https://api.github.com/repos"
+LIB_RS_URL = "https://lib.rs/crates"
+
+
 class SourceAnalyzer:
     @staticmethod
-    def analyze_crate_source(crate: EnrichedCrate) ->
-        """Orchestrate source analysis from multiple sources"""
-        crate_name = crate.name
-        version = crate.version
+    def analyze_crate_source(crate: EnrichedCrate) -> dict[str, Any]:
+        """Orchestrate source analysis from multiple sources."""
         repo_url = crate.repository
-
+
         # Method 1: Try to download from crates.io
         try:
-            url = f"
-            response = requests.get(url, stream=True)
-
-
-
-
-
-
-
+            url = f"{CRATES_IO_API_URL}/{crate.name}/{crate.version}/download"
+            response = requests.get(url, stream=True, timeout=30)
+            response.raise_for_status()
+            logging.info(f"Successfully downloaded {crate.name} from crates.io")
+            return SourceAnalyzer.analyze_crate_tarball(response.content)
+        except requests.RequestException as e:
+            logging.warning(f"Failed to download from crates.io: {e}")
+
         # Method 2: Try GitHub if we have a GitHub URL
-        if "github.com" in repo_url:
+        if repo_url and "github.com" in repo_url:
+            match = re.search(r"github\.com/([^/]+)/([^/]+)", repo_url)
+            if match:
+                owner, repo_name = match.groups()
+                repo_name = repo_name.replace(".git", "")
+                try:
+                    github_url = f"{GITHUB_API_URL}/{owner}/{repo_name}/tarball"
+                    response = requests.get(github_url, timeout=30)
+                    response.raise_for_status()
+                    logging.info(f"Successfully downloaded {crate.name} from GitHub")
+                    return SourceAnalyzer.analyze_github_tarball(response.content)
+                except requests.RequestException as e:
+                    logging.warning(f"Failed to analyze from GitHub: {e}")
+
+        # Method 3: Fallback to cloning from the repository directly
+        if repo_url:
             try:
-
-
-                if match:
-                    owner, repo_name = match.groups()
-                    repo_name = repo_name.split('.')[0]  # Remove .git extension
-
-                    # Try to download tarball from GitHub
-                    github_url = f"https://api.github.com/repos/{owner}/{repo_name}/tarball"
-                    response = requests.get(github_url)
-
-                    if response.ok:
-                        return SourceAnalyzer.analyze_github_tarball(response.content)
+                logging.info(f"Attempting to clone repository for {crate.name}")
+                return SourceAnalyzer.analyze_crate_source_from_repo(repo_url)
             except Exception as e:
-
-
-        # Method 3: Try lib.rs
-        try:
-            # lib.rs doesn't have a direct download API, but redirects to crates.io or GitHub
-            url = f"https://lib.rs/crates/{crate_name}"
-            response = requests.get(url)
-
-            if response.ok:
-                soup = BeautifulSoup(response.text, 'html.parser')
-
-                # Look for repository links
-                repo_links = soup.select('a[href*="github.com"]')
-                if repo_links:
-                    repo_url = repo_links[0]['href']
-
-                    # We found a GitHub link, now analyze it
-                    return SourceAnalyzer.analyze_crate_source_from_repo(crate_name, version, repo_url)
-        except Exception as e:
-            print(f"Failed to analyze from lib.rs: {str(e)}")
-
-        # If we get here, we failed to analyze from any source
+                logging.error(f"Failed to clone and analyze repository {repo_url}: {e}")
+
         return {
-            "error": "Could not analyze crate from any source",
-            "attempted_sources": ["crates.io", "github", "
+            "error": "Could not analyze crate from any available source.",
+            "attempted_sources": ["crates.io", "github", "git_clone"],
             "file_count": 0,
-            "loc": 0
+            "loc": 0,
         }
 
     @staticmethod
-    def
-        """
-        metrics =
-            "file_count": 0,
-            "loc": 0,
-            "complexity": [],
-            "types": [],
-            "traits": [],
-            "functions": [],
-            "has_tests": False,
-            "has_examples": False,
-            "has_benchmarks": False
-        }
-
+    def _analyze_tarball_content(content: bytes) -> dict[str, Any]:
+        """Shared logic to analyze tarball content from any source."""
+        metrics = RustCodeAnalyzer.create_empty_metrics()
         try:
-
-
-
-
-                rust_files = [f for f in tar.getnames() if f.endswith('.rs')]
+            with io.BytesIO(content) as tar_content, tarfile.open(
+                fileobj=tar_content, mode="r:gz"
+            ) as tar:
+                rust_files = [f for f in tar.getnames() if f.endswith(".rs")]
                 metrics["file_count"] = len(rust_files)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                                struct_matches = re.findall(r'struct\s+([a-zA-Z0-9_]+)', content_str)
-                                trait_matches = re.findall(r'trait\s+([a-zA-Z0-9_]+)', content_str)
-
-                                metrics["functions"].extend(fn_matches)
-                                metrics["types"].extend(struct_matches)
-                                metrics["traits"].extend(trait_matches)
-                    except Exception as e:
-                        print(f"Error analyzing file {filename}: {str(e)}")
-
-        except Exception as e:
-            metrics["error"] = str(e)
-
+                structure = RustCodeAnalyzer.detect_project_structure(tar.getnames())
+
+                for member in tar.getmembers():
+                    if member.isfile() and member.name.endswith(".rs"):
+                        file_content = tar.extractfile(member)
+                        if file_content:
+                            try:
+                                content_str = file_content.read().decode("utf-8")
+                                analysis = RustCodeAnalyzer.analyze_rust_content(
+                                    content_str
+                                )
+                                metrics = RustCodeAnalyzer.aggregate_metrics(
+                                    metrics, analysis, structure
+                                )
+                            except UnicodeDecodeError:
+                                logging.warning(
+                                    f"Skipping non-UTF-8 file: {member.name}"
+                                )
+        except tarfile.TarError as e:
+            metrics["error"] = f"Failed to read tarball: {e}"
+            logging.error(metrics["error"])
         return metrics
 
     @staticmethod
-    def
-        """Analyze a
-
-            "file_count": 0,
-            "loc": 0,
-            "complexity": [],
-            "types": [],
-            "traits": [],
-            "functions": [],
-            "has_tests": False,
-            "has_examples": False,
-            "has_benchmarks": False
-        }
-
-        try:
-            # GitHub tarballs are typically gzipped tar files
-            tar_content = io.BytesIO(content)
-            with tarfile.open(fileobj=tar_content, mode='r:gz') as tar:
-                # GitHub tarballs include the repo name and commit as the top dir
-                # So we need to handle the different structure
-                rust_files = [f for f in tar.getnames() if f.endswith('.rs')]
-                metrics["file_count"] = len(rust_files)
-
-                # Check for test/example/bench directories
-                all_files = tar.getnames()
-                metrics["has_tests"] = any('test' in f.lower() for f in all_files)
-                metrics["has_examples"] = any('example' in f.lower() for f in all_files)
-                metrics["has_benchmarks"] = any('bench' in f.lower() for f in all_files)
-
-                # Analyze each Rust file (same as crate tarball)
-                for filename in rust_files:
-                    try:
-                        member = tar.getmember(filename)
-                        if member.isfile():
-                            file_content = tar.extractfile(member)
-                            if file_content:
-                                content_str = file_content.read().decode('utf-8', errors='ignore')
-
-                                # Count lines of code
-                                metrics["loc"] += len(content_str.splitlines())
-
-                                # Extract code elements
-                                fn_matches = re.findall(r'fn\s+([a-zA-Z0-9_]+)', content_str)
-                                struct_matches = re.findall(r'struct\s+([a-zA-Z0-9_]+)', content_str)
-                                trait_matches = re.findall(r'trait\s+([a-zA-Z0-9_]+)', content_str)
-
-                                metrics["functions"].extend(fn_matches)
-                                metrics["types"].extend(struct_matches)
-                                metrics["traits"].extend(trait_matches)
-                    except Exception as e:
-                        print(f"Error analyzing file {filename}: {str(e)}")
-
-        except Exception as e:
-            metrics["error"] = str(e)
-
-        return metrics
+    def analyze_crate_tarball(content: bytes) -> dict[str, Any]:
+        """Analyze a .crate tarball from crates.io."""
+        return SourceAnalyzer._analyze_tarball_content(content)
 
     @staticmethod
-    def
-        """Analyze
-
-
-
-
-
-
-            "functions": [],
-            "has_tests": False,
-            "has_examples": False,
-            "has_benchmarks": False
-        }
-
+    def analyze_github_tarball(content: bytes) -> dict[str, Any]:
+        """Analyze a GitHub tarball."""
+        return SourceAnalyzer._analyze_tarball_content(content)
+
+    @staticmethod
+    def analyze_local_directory(directory: str) -> dict[str, Any]:
+        """Analyze source code from a local directory."""
+        metrics = RustCodeAnalyzer.create_empty_metrics()
        try:
-
-
-        for root,
-
-
-
-
+            rust_files: list[str] = []
+            all_paths: list[str] = []
+            for root, dirs, files in os.walk(directory):
+                # Exclude target and .git directories
+                dirs[:] = [d for d in dirs if d not in ["target", ".git"]]
+                for file in files:
+                    full_path = os.path.join(root, file)
+                    all_paths.append(full_path)
+                    if file.endswith(".rs"):
+                        rust_files.append(full_path)
+
            metrics["file_count"] = len(rust_files)
-
-
-            metrics["has_tests"] = any(os.path.exists(os.path.join(directory, d))
-                                       for d in ["tests", "test"])
-            metrics["has_examples"] = os.path.exists(os.path.join(directory, "examples"))
-            metrics["has_benchmarks"] = os.path.exists(os.path.join(directory, "benches"))
-
-            # Analyze each Rust file
+            structure = RustCodeAnalyzer.detect_project_structure(all_paths)
+
            for file_path in rust_files:
                try:
-                    with open(file_path,
+                    with open(file_path, encoding="utf-8", errors="ignore") as f:
                        content = f.read()
-
-
-
-
-                    # Extract code elements
-                    fn_matches = re.findall(r'fn\s+([a-zA-Z0-9_]+)', content)
-                    struct_matches = re.findall(r'struct\s+([a-zA-Z0-9_]+)', content)
-                    trait_matches = re.findall(r'trait\s+([a-zA-Z0-9_]+)', content)
-
-                    metrics["functions"].extend(fn_matches)
-                    metrics["types"].extend(struct_matches)
-                    metrics["traits"].extend(trait_matches)
-
+                    analysis = RustCodeAnalyzer.analyze_rust_content(content)
+                    metrics = RustCodeAnalyzer.aggregate_metrics(
+                        metrics, analysis, structure
+                    )
                except Exception as e:
-
-
+                    logging.warning(f"Error analyzing file {file_path}: {e}")
        except Exception as e:
-            metrics["error"] =
-
+            metrics["error"] = f"Failed to analyze local directory {directory}: {e}"
+            logging.error(metrics["error"])
        return metrics
 
     @staticmethod
-    def analyze_crate_source_from_repo(
-        """Clone and analyze a crate's source code from repository"""
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            "
-
-
-
-
-
+    def analyze_crate_source_from_repo(repo_url: str) -> dict[str, Any]:
+        """Clone and analyze a crate's source code from a repository."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            try:
+                logging.info(f"Cloning {repo_url} into {temp_dir}")
+                subprocess.run(
+                    ["git", "clone", "--depth=1", repo_url, temp_dir],
+                    capture_output=True,
+                    text=True,
+                    check=True,
+                    timeout=120,
+                )
+                return SourceAnalyzer.analyze_local_directory(temp_dir)
+            except (
+                subprocess.CalledProcessError,
+                subprocess.TimeoutExpired,
+            ) as e:
+                error_output = ""
+                if hasattr(e, "stderr") and e.stderr:
+                    error_output = e.stderr.decode("utf-8", "ignore")
+                else:
+                    error_output = str(e)
+                logging.error(f"Failed to clone repository {repo_url}: {error_output}")
+                return {
+                    "error": f"Failed to clone repository: {error_output}",
+                    "file_count": 0,
+                    "loc": 0,
+                }
+
 
 class SecurityAnalyzer:
     @staticmethod
-    def check_security_metrics(crate: EnrichedCrate) ->
-        """Check security metrics for a crate"""
-        security_data = {
+    def check_security_metrics(crate: EnrichedCrate) -> dict[str, Any]:
+        """Check security metrics for a crate (placeholder)."""
+        security_data: dict[str, Any] = {
            "advisories": [],
            "vulnerability_count": 0,
            "cargo_audit": None,
-            "
-            "test_coverage": None
+            "unsafe_blocks": 0,
        }
-
-
-
-
-        # Check RustSec Advisory Database
-        try:
-            # This would require the RustSec advisory database
-            # For now, just return placeholder data
-            advisories_url = f"https://rustsec.org/advisories/{crate_name}.json"
-            response = requests.get(advisories_url)
-            if response.ok:
-                advisories = response.json()
-                security_data["advisories"] = advisories
-                security_data["vulnerability_count"] = len(advisories)
-        except Exception:
-            pass
-
-        # Check for common security patterns in code
-        try:
-            # This would analyze the source code for unsafe blocks, etc.
-            # Placeholder for now
-            security_data["unsafe_blocks"] = 0
-            security_data["security_patterns"] = []
-        except Exception:
-            pass
-
+        # In a real implementation, this would run tools like `cargo-audit`
+        # and parse the output. For now, it remains a placeholder.
+        logging.info(f"Running placeholder security check for {crate.name}")
        return security_data
 
+
 class UserBehaviorAnalyzer:
     @staticmethod
-    def
-        """
-
+    def _get_github_headers() -> dict[str, str]:
+        """Get headers for GitHub API requests, including auth if available."""
+        headers = {"Accept": "application/vnd.github.v3+json"}
+        if token := os.environ.get("GITHUB_TOKEN"):
+            headers["Authorization"] = f"token {token}"
+        return headers
+
+    @staticmethod
+    def fetch_user_behavior_data(crate: EnrichedCrate) -> dict[str, Any]:
+        """Fetch user behavior data from GitHub and crates.io."""
+        result: dict[str, Any] = {
            "issues": [],
            "pull_requests": [],
            "version_adoption": {},
-            "community_metrics": {}
+            "community_metrics": {},
        }
-
-        crate_name = crate.name
        repo_url = crate.repository
-
-        # Extract owner/repo from URL
        if not repo_url or "github.com" not in repo_url:
            return result
-
-
-        if
+
+        match = re.search(r"github\.com/([^/]+)/([^/]+)", repo_url)
+        if not match:
            return result
-        owner, repo =
-
-
-        headers =
-
-
-
-
-
-
-
-
-
-
-
-            # Process issue data
-            for issue in issues_data:
-                if "pull_request" in issue:
-                    # This is a PR, not an issue
-                    result["pull_requests"].append({
-                        "number": issue["number"],
-                        "title": issue["title"],
-                        "state": issue["state"],
-                        "created_at": issue["created_at"],
-                        "closed_at": issue["closed_at"],
-                        "url": issue["html_url"]
-                    })
-                else:
-                    # Regular issue
-                    result["issues"].append({
-                        "number": issue["number"],
-                        "title": issue["title"],
-                        "state": issue["state"],
-                        "created_at": issue["created_at"],
-                        "closed_at": issue["closed_at"],
-                        "url": issue["html_url"]
-                    })
-
-            # Fetch commit activity for the past year
-            commits_url = f"https://api.github.com/repos/{owner}/{repo}/stats/commit_activity"
-            commits_resp = requests.get(commits_url, headers=headers)
-            if commits_resp.ok:
-                result["community_metrics"]["commit_activity"] = commits_resp.json()
-
-            # Rate limiting - be nice to GitHub API
-            time.sleep(1)
-        except Exception as e:
-            print(f"Error fetching GitHub data: {str(e)}")
-
-        # Get version adoption data from crates.io
+        owner, repo = match.groups()
+        repo = repo.replace(".git", "")
+
+        headers = UserBehaviorAnalyzer._get_github_headers()
+        UserBehaviorAnalyzer._fetch_github_activity(owner, repo, headers, result)
+        UserBehaviorAnalyzer._fetch_crates_io_versions(crate.name, result)
+
+        return result
+
+    @staticmethod
+    def _fetch_github_activity(
+        owner: str, repo: str, headers: dict[str, str], result: dict[str, Any]
+    ) -> None:
+        """Fetch issues, PRs, and commit activity from GitHub."""
        try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-            "
-            "
+            issues_url = f"{GITHUB_API_URL}/{owner}/{repo}/issues?state=all&per_page=30"
+            issues_resp = requests.get(issues_url, headers=headers, timeout=30)
+            issues_resp.raise_for_status()
+
+            for item in issues_resp.json():
+                is_pr = "pull_request" in item
+                data_list = result["pull_requests"] if is_pr else result["issues"]
+                data_list.append(
+                    {
+                        "number": item["number"],
+                        "title": item["title"],
+                        "state": item["state"],
+                        "created_at": item["created_at"],
+                        "closed_at": item["closed_at"],
+                        "url": item["html_url"],
                    }
-
-
-
-
+                )
+
+            # Fetch commit activity (retries on 202)
+            activity_url = f"{GITHUB_API_URL}/{owner}/{repo}/stats/commit_activity"
+            for _ in range(3):  # Retry up to 3 times
+                activity_resp = requests.get(activity_url, headers=headers, timeout=60)
+                if activity_resp.status_code == 200:
+                    result["community_metrics"][
+                        "commit_activity"
+                    ] = activity_resp.json()
+                    break
+                elif activity_resp.status_code == 202:
+                    logging.info(
+                        f"GitHub is calculating stats for {owner}/{repo}, waiting..."
+                    )
+                    time.sleep(2)
+                else:
+                    activity_resp.raise_for_status()
+
+        except requests.RequestException as e:
+            logging.warning(f"Error fetching GitHub data for {owner}/{repo}: {e}")
+
+    @staticmethod
+    def _fetch_crates_io_versions(crate_name: str, result: dict[str, Any]) -> None:
+        """Fetch version adoption data from crates.io."""
+        try:
+            versions_url = f"{CRATES_IO_API_URL}/{crate_name}/versions"
+            versions_resp = requests.get(versions_url, timeout=30)
+            versions_resp.raise_for_status()
+            versions_data = versions_resp.json().get("versions", [])
+
+            for version in versions_data[:10]:  # Top 10 versions
+                result["version_adoption"][version["num"]] = {
+                    "downloads": version["downloads"],
+                    "created_at": version["created_at"],
+                }
+        except requests.RequestException as e:
+            logging.warning(
+                f"Error fetching crates.io version data for {crate_name}: {e}"
+            )
+
 
 class DependencyAnalyzer:
     @staticmethod
-    def analyze_dependencies(crates:
-        """Analyze dependencies
-        dependency_graph = {}
+    def analyze_dependencies(crates: list[EnrichedCrate]) -> dict[str, Any]:
+        """Analyze dependencies within a given list of crates."""
        crate_names = {crate.name for crate in crates}
-
-
-
-
-                if dep.get("crate_id") in crate_names
-
-
-
-
-        reverse_deps = {}
+        dependency_graph: dict[str, list[str]] = {
+            crate.name: [
+                dep_id
+                for dep in crate.dependencies
+                if (dep_id := dep.get("crate_id")) and dep_id in crate_names
+            ]
+            for crate in crates
+        }
+
+        reverse_deps: dict[str, list[str]] = {}
        for crate_name, deps in dependency_graph.items():
            for dep in deps:
-                if dep not
-                    reverse_deps
-
-
+                if dep:  # Ensure dep is not None
+                    reverse_deps.setdefault(dep, []).append(crate_name)
+
+        most_depended = sorted(
+            reverse_deps.items(), key=lambda item: len(item[1]), reverse=True
+        )[:10]
+
        return {
            "dependency_graph": dependency_graph,
            "reverse_dependencies": reverse_deps,
-            "most_depended":
+            "most_depended": most_depended,
        }