rust-crate-pipeline 1.4.0-py3-none-any.whl → 1.4.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. rust_crate_pipeline/__init__.py +18 -27
  2. rust_crate_pipeline/__main__.py +1 -0
  3. rust_crate_pipeline/ai_processing.py +718 -596
  4. rust_crate_pipeline/analysis.py +330 -363
  5. rust_crate_pipeline/azure_ai_processing.py +462 -0
  6. rust_crate_pipeline/config.py +46 -28
  7. rust_crate_pipeline/core/__init__.py +19 -0
  8. rust_crate_pipeline/core/canon_registry.py +133 -0
  9. rust_crate_pipeline/core/irl_engine.py +256 -0
  10. rust_crate_pipeline/core/sacred_chain.py +117 -0
  11. rust_crate_pipeline/crate_analysis.py +54 -0
  12. rust_crate_pipeline/crate_list.txt +424 -0
  13. rust_crate_pipeline/github_token_checker.py +108 -112
  14. rust_crate_pipeline/main.py +329 -109
  15. rust_crate_pipeline/network.py +317 -308
  16. rust_crate_pipeline/pipeline.py +300 -375
  17. rust_crate_pipeline/production_config.py +24 -27
  18. rust_crate_pipeline/progress_monitor.py +334 -0
  19. rust_crate_pipeline/scraping/__init__.py +13 -0
  20. rust_crate_pipeline/scraping/unified_scraper.py +259 -0
  21. rust_crate_pipeline/unified_llm_processor.py +637 -0
  22. rust_crate_pipeline/unified_pipeline.py +548 -0
  23. rust_crate_pipeline/utils/file_utils.py +32 -5
  24. rust_crate_pipeline/utils/logging_utils.py +21 -16
  25. rust_crate_pipeline/version.py +76 -47
  26. rust_crate_pipeline-1.4.1.dist-info/METADATA +515 -0
  27. rust_crate_pipeline-1.4.1.dist-info/RECORD +31 -0
  28. rust_crate_pipeline-1.4.0.dist-info/METADATA +0 -585
  29. rust_crate_pipeline-1.4.0.dist-info/RECORD +0 -19
  30. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/WHEEL +0 -0
  31. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/entry_points.txt +0 -0
  32. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/licenses/LICENSE +0 -0
  33. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/top_level.txt +0 -0
@@ -1,447 +1,414 @@
  # analysis.py
+ from __future__ import annotations
+
+ import io
+ import re
+ import tarfile
+ import requests
+ import logging
+ import tempfile
+ from typing import Any, Dict, List, Optional, Union
  import os
  import sys
- import re
- import io
  import time
- import tarfile
  import subprocess
- import requests
- from bs4 import BeautifulSoup
-
- # Import utilities with fallback
- try:
-     # Add the parent directory to the path to import utils
-     sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
-     from utils.rust_code_analyzer import RustCodeAnalyzer
- except ImportError:
-     # Fallback implementation for when utils are not available
-     class RustCodeAnalyzer:
-         def __init__(self, code_content):
-             self.code_content = code_content
-
-         def analyze(self):
-             return {
-                 "functions": [],
-                 "structs": [],
-                 "enums": [],
-                 "traits": [],
-                 "complexity": 0,
-                 "lines_of_code": len(self.code_content.split('\n'))
-             }
- from typing import Dict, List
+ from dataclasses import dataclass
+
  from .config import EnrichedCrate

- # Import atomic utilities for code reuse
- import sys
- sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+ # Create a fallback RustCodeAnalyzer that doesn't depend on external utils
+ class RustCodeAnalyzer:
+     """Fallback Rust code analyzer for when the full analyzer is not available."""
+
+     def __init__(self, code_content: str) -> None:
+         self.code_content = code_content

+     def analyze(self) -> dict[str, Any]:
+         """Basic analysis of Rust code content."""
+         lines = self.code_content.split('\n')
+         return {
+             "functions": self._count_functions(),
+             "structs": self._count_structs(),
+             "enums": self._count_enums(),
+             "traits": self._count_traits(),
+             "complexity": self._calculate_complexity(),
+             "lines_of_code": len(lines),
+         }

- class SourceAnalyzer:
-     @staticmethod
-     def analyze_crate_source(crate: EnrichedCrate) -> Dict:
-         """Orchestrate source analysis from multiple sources"""
-         crate_name = crate.name
-         version = crate.version
-         repo_url = crate.repository
+     def _count_functions(self) -> int:
+         """Count function definitions."""
+         return len(re.findall(r'fn\s+\w+\s*\(', self.code_content))

-         # Method 1: Try to download from crates.io
-         try:
-             url = f"https://crates.io/api/v1/crates/{crate_name}/{version}/download"
-             response = requests.get(url, stream=True)
+     def _count_structs(self) -> int:
+         """Count struct definitions."""
+         return len(re.findall(r'struct\s+\w+', self.code_content))

-             if response.ok:
-                 # We got the tarball, analyze it
-                 return SourceAnalyzer.analyze_crate_tarball(response.content)
-         except Exception as e:
-             print(f"Failed to download from crates.io: {str(e)}")
+     def _count_enums(self) -> int:
+         """Count enum definitions."""
+         return len(re.findall(r'enum\s+\w+', self.code_content))

-         # Method 2: Try GitHub if we have a GitHub URL
-         if "github.com" in repo_url:
-             try:
-                 # Extract owner/repo from URL
-                 match = re.search(r"github\.com/([^/]+)/([^/]+)", repo_url)
-                 if match:
-                     owner, repo_name = match.groups()
-                     repo_name = repo_name.split(
-                         '.')[0]  # Remove .git extension
-
-                     # Try to download tarball from GitHub
-                     github_url = f"https://api.github.com/repos/{owner}/{repo_name}/tarball"
-                     response = requests.get(github_url)
-
-                     if response.ok:
-                         return SourceAnalyzer.analyze_github_tarball(
-                             response.content)
-             except Exception as e:
-                 print(f"Failed to analyze from GitHub: {str(e)}")
+     def _count_traits(self) -> int:
+         """Count trait definitions."""
+         return len(re.findall(r'trait\s+\w+', self.code_content))

-         # Method 3: Try lib.rs
-         try:
-             # lib.rs doesn't have a direct download API, but redirects to crates.io or
-             # GitHub
-             url = f"https://lib.rs/crates/{crate_name}"
-             response = requests.get(url)
-
-             if response.ok:
-                 soup = BeautifulSoup(response.text, 'html.parser')
-
-                 # Look for repository links
-                 repo_links = soup.select('a[href*="github.com"]')
-                 if repo_links:
-                     repo_url = repo_links[0]['href']
-
-                     # We found a GitHub link, now analyze it
-                     return SourceAnalyzer.analyze_crate_source_from_repo(
-                         crate_name, version, repo_url)
-         except Exception as e:
-             print(f"Failed to analyze from lib.rs: {str(e)}")
+     def _calculate_complexity(self) -> int:
+         """Calculate basic cyclomatic complexity."""
+         complexity = 0
+         complexity += len(re.findall(r'\bif\b', self.code_content))
+         complexity += len(re.findall(r'\bfor\b', self.code_content))
+         complexity += len(re.findall(r'\bwhile\b', self.code_content))
+         complexity += len(re.findall(r'\bmatch\b', self.code_content))
+         return complexity

-         # If we get here, we failed to analyze from any source
+     @staticmethod
+     def create_empty_metrics() -> dict[str, Any]:
+         """Create empty metrics structure."""
          return {
-             "error": "Could not analyze crate from any source",
-             "attempted_sources": ["crates.io", "github", "lib.rs"],
+             "functions": 0,
+             "structs": 0,
+             "enums": 0,
+             "traits": 0,
+             "complexity": 0,
+             "lines_of_code": 0,
              "file_count": 0,
-             "loc": 0
-         } @ staticmethod
+         }

-     def analyze_crate_tarball(content: bytes) -> Dict:
-         """Analyze a .crate tarball from crates.io - refactored to use atomic utilities"""
-         metrics = RustCodeAnalyzer.create_empty_metrics()
+     @staticmethod
+     def detect_project_structure(files: list[str]) -> dict[str, bool]:
+         """Detect basic project structure."""
+         return {
+             "has_cargo_toml": any("Cargo.toml" in f for f in files),
+             "has_src": any("/src/" in f for f in files),
+             "has_tests": any("/tests/" in f for f in files),
+             "has_examples": any("/examples/" in f for f in files),
+         }

-         try:
-             # Open the tar file from the content
-             tar_content = io.BytesIO(content)
-             with tarfile.open(fileobj=tar_content, mode='r:gz') as tar:
-                 # Get list of Rust files
-                 rust_files = [f for f in tar.getnames() if f.endswith('.rs')]
-                 metrics["file_count"] = len(rust_files)
+     @staticmethod
+     def analyze_rust_content(content: str) -> dict[str, Any]:
+         """Analyze Rust content."""
+         analyzer = RustCodeAnalyzer(content)
+         return analyzer.analyze()

-                 # Check for test/example/bench directories using atomic utility
-                 all_files = tar.getnames()
-                 structure = RustCodeAnalyzer.detect_project_structure(
-                     all_files)
-
-                 # Analyze each Rust file using atomic utility
-                 for filename in rust_files:
-                     try:
-                         member = tar.getmember(filename)
-                         if member.isfile():
-                             file_content = tar.extractfile(member)
-                             if file_content:
-                                 content_str = file_content.read().decode('utf-8', errors='ignore')
-
-                                 # Use atomic content analysis
-                                 content_analysis = RustCodeAnalyzer.analyze_rust_content(
-                                     content_str)
-                                 metrics = RustCodeAnalyzer.aggregate_metrics(
-                                     metrics, content_analysis, structure)
+     @staticmethod
+     def aggregate_metrics(
+         metrics: dict[str, Any],
+         content_analysis: dict[str, Any],
+         structure: dict[str, bool],
+     ) -> dict[str, Any]:
+         """Aggregate metrics from multiple sources."""
+         for key, value in content_analysis.items():
+             if isinstance(value, (int, float)):
+                 metrics[key] = metrics.get(key, 0) + value
+             elif isinstance(value, list):
+                 if key not in metrics:
+                     metrics[key] = []
+                 metrics[key].extend(value)
+
+         # Add structure information
+         metrics.update(structure)
+         return metrics

-                     except Exception as e:
-                         print(f"Error analyzing file {filename}: {str(e)}")

-         except Exception as e:
-             metrics["error"] = str(e)
+ # Constants for URLs and paths
+ CRATES_IO_API_URL = "https://crates.io/api/v1/crates"
+ GITHUB_API_URL = "https://api.github.com/repos"
+ LIB_RS_URL = "https://lib.rs/crates"

-         return metrics @ staticmethod

-     def analyze_github_tarball(content: bytes) -> Dict:
-         """Analyze a GitHub tarball - refactored to use atomic utilities"""
-         metrics = RustCodeAnalyzer.create_empty_metrics()
+ class SourceAnalyzer:
+     @staticmethod
+     def analyze_crate_source(crate: EnrichedCrate) -> dict[str, Any]:
+         """Orchestrate source analysis from multiple sources."""
+         repo_url = crate.repository

+         # Method 1: Try to download from crates.io
          try:
-             # GitHub tarballs are typically gzipped tar files
-             tar_content = io.BytesIO(content)
-             with tarfile.open(fileobj=tar_content, mode='r:gz') as tar:
-                 # GitHub tarballs include the repo name and commit as the top dir
-                 # So we need to handle the different structure
-                 rust_files = [f for f in tar.getnames() if f.endswith('.rs')]
-                 metrics["file_count"] = len(rust_files)
+             url = f"{CRATES_IO_API_URL}/{crate.name}/{crate.version}/download"
+             response = requests.get(url, stream=True, timeout=30)
+             response.raise_for_status()
+             logging.info(f"Successfully downloaded {crate.name} from crates.io")
+             return SourceAnalyzer.analyze_crate_tarball(response.content)
+         except requests.RequestException as e:
+             logging.warning(f"Failed to download from crates.io: {e}")

-                 # Check for test/example/bench directories using atomic utility
-                 all_files = tar.getnames()
-                 structure = RustCodeAnalyzer.detect_project_structure(
-                     all_files)
-
-                 # Analyze each Rust file using atomic utility (same as crate
-                 # tarball)
-                 for filename in rust_files:
-                     try:
-                         member = tar.getmember(filename)
-                         if member.isfile():
-                             file_content = tar.extractfile(member)
-                             if file_content:
-                                 content_str = file_content.read().decode('utf-8', errors='ignore')
-
-                                 # Use atomic content analysis
-                                 content_analysis = RustCodeAnalyzer.analyze_rust_content(
-                                     content_str)
-                                 metrics = RustCodeAnalyzer.aggregate_metrics(
-                                     metrics, content_analysis, structure)
+         # Method 2: Try GitHub if we have a GitHub URL
+         if repo_url and "github.com" in repo_url:
+             match = re.search(r"github\.com/([^/]+)/([^/]+)", repo_url)
+             if match:
+                 owner, repo_name = match.groups()
+                 repo_name = repo_name.replace(".git", "")
+                 try:
+                     github_url = f"{GITHUB_API_URL}/{owner}/{repo_name}/tarball"
+                     response = requests.get(github_url, timeout=30)
+                     response.raise_for_status()
+                     logging.info(f"Successfully downloaded {crate.name} from GitHub")
+                     return SourceAnalyzer.analyze_github_tarball(response.content)
+                 except requests.RequestException as e:
+                     logging.warning(f"Failed to analyze from GitHub: {e}")
+
+         # Method 3: Fallback to cloning from the repository directly
+         if repo_url:
+             try:
+                 logging.info(f"Attempting to clone repository for {crate.name}")
+                 return SourceAnalyzer.analyze_crate_source_from_repo(repo_url)
+             except Exception as e:
+                 logging.error(f"Failed to clone and analyze repository {repo_url}: {e}")

-                     except Exception as e:
-                         print(f"Error analyzing file {filename}: {str(e)}")
+         return {
+             "error": "Could not analyze crate from any available source.",
+             "attempted_sources": ["crates.io", "github", "git_clone"],
+             "file_count": 0,
+             "loc": 0,
+         }

-         except Exception as e:
-             metrics["error"] = str(e)
+     @staticmethod
+     def _analyze_tarball_content(content: bytes) -> dict[str, Any]:
+         """Shared logic to analyze tarball content from any source."""
+         metrics = RustCodeAnalyzer.create_empty_metrics()
+         try:
+             with io.BytesIO(content) as tar_content, tarfile.open(
+                 fileobj=tar_content, mode="r:gz"
+             ) as tar:
+                 rust_files = [f for f in tar.getnames() if f.endswith(".rs")]
+                 metrics["file_count"] = len(rust_files)
+                 structure = RustCodeAnalyzer.detect_project_structure(tar.getnames())
+
+                 for member in tar.getmembers():
+                     if member.isfile() and member.name.endswith(".rs"):
+                         file_content = tar.extractfile(member)
+                         if file_content:
+                             try:
+                                 content_str = file_content.read().decode("utf-8")
+                                 analysis = RustCodeAnalyzer.analyze_rust_content(
+                                     content_str
+                                 )
+                                 metrics = RustCodeAnalyzer.aggregate_metrics(
+                                     metrics, analysis, structure
+                                 )
+                             except UnicodeDecodeError:
+                                 logging.warning(
+                                     f"Skipping non-UTF-8 file: {member.name}"
+                                 )
+         except tarfile.TarError as e:
+             metrics["error"] = f"Failed to read tarball: {e}"
+             logging.error(metrics["error"])
+         return metrics
+
+     @staticmethod
+     def analyze_crate_tarball(content: bytes) -> dict[str, Any]:
+         """Analyze a .crate tarball from crates.io."""
+         return SourceAnalyzer._analyze_tarball_content(content)

-         return metrics @ staticmethod
+     @staticmethod
+     def analyze_github_tarball(content: bytes) -> dict[str, Any]:
+         """Analyze a GitHub tarball."""
+         return SourceAnalyzer._analyze_tarball_content(content)

-     def analyze_local_directory(directory: str) -> Dict:
-         """Analyze source code from a local directory - refactored to use atomic utilities"""
+     @staticmethod
+     def analyze_local_directory(directory: str) -> dict[str, Any]:
+         """Analyze source code from a local directory."""
          metrics = RustCodeAnalyzer.create_empty_metrics()
-
          try:
-             # Find all Rust files
-             rust_files = []
-             for root, _, files in os.walk(directory):
-                 if "target" in root or ".git" in root:  # Skip build dirs and git
-                     continue
-                 rust_files.extend([os.path.join(root, f)
-                                    for f in files if f.endswith(".rs")])
+             rust_files: list[str] = []
+             all_paths: list[str] = []
+             for root, dirs, files in os.walk(directory):
+                 # Exclude target and .git directories
+                 dirs[:] = [d for d in dirs if d not in ["target", ".git"]]
+                 for file in files:
+                     full_path = os.path.join(root, file)
+                     all_paths.append(full_path)
+                     if file.endswith(".rs"):
+                         rust_files.append(full_path)

              metrics["file_count"] = len(rust_files)
+             structure = RustCodeAnalyzer.detect_project_structure(all_paths)

-             # Check if the crate has tests/examples/benchmarks using atomic
-             # utility
-             project_dirs = [
-                 d for d in os.listdir(directory) if os.path.isdir(
-                     os.path.join(
-                         directory, d))]
-             structure = RustCodeAnalyzer.detect_project_structure(
-                 project_dirs + ["tests", "examples", "benches"])
-
-             # Override with actual directory checks
-             structure["has_tests"] = any(
-                 os.path.exists(
-                     os.path.join(
-                         directory,
-                         d)) for d in [
-                 "tests",
-                 "test"])
-             structure["has_examples"] = os.path.exists(
-                 os.path.join(directory, "examples"))
-             structure["has_benchmarks"] = os.path.exists(
-                 os.path.join(directory, "benches"))
-
-             # Analyze each Rust file using atomic utility
              for file_path in rust_files:
                  try:
-                     with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                     with open(file_path, encoding="utf-8", errors="ignore") as f:
                          content = f.read()
-
-                         # Use atomic content analysis
-                         content_analysis = RustCodeAnalyzer.analyze_rust_content(
-                             content)
+                     analysis = RustCodeAnalyzer.analyze_rust_content(content)
                      metrics = RustCodeAnalyzer.aggregate_metrics(
-                         metrics, content_analysis, structure)
-
+                         metrics, analysis, structure
+                     )
                  except Exception as e:
-                     print(f"Error analyzing file {file_path}: {str(e)}")
-
+                     logging.warning(f"Error analyzing file {file_path}: {e}")
          except Exception as e:
-             metrics["error"] = str(e)
-
+             metrics["error"] = f"Failed to analyze local directory {directory}: {e}"
+             logging.error(metrics["error"])
          return metrics

      @staticmethod
-     def analyze_crate_source_from_repo(
-             crate_name: str,
-             version: str,
-             repo_url: str) -> Dict:
-         """Clone and analyze a crate's source code from repository"""
-         temp_dir = f"/tmp/rust_analysis/{crate_name}"
-         os.makedirs(temp_dir, exist_ok=True)
-
-         try:
-             # Clone repository
-             if not os.path.exists(f"{temp_dir}/.git"):
-                 subprocess.run(["git",
-                                 "clone",
-                                 "--depth=1",
-                                 repo_url,
-                                 temp_dir],
-                                capture_output=True,
-                                text=True,
-                                check=True)
-
-             return SourceAnalyzer.analyze_local_directory(temp_dir)
-
-         except Exception as e:
-             return {
-                 "error": f"Failed to clone and analyze repository: {str(e)}",
-                 "file_count": 0,
-                 "loc": 0
-             }
-         finally:
-             # Clean up (optional)
-             # subprocess.run(["rm", "-r", temp_dir], capture_output=True)
-             pass
+     def analyze_crate_source_from_repo(repo_url: str) -> dict[str, Any]:
+         """Clone and analyze a crate's source code from a repository."""
+         with tempfile.TemporaryDirectory() as temp_dir:
+             try:
+                 logging.info(f"Cloning {repo_url} into {temp_dir}")
+                 subprocess.run(
+                     ["git", "clone", "--depth=1", repo_url, temp_dir],
+                     capture_output=True,
+                     text=True,
+                     check=True,
+                     timeout=120,
+                 )
+                 return SourceAnalyzer.analyze_local_directory(temp_dir)
+             except (
+                 subprocess.CalledProcessError,
+                 subprocess.TimeoutExpired,
+             ) as e:
+                 error_output = ""
+                 if hasattr(e, "stderr") and e.stderr:
+                     error_output = e.stderr.decode("utf-8", "ignore")
+                 else:
+                     error_output = str(e)
+                 logging.error(f"Failed to clone repository {repo_url}: {error_output}")
+                 return {
+                     "error": f"Failed to clone repository: {error_output}",
+                     "file_count": 0,
+                     "loc": 0,
+                 }


  class SecurityAnalyzer:
      @staticmethod
-     def check_security_metrics(crate: EnrichedCrate) -> Dict:
-         """Check security metrics for a crate"""
-         security_data = {
+     def check_security_metrics(crate: EnrichedCrate) -> dict[str, Any]:
+         """Check security metrics for a crate (placeholder)."""
+         security_data: dict[str, Any] = {
              "advisories": [],
              "vulnerability_count": 0,
              "cargo_audit": None,
-             "clippy_warnings": 0,
-             "test_coverage": None
+             "unsafe_blocks": 0,
          }
-
-         crate_name = crate.name
-         version = crate.version
-
-         # Check RustSec Advisory Database
-         try:
-             # This would require the RustSec advisory database
-             # For now, just return placeholder data
-             advisories_url = f"https://rustsec.org/advisories/{crate_name}.json"
-             response = requests.get(advisories_url)
-             if response.ok:
-                 advisories = response.json()
-                 security_data["advisories"] = advisories
-                 security_data["vulnerability_count"] = len(advisories)
-         except Exception:
-             pass
-
-         # Check for common security patterns in code
-         try:
-             # This would analyze the source code for unsafe blocks, etc.
-             # Placeholder for now
-             security_data["unsafe_blocks"] = 0
-             security_data["security_patterns"] = []
-         except Exception:
-             pass
-
+         # In a real implementation, this would run tools like `cargo-audit`
+         # and parse the output. For now, it remains a placeholder.
+         logging.info(f"Running placeholder security check for {crate.name}")
          return security_data


  class UserBehaviorAnalyzer:
      @staticmethod
-     def fetch_user_behavior_data(crate: EnrichedCrate) -> Dict:
-         """Fetch user behavior data from GitHub and crates.io"""
-         result = {
+     def _get_github_headers() -> dict[str, str]:
+         """Get headers for GitHub API requests, including auth if available."""
+         headers = {"Accept": "application/vnd.github.v3+json"}
+         if token := os.environ.get("GITHUB_TOKEN"):
+             headers["Authorization"] = f"token {token}"
+         return headers
+
+     @staticmethod
+     def fetch_user_behavior_data(crate: EnrichedCrate) -> dict[str, Any]:
+         """Fetch user behavior data from GitHub and crates.io."""
+         result: dict[str, Any] = {
              "issues": [],
              "pull_requests": [],
              "version_adoption": {},
-             "community_metrics": {}
+             "community_metrics": {},
          }
-
-         crate_name = crate.name
          repo_url = crate.repository
-
-         # Extract owner/repo from URL
          if not repo_url or "github.com" not in repo_url:
              return result

-         parts = repo_url.rstrip('/').split('/')
-         if len(parts) < 2:
+         match = re.search(r"github\.com/([^/]+)/([^/]+)", repo_url)
+         if not match:
              return result
-         owner, repo = parts[-2], parts[-1]
+         owner, repo = match.groups()
+         repo = repo.replace(".git", "")

-         # Setup GitHub API access - use token if available
-         headers = {"Accept": "application/vnd.github.v3+json"}
-         if os.environ.get("GITHUB_TOKEN"):
-             headers["Authorization"] = f"token {
-                 os.environ.get('GITHUB_TOKEN')}"
+         headers = UserBehaviorAnalyzer._get_github_headers()
+         UserBehaviorAnalyzer._fetch_github_activity(owner, repo, headers, result)
+         UserBehaviorAnalyzer._fetch_crates_io_versions(crate.name, result)

-         # Fetch recent issues and PRs
-         try:
-             # Get issues (last 30)
-             issues_url = f"https://api.github.com/repos/{owner}/{repo}/issues?state=all&per_page=30"
-             issues_resp = requests.get(issues_url, headers=headers)
-             if issues_resp.ok:
-                 issues_data = issues_resp.json()
-
-                 # Process issue data
-                 for issue in issues_data:
-                     if "pull_request" in issue:
-                         # This is a PR, not an issue
-                         result["pull_requests"].append({
-                             "number": issue["number"],
-                             "title": issue["title"],
-                             "state": issue["state"],
-                             "created_at": issue["created_at"],
-                             "closed_at": issue["closed_at"],
-                             "url": issue["html_url"]
-                         })
-                     else:
-                         # Regular issue
-                         result["issues"].append({
-                             "number": issue["number"],
-                             "title": issue["title"],
-                             "state": issue["state"],
-                             "created_at": issue["created_at"],
-                             "closed_at": issue["closed_at"],
-                             "url": issue["html_url"]
-                         })
-
-             # Fetch commit activity for the past year
-             commits_url = f"https://api.github.com/repos/{owner}/{repo}/stats/commit_activity"
-             commits_resp = requests.get(commits_url, headers=headers)
-             if commits_resp.ok:
-                 result["community_metrics"]["commit_activity"] = commits_resp.json()
-
-             # Rate limiting - be nice to GitHub API
-             time.sleep(1)
-         except Exception as e:
-             print(f"Error fetching GitHub data: {str(e)}")
+         return result

-         # Get version adoption data from crates.io
+     @staticmethod
+     def _fetch_github_activity(
+         owner: str, repo: str, headers: dict[str, str], result: dict[str, Any]
+     ) -> None:
+         """Fetch issues, PRs, and commit activity from GitHub."""
          try:
-             versions_url = f"https://crates.io/api/v1/crates/{crate_name}/versions"
-             versions_resp = requests.get(versions_url)
-             if versions_resp.ok:
-                 versions_data = versions_resp.json()
-                 versions = versions_data.get("versions", [])
-
-                 # Process version data
-                 for version in versions[:10]:  # Top 10 versions
-                     version_num = version["num"]
-                     downloads = version["downloads"]
-                     created_at = version["created_at"]
-
-                     result["version_adoption"][version_num] = {
-                         "downloads": downloads,
-                         "created_at": created_at
+             issues_url = f"{GITHUB_API_URL}/{owner}/{repo}/issues?state=all&per_page=30"
+             issues_resp = requests.get(issues_url, headers=headers, timeout=30)
+             issues_resp.raise_for_status()
+
+             for item in issues_resp.json():
+                 is_pr = "pull_request" in item
+                 data_list = result["pull_requests"] if is_pr else result["issues"]
+                 data_list.append(
+                     {
+                         "number": item["number"],
+                         "title": item["title"],
+                         "state": item["state"],
+                         "created_at": item["created_at"],
+                         "closed_at": item["closed_at"],
+                         "url": item["html_url"],
                      }
-         except Exception as e:
-             print(f"Error fetching crates.io version data: {str(e)}")
+                 )
+
+             # Fetch commit activity (retries on 202)
+             activity_url = f"{GITHUB_API_URL}/{owner}/{repo}/stats/commit_activity"
+             for _ in range(3):  # Retry up to 3 times
+                 activity_resp = requests.get(activity_url, headers=headers, timeout=60)
+                 if activity_resp.status_code == 200:
+                     result["community_metrics"][
+                         "commit_activity"
+                     ] = activity_resp.json()
+                     break
+                 elif activity_resp.status_code == 202:
+                     logging.info(
+                         f"GitHub is calculating stats for {owner}/{repo}, waiting..."
+                     )
+                     time.sleep(2)
+                 else:
+                     activity_resp.raise_for_status()
+
+         except requests.RequestException as e:
+             logging.warning(f"Error fetching GitHub data for {owner}/{repo}: {e}")

-         return result
+     @staticmethod
+     def _fetch_crates_io_versions(crate_name: str, result: dict[str, Any]) -> None:
+         """Fetch version adoption data from crates.io."""
+         try:
+             versions_url = f"{CRATES_IO_API_URL}/{crate_name}/versions"
+             versions_resp = requests.get(versions_url, timeout=30)
+             versions_resp.raise_for_status()
+             versions_data = versions_resp.json().get("versions", [])
+
+             for version in versions_data[:10]:  # Top 10 versions
+                 result["version_adoption"][version["num"]] = {
+                     "downloads": version["downloads"],
+                     "created_at": version["created_at"],
+                 }
+         except requests.RequestException as e:
+             logging.warning(
+                 f"Error fetching crates.io version data for {crate_name}: {e}"
+             )


  class DependencyAnalyzer:
      @staticmethod
-     def analyze_dependencies(crates: List[EnrichedCrate]) -> Dict:
-         """Analyze dependencies between crates"""
-         dependency_graph = {}
+     def analyze_dependencies(crates: list[EnrichedCrate]) -> dict[str, Any]:
+         """Analyze dependencies within a given list of crates."""
          crate_names = {crate.name for crate in crates}
+         dependency_graph: dict[str, list[str]] = {
+             crate.name: [
+                 dep_id
+                 for dep in crate.dependencies
+                 if (dep_id := dep.get("crate_id")) and dep_id in crate_names
+             ]
+             for crate in crates
+         }

-         for crate in crates:
-             deps = []
-             for dep in crate.dependencies:
-                 if dep.get("crate_id") in crate_names:
-                     deps.append(dep.get("crate_id"))
-             dependency_graph[crate.name] = deps
-
-         # Find most depended-upon crates
-         reverse_deps = {}
+         reverse_deps: dict[str, list[str]] = {}
          for crate_name, deps in dependency_graph.items():
              for dep in deps:
-                 if dep not in reverse_deps:
-                     reverse_deps[dep] = []
-                 reverse_deps[dep].append(crate_name)
+                 if dep:  # Ensure dep is not None
+                     reverse_deps.setdefault(dep, []).append(crate_name)
+
+         most_depended = sorted(
+             reverse_deps.items(), key=lambda item: len(item[1]), reverse=True
+         )[:10]

          return {
              "dependency_graph": dependency_graph,
              "reverse_dependencies": reverse_deps,
-             "most_depended": sorted(
-                 reverse_deps.items(),
-                 key=lambda x: len(
-                     x[1]),
-                 reverse=True)[
-                 :10]}
+             "most_depended": most_depended,
+         }
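
The 1.4.1 rewrite exposes the analyzers as static methods with explicit timeouts and logging. A minimal usage sketch of the new SourceAnalyzer entry point, assuming EnrichedCrate (the dataclass imported from config.py) can be constructed with just name, version, and repository; the field set shown here is illustrative and the real signature may require more:

    import logging
    from rust_crate_pipeline.config import EnrichedCrate
    from rust_crate_pipeline.analysis import SourceAnalyzer

    logging.basicConfig(level=logging.INFO)

    # Hypothetical construction; the actual EnrichedCrate dataclass in
    # config.py may define additional required fields.
    crate = EnrichedCrate(
        name="serde",
        version="1.0.0",
        repository="https://github.com/serde-rs/serde",
    )

    # Tries the crates.io download API, then the GitHub tarball API,
    # then a shallow git clone, and returns the aggregated metrics dict.
    metrics = SourceAnalyzer.analyze_crate_source(crate)
    print(metrics.get("file_count"), metrics.get("lines_of_code"))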