PyPI - rust-crate-pipeline - Versions diffs - 1.4.1__tar.gz → 1.4.3__tar.gz - Mend

rust-crate-pipeline 1.4.1__tar.gz → 1.4.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59) hide show

rust_crate_pipeline-1.4.3/CHANGELOG_v1.4.2.md ADDED Viewed

@@ -0,0 +1,10 @@
+# Changelog v1.4.2
+## [1.4.2] - {today}
+### Changed
+- Updated project to version 1.4.2 for release.
+- General maintenance and dependency updates.
+- Ensured `setup.py` dynamically reads version from `version.py`.
+---

rust_crate_pipeline-1.4.3/CHANGELOG_v1.4.3.md ADDED Viewed

@@ -0,0 +1,13 @@
+# Changelog v1.4.3
+## [1.4.3] - {today}
+### Added
+- Implemented full crate analysis, including `cargo check`, `cargo clippy`, and `cargo audit`, to adhere to "Rule Zero".
+- Added `--crates-file` argument to `run_pipeline_with_llm.py` to allow processing a large number of crates from a file.
+### Changed
+- Incremented project version to 1.4.3.
+- Fixed various bugs in the pipeline execution and data handling.
+---

{rust_crate_pipeline-1.4.1 → rust_crate_pipeline-1.4.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rust-crate-pipeline
-Version: 1.4.1
+Version: 1.4.3
 Summary: A comprehensive pipeline for analyzing Rust crates with AI enrichment and enhanced scraping
 Home-page: https://github.com/SigilDERG/rust-crate-pipeline
 Author: SigilDERG Team

{rust_crate_pipeline-1.4.1 → rust_crate_pipeline-1.4.3}/README_LLM_PROVIDERS.md RENAMED Viewed

@@ -51,12 +51,29 @@ python run_pipeline_with_llm.py --llm-provider <provider> --llm-model <model> --
 ### Provider-Specific Configuration
 #### Azure OpenAI
+Set the following environment variables:
+```bash
+export AZURE_OPENAI_ENDPOINT="<your_endpoint>"
+export AZURE_OPENAI_API_KEY="<your_api_key>"
+export AZURE_OPENAI_DEPLOYMENT_NAME="<your_deployment_name>"
+```
+Then, run the pipeline:
 ```bash
 python run_pipeline_with_llm.py \
   --llm-provider azure \
   --llm-model gpt-4o \
-  --llm-api-key YOUR_AZURE_API_KEY \
-  --azure-deployment gpt-4o \
+  --crates tokio serde
+```
+You can still override these with command-line arguments if needed:
+```bash
+python run_pipeline_with_llm.py \\
+  --llm-provider azure \\
+  --llm-model gpt-4o \\
+  --llm-api-key YOUR_AZURE_API_KEY \\
+  --azure-deployment YOUR_AZURE_DEPLOYMENT \\
   --crates tokio serde
 ```

{rust_crate_pipeline-1.4.1 → rust_crate_pipeline-1.4.3}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "rust-crate-pipeline"
-version = "1.4.1"
+version = "1.4.3"
 authors = [
     {name = "SigilDERG Team", email = "sigilderg@example.com"}
 ]

{rust_crate_pipeline-1.4.1 → rust_crate_pipeline-1.4.3}/rust_crate_pipeline/config.py RENAMED Viewed

@@ -2,7 +2,7 @@
 import os
 import warnings
 from dataclasses import dataclass, field, asdict
-from typing import Any, Union, TYPE_CHECKING
+from typing import Any, Union, TYPE_CHECKING, Optional
 if TYPE_CHECKING:
     from typing import Dict, List
@@ -37,14 +37,21 @@ class PipelineConfig:
     )
     crawl4ai_timeout: int = 30
     output_path: str = "output"
+    llm_max_retries: int = 3
+    output_dir: str = "output"
+    verbose: bool = False
+    budget: Optional[float] = None
     # Azure OpenAI Configuration
     use_azure_openai: bool = True
-    azure_openai_endpoint: str = "https://david-mc08tirc-eastus2.services.ai.azure.com/"
-    azure_openai_api_key: str = "2hw0jjqwjtKke7DMGiJSPtlj6GhuLCNdQWPXoDGN2I3JMvzp4PmGJQQJ99BFACHYHv6XJ3w3AAAAACOGFPYA"
-    azure_openai_deployment_name: str = "gpt-4o"  # or your specific deployment name
+    azure_openai_endpoint: str = os.getenv("AZURE_OPENAI_ENDPOINT", "https://david-mc08tirc-eastus2.services.ai.azure.com/")
+    azure_openai_api_key: str = os.getenv("AZURE_OPENAI_API_KEY", "2hw0jjqwjtKke7DMGiJSPtlj6GhuLCNdQWPXoDGN2I3JMvzp4PmGJQQJ99BFACHYHv6XJ3w3AAAAACOGFPYA")
+    azure_openai_deployment_name: str = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME", "gpt-4o")
     azure_openai_api_version: str = "2024-02-15-preview"
+    class Config:
+        validate_assignment = True
 @dataclass
 class CrateMetadata:

{rust_crate_pipeline-1.4.1 → rust_crate_pipeline-1.4.3}/rust_crate_pipeline/core/irl_engine.py RENAMED Viewed

@@ -14,29 +14,13 @@ class IRLEngine(SacredChainBase):
         super().__init__()
         self.config = config
         self.canon_registry = canon_registry or CanonRegistry()
-        self.crawler: Optional[Any] = None
         self.logger = logging.getLogger(__name__)
     async def __aenter__(self) -> "IRLEngine":
-        try:
-            from crawl4ai import AsyncWebCrawler, BrowserConfig
-            browser_config = BrowserConfig(headless=True, browser_type="chromium")
-            self.crawler = AsyncWebCrawler(config=browser_config)
-            await self.crawler.start()
-            self.logger.info("IRL Engine initialized with full traceability")
-        except ImportError:
-            self.logger.warning("Crawl4AI not available - IRL Engine running in limited mode")
-        except Exception as e:
-            self.logger.warning(f"Failed to initialize crawler: {e}")
+        self.logger.info("IRL Engine initialized with full traceability")
         return self
     async def __aexit__(self, exc_type: Optional[type], exc_val: Optional[Exception], exc_tb: Optional[Any]) -> None:
-        if self.crawler:
-            try:
-                await self.crawler.stop()
-            except Exception as e:
-                self.logger.warning(f"Error stopping crawler: {e}")
         self._finalize_audit_log()
     def _finalize_audit_log(self) -> None:
@@ -120,9 +104,8 @@ class IRLEngine(SacredChainBase):
         reasoning_steps.append(f"Metadata extracted: {len(metadata)} fields")
         docs = {}
-        if self.crawler:
-            docs = await self._analyze_documentation(input_data)
-            reasoning_steps.append(f"Documentation analyzed: quality {docs.get('quality_score', 0):.1f}")
+        docs = await self._analyze_documentation(input_data)
+        reasoning_steps.append(f"Documentation analyzed: quality {docs.get('quality_score', 0):.1f}")
         sentiment = await self._analyze_community_sentiment(input_data)
         reasoning_steps.append(f"Sentiment analyzed: {sentiment.get('overall', 'unknown')}")
@@ -144,9 +127,6 @@ class IRLEngine(SacredChainBase):
         }
     async def _analyze_documentation(self, input_data: str) -> Dict[str, Any]:
-        if not self.crawler:
-            return {"quality_score": 5.0, "error": "No crawler available"}
         try:
             return {
                 "quality_score": 7.0,

{rust_crate_pipeline-1.4.1 → rust_crate_pipeline-1.4.3}/rust_crate_pipeline/main.py RENAMED Viewed

@@ -468,7 +468,7 @@ def main() -> None:
                 logging.info("Falling back to standard pipeline")
                 logging.debug("Creating standard pipeline as Sigil fallback")
-                standard_pipeline = CrateDataPipeline(config)
+                standard_pipeline = CrateDataPipeline(config, **pipeline_kwargs)
                 logging.debug("Standard pipeline created, about to run asynchronously")
                 # Run standard pipeline (asynchronous)
@@ -487,7 +487,7 @@ def main() -> None:
         else:
             logging.info("Standard pipeline mode")
             logging.debug("Creating standard pipeline")
-            standard_pipeline = CrateDataPipeline(config)
+            standard_pipeline = CrateDataPipeline(config, **pipeline_kwargs)
             logging.info(f"Starting pipeline with {len(vars(args))} arguments")
             logging.debug("Standard pipeline created, about to run asynchronously")

{rust_crate_pipeline-1.4.1 → rust_crate_pipeline-1.4.3}/rust_crate_pipeline/pipeline.py RENAMED Viewed

@@ -34,10 +34,21 @@ except ImportError:
     logging.warning("Enhanced scraping not available - using basic methods")
+class CustomJSONEncoder(json.JSONEncoder):
+    """Custom JSON encoder to handle non-serializable objects"""
+    def default(self, obj):
+        if hasattr(obj, 'to_dict'):
+            return obj.to_dict()
+        elif hasattr(obj, '__dict__'):
+            return obj.__dict__
+        else:
+            return str(obj)
 class CrateDataPipeline:
     """Orchestrates the entire data collection, enrichment, and analysis pipeline."""
-    def __init__(self, config: PipelineConfig) -> None:
+    def __init__(self, config: PipelineConfig, crate_list: "List[str] | None" = None, **kwargs) -> None:
         self.config = config
         self.api_client = CrateAPIClient(config)
         self.github_client = GitHubBatchClient(config)
@@ -60,7 +71,13 @@ class CrateDataPipeline:
         # Initialize cargo analyzer
         self.cargo_analyzer = CrateAnalyzer(".")
-        self.crates = self._get_crate_list()
+        # Use provided crate_list or load from file
+        if crate_list:
+            self.crates = crate_list
+            logging.info(f"Using provided crate list: {len(crate_list)} crates")
+        else:
+            self.crates = self._get_crate_list()
         self.output_dir = self._create_output_dir()
         self.enhanced_scraper: Any = (
             self._initialize_enhanced_scraper()
@@ -280,7 +297,7 @@ class CrateDataPipeline:
         with open(filename, "w") as f:
             for item in data:
-                f.write(json.dumps(item.to_dict()) + "\n")
+                f.write(json.dumps(item.to_dict(), cls=CustomJSONEncoder) + "\n")
         logging.info(f"Saved checkpoint to {filename}")
         return filename
@@ -297,7 +314,7 @@ class CrateDataPipeline:
         )
         with open(final_output_path, "w") as f:
             for item in data:
-                f.write(json.dumps(item.to_dict()) + "\n")
+                f.write(json.dumps(item.to_dict(), cls=CustomJSONEncoder) + "\n")
         # Save dependency analysis
         dep_file_path = os.path.join(

{rust_crate_pipeline-1.4.1 → rust_crate_pipeline-1.4.3}/rust_crate_pipeline/unified_llm_processor.py RENAMED Viewed

@@ -13,6 +13,7 @@ if TYPE_CHECKING:
 try:
     import litellm
     from litellm import completion
+    from litellm.cost_calculator import cost_per_token
     LITELLM_AVAILABLE = True
 except ImportError:
     LITELLM_AVAILABLE = False
@@ -44,6 +45,35 @@ class LLMConfig:
     lmstudio_host: Optional[str] = None
+class BudgetManager:
+    """Monitors and enforces spending limits for LLM calls."""
+    def __init__(self, budget: float = 90.0):
+        self.budget = budget
+        self.total_cost = 0.0
+    def update_cost(self, model: str, completion_tokens: int, prompt_tokens: int) -> None:
+        """Update the total cost with the latest API call."""
+        try:
+            cost, _ = cost_per_token(
+                model=model,
+                completion_tokens=completion_tokens,
+                prompt_tokens=prompt_tokens,
+            )
+            self.total_cost += cost
+        except Exception:
+            # If cost cannot be determined, do not track.
+            pass
+    def is_over_budget(self) -> bool:
+        """Check if the cumulative cost has exceeded the budget."""
+        return self.total_cost > self.budget
+    def get_total_cost(self) -> float:
+        """Return the current total cost."""
+        return self.total_cost
 class Section(TypedDict, total=True):
     heading: str
     content: str
@@ -62,9 +92,10 @@ class UnifiedLLMProcessor:
     - And all other LiteLLM providers
     """
-    def __init__(self, config: LLMConfig) -> None:
+    def __init__(self, config: LLMConfig, budget_manager: Optional[BudgetManager] = None) -> None:
         self.config = config
         self.logger = logging.getLogger(__name__)
+        self.budget_manager = budget_manager or BudgetManager()
         if not LITELLM_AVAILABLE:
             raise ImportError("LiteLLM is required. Install with: pip install litellm")
@@ -275,72 +306,50 @@ class UnifiedLLMProcessor:
         max_tokens: Optional[int] = None,
         system_message: str = "You are a helpful AI assistant that analyzes Rust crates and provides insights."
     ) -> Optional[str]:
-        """Call LLM using LiteLLM with provider-specific configuration"""
+        """Call the LLM with the given prompt and parameters."""
+        if self.budget_manager and self.budget_manager.is_over_budget():
+            self.logger.warning("Budget exceeded. Skipping LLM call.")
+            return None
+        model_name = self._get_model_name()
+        # Prepare arguments for the completion call
+        args: Dict[str, Any] = {
+            "model": model_name,
+            "messages": [
+                {"role": "system", "content": system_message},
+                {"role": "user", "content": prompt}
+            ],
+            "temperature": temperature if temperature is not None else self.config.temperature,
+            "max_tokens": max_tokens if max_tokens is not None else self.config.max_tokens,
+            "timeout": self.config.timeout
+        }
+        # Provider-specific arguments
+        if self.config.provider == "azure":
+            args["api_base"] = self.config.api_base
+            args["api_key"] = self.config.api_key
+            args["api_version"] = self.config.azure_api_version
+            # For Azure, model can be just the deployment name
+            args["model"] = self.config.azure_deployment or self.config.model
+        else:
+            args["api_base"] = self._get_api_base()
+            args["api_key"] = self.config.api_key
         try:
-            # Use config defaults if not provided
-            temp = temperature if temperature is not None else self.config.temperature
-            tokens = max_tokens if max_tokens is not None else self.config.max_tokens
-            # Prepare the completion call parameters
-            completion_params: Dict[str, Any] = {
-                "model": self._get_model_name(),
-                "messages": [
-                    {"role": "system", "content": system_message},
-                    {"role": "user", "content": prompt}
-                ],
-                "temperature": temp,
-                "max_tokens": tokens,
-                "timeout": self.config.timeout
-            }
-            # Add provider-specific parameters
-            if self.config.provider == "azure":
-                if self.config.api_base:
-                    completion_params["api_base"] = self.config.api_base
-                if self.config.api_key:
-                    completion_params["api_key"] = self.config.api_key
-                if self.config.azure_deployment:
-                    completion_params["deployment_id"] = self.config.azure_deployment
-                if self.config.azure_api_version:
-                    completion_params["api_version"] = self.config.azure_api_version
-            elif self.config.provider in ["ollama", "lmstudio"]:
-                # Local providers don't need API keys
-                pass
-            else:
-                # Other providers (OpenAI, Anthropic, etc.)
-                if self.config.api_key:
-                    completion_params["api_key"] = self.config.api_key
-                if self.config.api_base:
-                    completion_params["api_base"] = self.config.api_base
-            self.logger.debug(f"Calling LLM with provider: {self.config.provider}, model: {self.config.model}")
+            response = completion(**args)
-            response = completion(**completion_params)
+            # Update budget
+            if self.budget_manager:
+                completion_tokens = response.usage.completion_tokens # type: ignore
+                prompt_tokens = response.usage.prompt_tokens # type: ignore
+                self.budget_manager.update_cost(model=model_name, completion_tokens=completion_tokens, prompt_tokens=prompt_tokens)
+            return response.choices[0].message.content # type: ignore
-            # Handle different response formats from LiteLLM
-            # LiteLLM has complex response objects that vary by provider
-            try:
-                if hasattr(response, 'choices') and response.choices:  # type: ignore[attr-defined]
-                    choice = response.choices[0]  # type: ignore[attr-defined]
-                    if hasattr(choice, 'message') and hasattr(choice.message, 'content'):  # type: ignore[attr-defined]
-                        return choice.message.content  # type: ignore[attr-defined]
-                    elif hasattr(choice, 'content'):  # type: ignore[attr-defined]
-                        return choice.content  # type: ignore[attr-defined]
-                elif hasattr(response, 'content'):  # type: ignore[attr-defined]
-                    return response.content  # type: ignore[attr-defined]
-                elif isinstance(response, str):
-                    return response
-                else:
-                    self.logger.error(f"Unexpected response format: {response}")
-                    return None
-            except Exception as e:
-                self.logger.error(f"Error parsing LLM response: {e}")
-                return None
         except Exception as e:
-            self.logger.error(f"Error calling LLM ({self.config.provider}): {str(e)}")
+            self.logger.error(f"LLM call failed: {e}")
             return None
     def validate_and_retry(
@@ -610,7 +619,9 @@ def create_llm_processor_from_config(pipeline_config: PipelineConfig) -> Unified
             max_retries=pipeline_config.max_retries
         )
-    return UnifiedLLMProcessor(llm_config)
+    budget_manager = BudgetManager(budget=pipeline_config.budget) if pipeline_config.budget is not None else None
+    return UnifiedLLMProcessor(llm_config, budget_manager=budget_manager)
 def create_llm_processor_from_args(
@@ -620,9 +631,10 @@ def create_llm_processor_from_args(
     api_key: Optional[str] = None,
     temperature: float = 0.2,
     max_tokens: int = 256,
+    budget: Optional[float] = None,
     **kwargs
 ) -> UnifiedLLMProcessor:
-    """Create LLM processor from command line arguments"""
+    """Create a UnifiedLLMProcessor from command-line arguments."""
     llm_config = LLMConfig(
         provider=provider,
@@ -634,4 +646,6 @@ def create_llm_processor_from_args(
         **kwargs
     )
-    return UnifiedLLMProcessor(llm_config)
+    budget_manager = BudgetManager(budget=budget) if budget is not None else None
+    return UnifiedLLMProcessor(llm_config, budget_manager=budget_manager)

{rust_crate_pipeline-1.4.1 → rust_crate_pipeline-1.4.3}/rust_crate_pipeline/unified_pipeline.py RENAMED Viewed

@@ -3,6 +3,11 @@ import json
 import logging
 import time
 import argparse
+import os
+import tempfile
+import aiohttp
+import tarfile
+import gzip
 from pathlib import Path
 from typing import Dict, List, Optional, Any, Union, TYPE_CHECKING
@@ -62,8 +67,6 @@ class UnifiedSigilPipeline:
                 "verbose": False,
                 "word_count_threshold": 10,
                 "crawl_config": {
-                    "max_retries": self.config.max_retries,
-                    "timeout": self.config.crawl4ai_timeout,
                 }
             }
             self.scraper = UnifiedScraper(scraper_config)
@@ -108,17 +111,22 @@ class UnifiedSigilPipeline:
         if self.scraper:
             await self.scraper.__aexit__(exc_type, exc_val, exc_tb)
-    async def analyze_crate(self, crate_name: str) -> SacredChainTrace:
+    async def analyze_crate(self, crate_name: str, crate_version: Optional[str] = None) -> SacredChainTrace:
         if not crate_name or not isinstance(crate_name, str):
             raise ValueError("crate_name must be a non-empty string")
         self.logger.info(f"🔍 Starting analysis of crate: {crate_name}")
         try:
+            if crate_version is None:
+                crate_version = await self._get_latest_crate_version(crate_name)
+                if not crate_version:
+                    raise RuntimeError(f"Could not determine latest version for {crate_name}")
             documentation_results = await self._gather_documentation(crate_name)
             sacred_chain_trace = await self._perform_sacred_chain_analysis(
-                crate_name, documentation_results
+                crate_name, crate_version, documentation_results
             )
             await self._generate_analysis_report(crate_name, sacred_chain_trace)
@@ -155,7 +163,7 @@ class UnifiedSigilPipeline:
             raise
     async def _perform_sacred_chain_analysis(
-        self, crate_name: str, documentation_results: Dict[str, ScrapingResult]
+        self, crate_name: str, crate_version: str, documentation_results: Dict[str, ScrapingResult]
     ) -> SacredChainTrace:
         if not self.irl_engine:
             raise RuntimeError("IRL Engine not initialized")
@@ -173,7 +181,7 @@ class UnifiedSigilPipeline:
                 sacred_chain_trace.audit_info["documentation_sources"] = list(documentation_results.keys())
             # Add crate analysis results if available
-            await self._add_crate_analysis_results(crate_name, sacred_chain_trace)
+            await self._add_crate_analysis_results(crate_name, crate_version, sacred_chain_trace)
             # Add AI enrichment if available
             await self._add_ai_enrichment(crate_name, sacred_chain_trace)
@@ -184,22 +192,144 @@ class UnifiedSigilPipeline:
             self.logger.error(f"❌ Sacred Chain analysis failed: {e}")
             raise
-    async def _add_crate_analysis_results(self, crate_name: str, trace: SacredChainTrace) -> None:
+    async def _add_crate_analysis_results(self, crate_name: str, crate_version: str, trace: SacredChainTrace) -> None:
         """Add cargo analysis results to the sacred chain trace"""
         try:
-            # For now, we'll use a temporary directory approach
-            # In a real implementation, you'd download/extract the crate first
-            self.logger.info(f"🔍 Adding crate analysis results for {crate_name}")
-            # This would be implemented based on your crate source strategy
-            # For now, we'll add a placeholder
-            trace.audit_info["crate_analysis"] = {
-                "status": "not_implemented",
-                "note": "Crate analysis requires downloading/extracting the crate source"
-            }
+            self.logger.info(f"🔍 Adding crate analysis results for {crate_name} v{crate_version}")
+            with tempfile.TemporaryDirectory() as temp_dir_str:
+                temp_dir = Path(temp_dir_str)
+                crate_source_path = await self._download_and_extract_crate(crate_name, crate_version, temp_dir)
+                if not crate_source_path:
+                    trace.audit_info["crate_analysis"] = {"status": "error", "note": "Failed to download or extract crate."}
+                    return
+                check_results = await self._run_cargo_command(
+                    ["cargo", "check", "--message-format=json"],
+                    cwd=crate_source_path
+                )
+                clippy_results = await self._run_cargo_command(
+                    ["cargo", "clippy", "--message-format=json"],
+                    cwd=crate_source_path
+                )
+                audit_results = await self._run_cargo_audit(crate_source_path)
+                trace.audit_info["crate_analysis"] = {
+                    "status": "completed",
+                    "check": check_results,
+                    "clippy": clippy_results,
+                    "audit": audit_results,
+                    "note": "Crate analysis performed."
+                }
         except Exception as e:
             self.logger.warning(f"⚠️  Failed to add crate analysis results: {e}")
+            trace.audit_info["crate_analysis"] = {"status": "error", "note": str(e)}
+    async def _download_and_extract_crate(self, crate_name: str, crate_version: str, target_dir: Path) -> Optional[Path]:
+        """Downloads and extracts a crate from crates.io."""
+        crate_url = f"https://static.crates.io/crates/{crate_name}/{crate_name}-{crate_version}.crate"
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.get(crate_url) as response:
+                    if response.status != 200:
+                        self.logger.error(f"Failed to download {crate_url}: HTTP {response.status}")
+                        return None
+                    # Save the .crate file
+                    crate_file_path = target_dir / f"{crate_name}-{crate_version}.crate"
+                    with open(crate_file_path, "wb") as f:
+                        f.write(await response.read())
+                    # Extract the tarball
+                    with gzip.open(crate_file_path, 'rb') as gz_file:
+                        with tarfile.open(fileobj=gz_file, mode='r') as tar_file:
+                            tar_file.extractall(path=target_dir)
+                    # The crate is usually extracted into a directory named `{crate_name}-{crate_version}`
+                    crate_source_dir = target_dir / f"{crate_name}-{crate_version}"
+                    if crate_source_dir.is_dir():
+                        return crate_source_dir
+                    else:
+                        self.logger.error(f"Could not find extracted directory: {crate_source_dir}")
+                        return None
+        except Exception as e:
+            self.logger.error(f"Error downloading or extracting crate {crate_name}: {e}")
+            return None
+    async def _get_latest_crate_version(self, crate_name: str) -> Optional[str]:
+        """Fetches the latest version of a crate from crates.io API."""
+        api_url = f"https://crates.io/api/v1/crates/{crate_name}"
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.get(api_url) as response:
+                    if response.status != 200:
+                        self.logger.error(f"Failed to fetch crate info from {api_url}: HTTP {response.status}")
+                        return None
+                    data = await response.json()
+                    return data.get("crate", {}).get("max_version")
+        except Exception as e:
+            self.logger.error(f"Error fetching latest crate version for {crate_name}: {e}")
+            return None
+    async def _run_cargo_command(self, command: List[str], cwd: Path) -> List[Dict[str, Any]]:
+        """Runs a cargo command and returns the parsed JSON output."""
+        self.logger.info(f"Running command: {' '.join(command)} in {cwd}")
+        process = await asyncio.create_subprocess_exec(
+            *command,
+            cwd=cwd,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE
+        )
+        stdout, stderr = await process.communicate()
+        if process.returncode != 0:
+            self.logger.warning(f"Cargo command failed with exit code {process.returncode}")
+            self.logger.warning(f"Stderr: {stderr.decode(errors='ignore')}")
+        results = []
+        if stdout:
+            for line in stdout.decode(errors='ignore').splitlines():
+                if line.strip():
+                    try:
+                        results.append(json.loads(line))
+                    except json.JSONDecodeError:
+                        self.logger.warning(f"Could not parse JSON line: {line}")
+        return results
+    async def _run_cargo_audit(self, cwd: Path) -> Optional[Dict[str, Any]]:
+        """Runs cargo audit and returns the parsed JSON output."""
+        command = ["cargo", "audit", "--json"]
+        self.logger.info(f"Running command: {' '.join(command)} in {cwd}")
+        process = await asyncio.create_subprocess_exec(
+            *command,
+            cwd=cwd,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE
+        )
+        stdout, stderr = await process.communicate()
+        if process.returncode != 0:
+            # cargo-audit exits with a non-zero status code if vulnerabilities are found.
+            # We still want to parse the output.
+            self.logger.info(f"Cargo audit finished with exit code {process.returncode}")
+        if stdout:
+            try:
+                return json.loads(stdout)
+            except json.JSONDecodeError:
+                self.logger.warning(f"Could not parse cargo audit JSON output: {stdout.decode(errors='ignore')}")
+        if stderr:
+             self.logger.warning(f"Stderr from cargo audit: {stderr.decode(errors='ignore')}")
+        return None
     async def _add_ai_enrichment(self, crate_name: str, trace: SacredChainTrace) -> None:
         """Add AI enrichment results to the sacred chain trace"""
@@ -244,6 +374,9 @@ class UnifiedSigilPipeline:
                 enhanced_dependencies=[]
             )
+            # Store the metadata used for enrichment
+            trace.audit_info["crate_metadata"] = mock_crate.to_dict()
             # Enrich the crate using unified LLM processor
             enriched_crate = self.unified_llm_processor.enrich_crate(mock_crate)
@@ -295,6 +428,9 @@ class UnifiedSigilPipeline:
                 enhanced_dependencies=[]
             )
+            # Store the metadata used for enrichment
+            trace.audit_info["crate_metadata"] = mock_crate.to_dict()
             # Enrich the crate using Azure OpenAI
             enriched_crate = self.ai_enricher.enrich_crate(mock_crate)

rust-crate-pipeline 1.4.1__tar.gz → 1.4.3__tar.gz

rust-crate-pipeline 1.4.1__tar.gz → 1.4.3__tar.gz