rust-crate-pipeline 1.4.0-py3-none-any.whl → 1.4.1-py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- rust_crate_pipeline/__init__.py +18 -27
- rust_crate_pipeline/__main__.py +1 -0
- rust_crate_pipeline/ai_processing.py +718 -596
- rust_crate_pipeline/analysis.py +330 -363
- rust_crate_pipeline/azure_ai_processing.py +462 -0
- rust_crate_pipeline/config.py +46 -28
- rust_crate_pipeline/core/__init__.py +19 -0
- rust_crate_pipeline/core/canon_registry.py +133 -0
- rust_crate_pipeline/core/irl_engine.py +256 -0
- rust_crate_pipeline/core/sacred_chain.py +117 -0
- rust_crate_pipeline/crate_analysis.py +54 -0
- rust_crate_pipeline/crate_list.txt +424 -0
- rust_crate_pipeline/github_token_checker.py +108 -112
- rust_crate_pipeline/main.py +329 -109
- rust_crate_pipeline/network.py +317 -308
- rust_crate_pipeline/pipeline.py +300 -375
- rust_crate_pipeline/production_config.py +24 -27
- rust_crate_pipeline/progress_monitor.py +334 -0
- rust_crate_pipeline/scraping/__init__.py +13 -0
- rust_crate_pipeline/scraping/unified_scraper.py +259 -0
- rust_crate_pipeline/unified_llm_processor.py +637 -0
- rust_crate_pipeline/unified_pipeline.py +548 -0
- rust_crate_pipeline/utils/file_utils.py +32 -5
- rust_crate_pipeline/utils/logging_utils.py +21 -16
- rust_crate_pipeline/version.py +76 -47
- rust_crate_pipeline-1.4.1.dist-info/METADATA +515 -0
- rust_crate_pipeline-1.4.1.dist-info/RECORD +31 -0
- rust_crate_pipeline-1.4.0.dist-info/METADATA +0 -585
- rust_crate_pipeline-1.4.0.dist-info/RECORD +0 -19
- {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/WHEEL +0 -0
- {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/entry_points.txt +0 -0
- {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/licenses/LICENSE +0 -0
- {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,133 @@ rust_crate_pipeline/core/canon_registry.py
+import hashlib
+import logging
+from datetime import datetime, timezone
+from typing import Dict, List, Optional, Any
+from dataclasses import dataclass
+
+
+@dataclass
+class CanonEntry:
+    source: str
+    version: str
+    authority_level: int
+    content_hash: str
+    last_validated: str
+    expiry: Optional[str] = None
+
+    def is_valid(self) -> bool:
+        if self.expiry:
+            expiry_time = datetime.fromisoformat(self.expiry)
+            return datetime.now(timezone.utc) < expiry_time
+        return True
+
+
+class CanonRegistry:
+
+    def __init__(self) -> None:
+        self.canon_entries: Dict[str, CanonEntry] = {}
+        self.authority_chain: List[str] = []
+        self.version = "1.4.0"
+        self.logger = logging.getLogger(__name__)
+
+        self._initialize_default_canon()
+
+    def _initialize_default_canon(self) -> None:
+        default_sources = {
+            "crates.io": {
+                "authority_level": 10,
+                "base_url": "https://crates.io/api/v1/",
+                "version": "1.4.0",
+                "last_validated": datetime.now(timezone.utc).isoformat(),
+            },
+            "github.com": {
+                "authority_level": 8,
+                "base_url": "https://api.github.com/",
+                "version": "3.0",
+                "last_validated": datetime.now(timezone.utc).isoformat(),
+            },
+            "lib.rs": {
+                "authority_level": 6,
+                "base_url": "https://lib.rs/",
+                "version": "1.3.0",
+                "last_validated": datetime.now(timezone.utc).isoformat(),
+            },
+            "docs.rs": {
+                "authority_level": 7,
+                "base_url": "https://docs.rs/",
+                "version": "1.3.0",
+                "last_validated": datetime.now(timezone.utc).isoformat(),
+            },
+        }
+
+        for key, source_info in default_sources.items():
+            self.register_canon(
+                key=key,
+                source=source_info["base_url"],
+                content=f"Default Canon source: {key}",
+                authority_level=source_info["authority_level"]
+            )
+
+    def register_canon(
+        self, key: str, source: str, content: str, authority_level: int = 5
+    ) -> bool:
+        try:
+            content_hash = hashlib.sha256(content.encode()).hexdigest()
+            timestamp = datetime.now(timezone.utc).isoformat()
+
+            canon_entry = CanonEntry(
+                source=source,
+                version=self.version,
+                authority_level=authority_level,
+                content_hash=content_hash,
+                last_validated=timestamp,
+            )
+
+            self.canon_entries[key] = canon_entry
+            self.authority_chain.append(f"{timestamp}:{key}:{authority_level}")
+
+            self.logger.info(f"Canon registered: {key} with authority {authority_level}")
+            return True
+        except Exception as e:
+            self.logger.error(f"Failed to register Canon {key}: {e}")
+            return False
+
+    def get_canon(self, key: str) -> Optional[CanonEntry]:
+        if key in self.canon_entries:
+            canon = self.canon_entries[key]
+            if canon.is_valid():
+                return canon
+            else:
+                self.logger.warning(f"Canon expired: {key}")
+                del self.canon_entries[key]
+        return None
+
+    def get_valid_canon_sources(self) -> List[str]:
+        valid_sources = []
+        for key, entry in self.canon_entries.items():
+            if entry.is_valid():
+                valid_sources.append(key)
+        return valid_sources
+
+    def get_authority_level(self, source: str) -> int:
+        canon = self.get_canon(source)
+        return canon.authority_level if canon else 0
+
+    def audit_trail(self) -> List[str]:
+        return self.authority_chain.copy()
+
+    def get_canon_summary(self) -> Dict[str, Any]:
+        valid_count = len(self.get_valid_canon_sources())
+        total_count = len(self.canon_entries)
+
+        authority_levels = {}
+        for key, entry in self.canon_entries.items():
+            level = entry.authority_level
+            authority_levels[level] = authority_levels.get(level, 0) + 1
+
+        return {
+            "total_canon_entries": total_count,
+            "valid_canon_entries": valid_count,
+            "authority_level_distribution": authority_levels,
+            "version": self.version,
+            "last_operation": self.authority_chain[-1] if self.authority_chain else None,
+        }
@@ -0,0 +1,256 @@ rust_crate_pipeline/core/irl_engine.py
+import json
+import logging
+import time
+from typing import Dict, List, Optional, Any, Tuple
+from abc import ABC, abstractmethod
+
+from .sacred_chain import SacredChainBase, SacredChainTrace, TrustVerdict
+from .canon_registry import CanonRegistry
+
+
+class IRLEngine(SacredChainBase):
+
+    def __init__(self, config: Any, canon_registry: Optional[CanonRegistry] = None) -> None:
+        super().__init__()
+        self.config = config
+        self.canon_registry = canon_registry or CanonRegistry()
+        self.crawler: Optional[Any] = None
+        self.logger = logging.getLogger(__name__)
+
+    async def __aenter__(self) -> "IRLEngine":
+        try:
+            from crawl4ai import AsyncWebCrawler, BrowserConfig
+            browser_config = BrowserConfig(headless=True, browser_type="chromium")
+            self.crawler = AsyncWebCrawler(config=browser_config)
+            await self.crawler.start()
+            self.logger.info("IRL Engine initialized with full traceability")
+        except ImportError:
+            self.logger.warning("Crawl4AI not available - IRL Engine running in limited mode")
+        except Exception as e:
+            self.logger.warning(f"Failed to initialize crawler: {e}")
+
+        return self
+
+    async def __aexit__(self, exc_type: Optional[type], exc_val: Optional[Exception], exc_tb: Optional[Any]) -> None:
+        if self.crawler:
+            try:
+                await self.crawler.stop()
+            except Exception as e:
+                self.logger.warning(f"Error stopping crawler: {e}")
+        self._finalize_audit_log()
+
+    def _finalize_audit_log(self) -> None:
+        if not self.execution_log:
+            return
+
+        audit_file = f"sigil_audit_{int(time.time())}.json"
+        try:
+            with open(audit_file, "w") as f:
+                audit_data = [json.loads(trace.to_audit_log()) for trace in self.execution_log]
+                json.dump(audit_data, f, indent=2)
+            self.logger.info(f"Audit log finalized: {audit_file}")
+        except IOError as e:
+            self.logger.error(f"Failed to write audit log {audit_file}: {e}")
+
+    async def analyze_with_sacred_chain(self, input_data: str) -> SacredChainTrace:
+        canonical_input = self._canonicalize_input(input_data)
+        reasoning_steps = [f"Input canonicalized: '{input_data}' -> '{canonical_input}'"]
+
+        context_sources = await self._gather_validated_context(canonical_input)
+        reasoning_steps.append(f"Context gathered from {len(context_sources)} validated sources")
+
+        # Tuple layout: (steps, metadata, docs, sentiment, ecosystem, quality_score)
+        analysis_results = await self._execute_reasoning_chain(canonical_input, context_sources)
+        reasoning_steps.extend(analysis_results[0])
+
+        suggestion = self._generate_traceable_suggestion(reasoning_steps)
+        verdict, verdict_reason = self._make_trust_decision(
+            reasoning_steps, suggestion, analysis_results[5],
+            analysis_results[2],
+            analysis_results[3],
+            analysis_results[4],
+        )
+        reasoning_steps.append(f"Trust decision: {verdict} - {verdict_reason}")
+
+        irl_score = self._calculate_irl_score(context_sources, reasoning_steps, verdict)
+        reasoning_steps.append(f"IRL confidence: {irl_score:.3f}")
+
+        audit_info = {
+            "metadata": analysis_results[1],
+            "sentiment": analysis_results[3],
+            "ecosystem": analysis_results[4],
+            "quality_score": analysis_results[5],
+            "verdict_reason": verdict_reason,
+        }
+
+        return self.create_sacred_chain_trace(
+            input_data=canonical_input,
+            context_sources=context_sources,
+            reasoning_steps=reasoning_steps,
+            suggestion=suggestion,
+            verdict=verdict,
+            audit_info=audit_info,
+            irl_score=irl_score,
+        )
+
+    def _canonicalize_input(self, input_data: str) -> str:
+        canonical = input_data.strip().lower()
+        if canonical.startswith("crate:"):
+            canonical = canonical[6:]
+        if canonical.startswith("rust:"):
+            canonical = canonical[5:]
+        return canonical
+
+    async def _gather_validated_context(self, input_data: str) -> List[str]:
+        valid_sources = self.canon_registry.get_valid_canon_sources()
+        context_sources = []
+
+        for source in valid_sources:
+            authority_level = self.canon_registry.get_authority_level(source)
+            if authority_level >= 5:
+                context_sources.append(source)
+
+        return context_sources
+
+    async def _execute_reasoning_chain(
+        self, input_data: str, sources: List[str]
+    ) -> Tuple[List[str], Dict[str, Any], Dict[str, Any], Dict[str, Any], Dict[str, Any], float]:
+        reasoning_steps = []
+
+        metadata = await self._extract_basic_metadata(input_data)
+        reasoning_steps.append(f"Metadata extracted: {len(metadata)} fields")
+
+        docs = {}
+        if self.crawler:
+            docs = await self._analyze_documentation(input_data)
+        reasoning_steps.append(f"Documentation analyzed: quality {docs.get('quality_score', 0):.1f}")
+
+        sentiment = await self._analyze_community_sentiment(input_data)
+        reasoning_steps.append(f"Sentiment analyzed: {sentiment.get('overall', 'unknown')}")
+
+        ecosystem = await self._analyze_ecosystem_position(input_data)
+        reasoning_steps.append(f"Ecosystem analyzed: {ecosystem.get('category', 'unknown')}")
+
+        quality_score = self._synthesize_quality_score(metadata, docs, sentiment, ecosystem)
+        reasoning_steps.append(f"Quality score synthesized: {quality_score:.2f}")
+
+        return reasoning_steps, metadata, docs, sentiment, ecosystem, quality_score
+
+    async def _extract_basic_metadata(self, input_data: str) -> Dict[str, Any]:
+        return {
+            "name": input_data,
+            "type": "rust_crate",
+            "source": "manual_input",
+            "extraction_method": "irl_engine",
+        }
+
+    async def _analyze_documentation(self, input_data: str) -> Dict[str, Any]:
+        if not self.crawler:
+            return {"quality_score": 5.0, "error": "No crawler available"}
+
+        try:
+            return {
+                "quality_score": 7.0,
+                "completeness": 0.8,
+                "examples_present": True,
+                "api_documented": True,
+            }
+        except Exception as e:
+            self.logger.error(f"Documentation analysis failed: {e}")
+            return {"quality_score": 5.0, "error": str(e)}
+
+    async def _analyze_community_sentiment(self, input_data: str) -> Dict[str, Any]:
+        return {
+            "overall": "positive",
+            "positive_mentions": 10,
+            "negative_mentions": 2,
+            "neutral_mentions": 5,
+            "total_mentions": 17,
+        }
+
+    async def _analyze_ecosystem_position(self, input_data: str) -> Dict[str, Any]:
+        return {
+            "category": "utilities",
+            "maturity": "stable",
+            "dependencies_count": 5,
+            "reverse_deps_visible": 15,
+            "ecosystem_score": 7.5,
+        }
+
+    def _synthesize_quality_score(
+        self,
+        metadata: Dict[str, Any],
+        docs: Dict[str, Any],
+        sentiment: Dict[str, Any],
+        ecosystem: Dict[str, Any],
+    ) -> float:
+        scores = []
+
+        doc_score = docs.get("quality_score", 5.0)
+        scores.append(doc_score)
+
+        sentiment_score = 5.0
+        if sentiment.get("overall") == "positive":
+            sentiment_score = 8.0
+        elif sentiment.get("overall") == "negative":
+            sentiment_score = 3.0
+        scores.append(sentiment_score)
+
+        ecosystem_score = ecosystem.get("ecosystem_score", 5.0)
+        scores.append(ecosystem_score)
+
+        return sum(scores) / len(scores) if scores else 5.0
+
+    def _generate_traceable_suggestion(self, reasoning_steps: List[str]) -> str:
+        if not reasoning_steps:
+            return "DEFER: Insufficient reasoning data"
+
+        quality_indicators = [step for step in reasoning_steps if "quality" in step.lower()]
+        sentiment_indicators = [step for step in reasoning_steps if "sentiment" in step.lower()]
+
+        if quality_indicators and any("high" in indicator.lower() for indicator in quality_indicators):
+            return "ALLOW: High quality indicators detected"
+        elif sentiment_indicators and any("positive" in indicator.lower() for indicator in sentiment_indicators):
+            return "ALLOW: Positive community sentiment"
+        else:
+            return "DEFER: Requires additional analysis"
+
+    def _make_trust_decision(
+        self,
+        reasoning_steps: List[str],
+        suggestion: str,
+        quality_score: float,
+        docs: Dict[str, Any],
+        sentiment: Dict[str, Any],
+        ecosystem: Dict[str, Any],
+    ) -> Tuple[TrustVerdict, str]:
+        if quality_score >= 8.0:
+            return TrustVerdict.ALLOW, "High quality score"
+        elif quality_score >= 6.0 and sentiment.get("overall") == "positive":
+            return TrustVerdict.ALLOW, "Good quality with positive sentiment"
+        elif quality_score < 4.0:
+            return TrustVerdict.DENY, "Low quality score"
+        elif sentiment.get("overall") == "negative":
+            return TrustVerdict.FLAG, "Negative community sentiment"
+        else:
+            return TrustVerdict.DEFER, "Insufficient data for decision"
+
+    def _calculate_irl_score(
+        self,
+        context_sources: List[str],
+        reasoning_steps: List[str],
+        verdict: TrustVerdict,
+    ) -> float:
+        base_score = 5.0
+
+        authority_bonus = sum(self.canon_registry.get_authority_level(source) for source in context_sources) / 10.0
+        base_score += min(authority_bonus, 2.0)
+
+        reasoning_bonus = min(len(reasoning_steps) * 0.2, 2.0)
+        base_score += reasoning_bonus
+
+        if verdict == TrustVerdict.ALLOW:
+            base_score += 1.0
+        elif verdict == TrustVerdict.DENY:
+            base_score += 0.5
+
+        return min(base_score, 10.0)
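A sketch of how the engine is driven, given the async context-manager protocol above; config is stored but never inspected by IRLEngine itself (typed Any), so None stands in here and "crate:serde" is just an example input:

import asyncio

from rust_crate_pipeline.core.irl_engine import IRLEngine

async def main() -> None:
    # __aenter__ starts a Crawl4AI crawler when available, else degrades gracefully;
    # __aexit__ stops it and writes sigil_audit_<unix-ts>.json from the execution log.
    async with IRLEngine(config=None) as engine:
        trace = await engine.analyze_with_sacred_chain("crate:serde")
        print(trace.verdict, f"{trace.irl_score:.2f}")
        print(trace.to_audit_log())

asyncio.run(main())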
@@ -0,0 +1,117 @@ rust_crate_pipeline/core/sacred_chain.py
+import json
+import hashlib
+import uuid
+from datetime import datetime, timezone
+from typing import Dict, List, Optional, Any
+from dataclasses import dataclass, asdict
+from enum import Enum
+from abc import ABC, abstractmethod
+
+
+class TrustVerdict(Enum):
+    ALLOW = "ALLOW"
+    DENY = "DENY"
+    DEFER = "DEFER"
+    FLAG = "FLAG"
+
+    def __str__(self) -> str:
+        return self.value
+
+    def to_json(self) -> str:
+        return self.value
+
+
+@dataclass
+class SacredChainTrace:
+    input_data: str
+    context_sources: List[str]
+    reasoning_steps: List[str]
+    suggestion: str
+    verdict: TrustVerdict
+    audit_info: Dict[str, Any]
+    irl_score: float
+    execution_id: str
+    timestamp: str
+    canon_version: str
+
+    def to_audit_log(self) -> str:
+        data_dict = asdict(self)
+        data_dict["verdict"] = self.verdict.value
+
+        return json.dumps({
+            "execution_id": self.execution_id,
+            "timestamp": self.timestamp,
+            "sacred_chain": data_dict,
+            "rule_zero_compliant": True,
+        }, indent=2)
+
+    def verify_integrity(self) -> bool:
+        chain_data = f"{self.input_data}{self.context_sources}{self.reasoning_steps}{self.suggestion}"
+        expected_hash = hashlib.sha256(chain_data.encode()).hexdigest()[:16]
+        return expected_hash in self.execution_id
+
+
+class SacredChainBase(ABC):
+
+    def __init__(self) -> None:
+        self.execution_log: List[SacredChainTrace] = []
+        self.canon_version = "1.3.0"
+
+    def generate_execution_id(self, input_data: str) -> str:
+        timestamp = datetime.now(timezone.utc).isoformat()
+        data_hash = hashlib.sha256(input_data.encode()).hexdigest()[:8]
+        unique_id = uuid.uuid4().hex[:8]
+        return f"exec-{data_hash}-{unique_id}-{int(datetime.now().timestamp())}"
+
+    def create_sacred_chain_trace(
+        self,
+        input_data: str,
+        context_sources: List[str],
+        reasoning_steps: List[str],
+        suggestion: str,
+        verdict: TrustVerdict,
+        audit_info: Dict[str, Any],
+        irl_score: float,
+    ) -> SacredChainTrace:
+        execution_id = self.generate_execution_id(input_data)
+        timestamp = datetime.now(timezone.utc).isoformat()
+
+        trace = SacredChainTrace(
+            input_data=input_data,
+            context_sources=context_sources,
+            reasoning_steps=reasoning_steps,
+            suggestion=suggestion,
+            verdict=verdict,
+            audit_info=audit_info,
+            irl_score=irl_score,
+            execution_id=execution_id,
+            timestamp=timestamp,
+            canon_version=self.canon_version,
+        )
+
+        self.execution_log.append(trace)
+        return trace
+
+    @abstractmethod
+    async def analyze_with_sacred_chain(self, input_data: str) -> SacredChainTrace:
+        pass
+
+    def get_audit_summary(self) -> Dict[str, Any]:
+        if not self.execution_log:
+            return {"total_executions": 0, "verdicts": {}, "average_irl_score": 0.0}
+
+        verdict_counts = {}
+        total_irl_score = 0.0
+
+        for trace in self.execution_log:
+            verdict = trace.verdict.value
+            verdict_counts[verdict] = verdict_counts.get(verdict, 0) + 1
+            total_irl_score += trace.irl_score
+
+        return {
+            "total_executions": len(self.execution_log),
+            "verdicts": verdict_counts,
+            "average_irl_score": total_irl_score / len(self.execution_log),
+            "canon_version": self.canon_version,
+            "last_execution": self.execution_log[-1].timestamp if self.execution_log else None,
+        }
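Since SacredChainBase is abstract, a concrete engine only has to supply analyze_with_sacred_chain; a hypothetical stub subclass shows the trace plumbing end to end:

import asyncio

from rust_crate_pipeline.core.sacred_chain import (
    SacredChainBase, SacredChainTrace, TrustVerdict,
)

class StubEngine(SacredChainBase):
    # Hypothetical engine that always defers; real engines do their analysis here.
    async def analyze_with_sacred_chain(self, input_data: str) -> SacredChainTrace:
        return self.create_sacred_chain_trace(
            input_data=input_data,
            context_sources=["crates.io"],
            reasoning_steps=["stub: no analysis performed"],
            suggestion="DEFER: stub engine",
            verdict=TrustVerdict.DEFER,
            audit_info={},
            irl_score=5.0,
        )

engine = StubEngine()
trace = asyncio.run(engine.analyze_with_sacred_chain("serde"))
print(trace.execution_id)          # exec-<8-char hash>-<8-char uuid>-<unix-ts>
print(engine.get_audit_summary())  # {'total_executions': 1, 'verdicts': {'DEFER': 1}, ...}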
@@ -0,0 +1,54 @@ rust_crate_pipeline/crate_analysis.py
+import subprocess
+import tempfile
+import shutil
+import os
+import toml
+from typing import Dict, Any, Optional
+
+class CrateAnalyzer:
+    def __init__(self, crate_source_path: str):
+        self.crate_source_path = crate_source_path
+
+    def run_cargo_cmd(self, cmd, timeout=600) -> Dict[str, Any]:
+        try:
+            result = subprocess.run(
+                cmd,
+                cwd=self.crate_source_path,
+                capture_output=True,
+                text=True,
+                timeout=timeout
+            )
+            return {
+                "cmd": " ".join(cmd),
+                "returncode": result.returncode,
+                "stdout": result.stdout,
+                "stderr": result.stderr,
+            }
+        except Exception as e:
+            return {"cmd": " ".join(cmd), "error": str(e)}
+
+    def analyze(self) -> Dict[str, Any]:
+        results = {}
+        # Build & test
+        results["build"] = self.run_cargo_cmd(["cargo", "build", "--all-features"])
+        results["test"] = self.run_cargo_cmd(["cargo", "test", "--all-features"])
+        # Lint & format
+        results["clippy"] = self.run_cargo_cmd(["cargo", "clippy", "--all-features", "--", "-D", "warnings"])
+        results["fmt"] = self.run_cargo_cmd(["cargo", "fmt", "--", "--check"])
+        # Security
+        results["audit"] = self.run_cargo_cmd(["cargo", "audit"])
+        # Dependency graph
+        results["tree"] = self.run_cargo_cmd(["cargo", "tree"])
+        # Documentation
+        results["doc"] = self.run_cargo_cmd(["cargo", "doc", "--no-deps"])
+        # Provenance
+        vcs_info_path = os.path.join(self.crate_source_path, ".cargo_vcs_info.json")
+        if os.path.exists(vcs_info_path):
+            with open(vcs_info_path) as f:
+                results["vcs_info"] = f.read()
+        # Metadata
+        cargo_toml = os.path.join(self.crate_source_path, "Cargo.toml")
+        if os.path.exists(cargo_toml):
+            with open(cargo_toml) as f:
+                results["metadata"] = toml.load(f)
+        return results
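A usage sketch for CrateAnalyzer, assuming an already-unpacked crate source tree (the path is illustrative) and that cargo plus the cargo-audit subcommand are on PATH:

from rust_crate_pipeline.crate_analysis import CrateAnalyzer

analyzer = CrateAnalyzer("/tmp/extracted/serde-1.0.0")  # hypothetical path
results = analyzer.analyze()

# Each command result mirrors run_cargo_cmd: cmd/returncode/stdout/stderr,
# or cmd/error when the subprocess could not run at all.
if results["build"].get("returncode") == 0:
    print("build ok")

# "metadata" is the parsed Cargo.toml, when present.
pkg = results.get("metadata", {}).get("package", {})
print(pkg.get("name"), pkg.get("version"))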