PyPI - ossuary-risk - Versions diffs - 0.1.0__py3-none-any.whl - Mend

ossuary-risk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

ossuary/__init__.py +7 -0
ossuary/api/__init__.py +1 -0
ossuary/api/main.py +173 -0
ossuary/cli.py +309 -0
ossuary/collectors/__init__.py +8 -0
ossuary/collectors/base.py +26 -0
ossuary/collectors/git.py +231 -0
ossuary/collectors/github.py +495 -0
ossuary/collectors/npm.py +113 -0
ossuary/collectors/pypi.py +118 -0
ossuary/db/__init__.py +15 -0
ossuary/db/models.py +197 -0
ossuary/db/session.py +49 -0
ossuary/scoring/__init__.py +16 -0
ossuary/scoring/engine.py +318 -0
ossuary/scoring/factors.py +175 -0
ossuary/scoring/reputation.py +326 -0
ossuary/sentiment/__init__.py +5 -0
ossuary/sentiment/analyzer.py +232 -0
ossuary_risk-0.1.0.dist-info/METADATA +241 -0
ossuary_risk-0.1.0.dist-info/RECORD +23 -0
ossuary_risk-0.1.0.dist-info/WHEEL +4 -0
ossuary_risk-0.1.0.dist-info/entry_points.txt +2 -0

ossuary/scoring/engine.py ADDED Viewed

@@ -0,0 +1,318 @@
+"""Risk scoring engine implementation."""
+from dataclasses import dataclass, field
+from datetime import datetime, timedelta
+from typing import Optional
+from ossuary.scoring.factors import ProtectiveFactors, RiskBreakdown, RiskLevel
+from ossuary.scoring.reputation import ReputationBreakdown, ReputationScorer
+@dataclass
+class PackageMetrics:
+    """Collected metrics for a package."""
+    # Core metrics from git history
+    maintainer_concentration: float = 0.0
+    commits_last_year: int = 0
+    unique_contributors: int = 0
+    top_contributor_email: str = ""
+    top_contributor_name: str = ""
+    last_commit_date: Optional[datetime] = None
+    # External API data
+    weekly_downloads: int = 0
+    # Maintainer info (basic)
+    maintainer_username: Optional[str] = None
+    maintainer_public_repos: int = 0
+    maintainer_total_stars: int = 0
+    has_github_sponsors: bool = False
+    # Reputation data (for composite scoring)
+    maintainer_account_created: Optional[datetime] = None
+    maintainer_repos: list[dict] = None  # Full repo data
+    maintainer_sponsor_count: int = 0
+    maintainer_orgs: list[str] = None
+    packages_maintained: list[str] = None  # Packages by this maintainer
+    # Computed reputation
+    reputation: Optional[ReputationBreakdown] = None
+    # Repository info
+    is_org_owned: bool = False
+    org_admin_count: int = 0
+    cii_badge_level: str = "none"
+    # Sentiment analysis results
+    average_sentiment: float = 0.0
+    frustration_detected: bool = False
+    frustration_evidence: list[str] = None
+    def __post_init__(self):
+        if self.frustration_evidence is None:
+            self.frustration_evidence = []
+        if self.maintainer_repos is None:
+            self.maintainer_repos = []
+        if self.maintainer_orgs is None:
+            self.maintainer_orgs = []
+        if self.packages_maintained is None:
+            self.packages_maintained = []
+class RiskScorer:
+    """
+    Risk scoring engine implementing the ossuary methodology.
+    Score = Base Risk + Activity Modifier + Protective Factors
+    Range: 0-100 (higher = riskier)
+    The three components are summed and the result is clamped to 0-100 in
+    ``calculate``. "Protective Factors" is a signed total: negative entries
+    lower the score, positive entries (frustration, negative sentiment) raise it.
+    """
+    # Tier-1 maintainer thresholds
+    # NOTE(review): not referenced by any method visible in this class;
+    # reputation is scored through ReputationScorer - confirm these are still needed.
+    TIER1_REPOS_THRESHOLD = 500
+    TIER1_STARS_THRESHOLD = 100_000
+    # Download thresholds for visibility factor
+    # (compared against weekly_downloads with a strict ">")
+    MASSIVE_VISIBILITY_THRESHOLD = 50_000_000
+    HIGH_VISIBILITY_THRESHOLD = 10_000_000
+    def calculate_base_risk(self, concentration: float) -> int:
+        """
+        Calculate base risk from maintainer concentration.
+        Args:
+            concentration: Percentage of commits from top contributor (0-100)
+        Returns:
+            Base risk score (20-100)
+        """
+        if concentration < 30:
+            return 20
+        elif concentration < 50:
+            return 40
+        elif concentration < 70:
+            return 60
+        elif concentration < 90:
+            return 80
+        else:
+            return 100
+    def calculate_activity_modifier(self, commits_last_year: int) -> int:
+        """
+        Calculate activity modifier from commit frequency.
+        Args:
+            commits_last_year: Number of commits in the last 12 months
+        Returns:
+            Activity modifier (-30 to +20)
+        """
+        if commits_last_year > 50:
+            return -30  # Active: reduces risk significantly
+        elif commits_last_year >= 12:
+            return -15  # Moderate: reduces risk somewhat
+        elif commits_last_year >= 4:
+            return 0  # Low: neutral
+        else:
+            return 20  # Abandoned: increases risk critically
+    def calculate_protective_factors(
+        self, metrics: PackageMetrics, ecosystem: str = "npm"
+    ) -> ProtectiveFactors:
+        """
+        Calculate all protective factors.
+        Args:
+            metrics: Collected package metrics
+            ecosystem: Package ecosystem for reputation lookup
+        Returns:
+            ProtectiveFactors breakdown
+        """
+        pf = ProtectiveFactors()
+        # Factor 1: Maintainer Reputation (composite score)
+        if metrics.reputation:
+            # Use pre-calculated reputation
+            reputation = metrics.reputation
+        else:
+            # Calculate reputation on the fly
+            reputation_scorer = ReputationScorer()
+            reputation = reputation_scorer.calculate(
+                username=metrics.maintainer_username or "",
+                account_created=metrics.maintainer_account_created,
+                repos=metrics.maintainer_repos,
+                sponsor_count=metrics.maintainer_sponsor_count,
+                orgs=metrics.maintainer_orgs,
+                packages_maintained=metrics.packages_maintained,
+                ecosystem=ecosystem,
+            )
+        pf.reputation_score = reputation.tier.risk_reduction
+        if pf.reputation_score != 0:
+            pf.reputation_evidence = (
+                f"{reputation.username}: {reputation.total_score} pts ({reputation.tier.value}) - "
+                f"tenure={reputation.tenure_score}, portfolio={reputation.portfolio_score}, "
+                f"stars={reputation.stars_score}, sponsors={reputation.sponsors_score}"
+            )
+        # Factor 2: Economic Sustainability (-15)
+        if metrics.has_github_sponsors:
+            pf.funding_score = -15
+            pf.funding_evidence = "GitHub Sponsors enabled"
+        # Factor 3: Organization Ownership (-15)
+        if metrics.is_org_owned and metrics.org_admin_count >= 3:
+            pf.org_score = -15
+        # Factor 4: Download Visibility (-10 to -20)
+        if metrics.weekly_downloads > self.MASSIVE_VISIBILITY_THRESHOLD:
+            pf.visibility_score = -20
+        elif metrics.weekly_downloads > self.HIGH_VISIBILITY_THRESHOLD:
+            pf.visibility_score = -10
+        # Factor 5: Distributed Governance (-10)
+        if metrics.maintainer_concentration < 40:
+            pf.distributed_score = -10
+        # Factor 6: Active Community (-10)
+        if metrics.unique_contributors > 20:
+            pf.community_score = -10
+        # Factor 7: CII Best Practices (-10)
+        if metrics.cii_badge_level in ("gold", "silver", "passing"):
+            pf.cii_score = -10
+        # Factor 8: Economic Frustration (+20)
+        if metrics.frustration_detected:
+            pf.frustration_score = 20
+            pf.frustration_evidence = metrics.frustration_evidence
+        # Factor 9: Sentiment Analysis (-10 to +10)
+        # Negative sentiment (< -0.3) increases risk
+        # Positive sentiment (> 0.3) slightly reduces risk
+        if metrics.average_sentiment < -0.3:
+            pf.sentiment_score = 10
+            pf.sentiment_evidence = ["Negative sentiment detected in communications"]
+        elif metrics.average_sentiment > 0.3:
+            pf.sentiment_score = -5
+        return pf
+    def generate_explanation(self, breakdown: RiskBreakdown) -> str:
+        """Generate human-readable explanation of the score."""
+        parts = []
+        # Concentration explanation
+        conc = breakdown.maintainer_concentration
+        if conc >= 90:
+            parts.append(f"Critical concentration ({conc:.0f}%): single person controls nearly all commits")
+        elif conc >= 70:
+            parts.append(f"High concentration ({conc:.0f}%): majority of commits from one person")
+        elif conc >= 50:
+            parts.append(f"Moderate concentration ({conc:.0f}%): some bus factor risk")
+        else:
+            parts.append(f"Distributed commits ({conc:.0f}%): healthy contributor diversity")
+        # Activity explanation
+        if breakdown.activity_modifier == 20:
+            parts.append("Project appears abandoned (<4 commits/year)")
+        elif breakdown.activity_modifier == -30:
+            parts.append("Actively maintained (>50 commits/year)")
+        elif breakdown.activity_modifier == -15:
+            parts.append("Moderately active (12-50 commits/year)")
+        elif breakdown.activity_modifier == 0:
+            parts.append("Low activity (4-11 commits/year)")
+        # Protective factors summary
+        pf_total = breakdown.protective_factors.total
+        if pf_total < -30:
+            parts.append(f"Strong protective factors ({pf_total:+d} points)")
+        elif pf_total < 0:
+            parts.append(f"Some protective factors ({pf_total:+d} points)")
+        elif pf_total > 0:
+            parts.append(f"Warning signals present ({pf_total:+d} points)")
+        # Frustration alert
+        if breakdown.protective_factors.frustration_score > 0:
+            parts.append("ALERT: Economic frustration signals detected")
+        return f"{breakdown.risk_level.semaphore} {breakdown.risk_level.value} ({breakdown.final_score}). " + ". ".join(
+            parts
+        )
+    def generate_recommendations(self, breakdown: RiskBreakdown) -> list[str]:
+        """Generate actionable recommendations based on the score."""
+        recs = []
+        if breakdown.final_score >= 80:
+            recs.append("IMMEDIATE: Identify alternative packages or prepare to fork")
+            recs.append("Do not accept new versions without manual code review")
+            recs.append("Monitor for maintainer changes or ownership transfers")
+        elif breakdown.final_score >= 60:
+            recs.append("Review new releases carefully before updating")
+            recs.append("Consider contributing to reduce maintainer concentration")
+            recs.append("Monitor project health metrics monthly")
+        elif breakdown.final_score >= 40:
+            recs.append("Standard monitoring recommended")
+            recs.append("Keep dependencies updated")
+        else:
+            recs.append("Low risk - standard dependency management practices apply")
+        # Specific recommendations
+        if breakdown.protective_factors.frustration_score > 0:
+            recs.insert(0, "URGENT: Maintainer frustration detected - elevated sabotage risk")
+        if breakdown.maintainer_concentration > 90 and breakdown.commits_last_year < 10:
+            recs.insert(0, "HIGH PRIORITY: Single maintainer + low activity = prime takeover target")
+        return recs
+    def calculate(
+        self,
+        package_name: str,
+        ecosystem: str,
+        metrics: PackageMetrics,
+        repo_url: Optional[str] = None,
+    ) -> RiskBreakdown:
+        """
+        Calculate complete risk score for a package.
+        Args:
+            package_name: Name of the package
+            ecosystem: Package ecosystem (npm, pypi)
+            metrics: Collected package metrics
+            repo_url: Repository URL (optional)
+        Returns:
+            Complete RiskBreakdown
+        """
+        breakdown = RiskBreakdown(
+            package_name=package_name,
+            ecosystem=ecosystem,
+            repo_url=repo_url,
+        )
+        # Copy metrics
+        breakdown.maintainer_concentration = metrics.maintainer_concentration
+        breakdown.commits_last_year = metrics.commits_last_year
+        breakdown.unique_contributors = metrics.unique_contributors
+        breakdown.weekly_downloads = metrics.weekly_downloads
+        # Calculate components
+        breakdown.base_risk = self.calculate_base_risk(metrics.maintainer_concentration)
+        breakdown.activity_modifier = self.calculate_activity_modifier(metrics.commits_last_year)
+        breakdown.protective_factors = self.calculate_protective_factors(metrics, ecosystem)
+        # Calculate final score (clamped to 0-100)
+        raw_score = breakdown.base_risk + breakdown.activity_modifier + breakdown.protective_factors.total
+        breakdown.final_score = max(0, min(100, raw_score))
+        # Determine risk level
+        breakdown.risk_level = RiskLevel.from_score(breakdown.final_score)
+        # Generate explanation and recommendations
+        breakdown.explanation = self.generate_explanation(breakdown)
+        breakdown.recommendations = self.generate_recommendations(breakdown)
+        return breakdown

ossuary/scoring/factors.py ADDED Viewed

@@ -0,0 +1,175 @@
+"""Risk scoring factors and data structures."""
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Optional
+class RiskLevel(str, Enum):
+    """Risk level classification."""
+    CRITICAL = "CRITICAL"
+    HIGH = "HIGH"
+    MODERATE = "MODERATE"
+    LOW = "LOW"
+    VERY_LOW = "VERY_LOW"
+    @classmethod
+    def from_score(cls, score: int) -> "RiskLevel":
+        """Get risk level from numeric score."""
+        if score >= 80:
+            return cls.CRITICAL
+        elif score >= 60:
+            return cls.HIGH
+        elif score >= 40:
+            return cls.MODERATE
+        elif score >= 20:
+            return cls.LOW
+        else:
+            return cls.VERY_LOW
+    @property
+    def semaphore(self) -> str:
+        """Get semaphore emoji for this risk level."""
+        return {
+            RiskLevel.CRITICAL: "🔴",
+            RiskLevel.HIGH: "🟠",
+            RiskLevel.MODERATE: "🟡",
+            RiskLevel.LOW: "🟢",
+            RiskLevel.VERY_LOW: "🟢",
+        }[self]
+    @property
+    def description(self) -> str:
+        """Human-readable description of the risk level."""
+        return {
+            RiskLevel.CRITICAL: "Immediate risk - action required",
+            RiskLevel.HIGH: "Elevated risk - intervention recommended",
+            RiskLevel.MODERATE: "Requires active monitoring",
+            RiskLevel.LOW: "Minor concerns, generally stable",
+            RiskLevel.VERY_LOW: "Safe, well-governed package",
+        }[self]
+@dataclass
+class ProtectiveFactors:
+    """Breakdown of protective factors that reduce risk."""
+    # Factor scores (negative = reduces risk, positive = increases risk)
+    reputation_score: int = 0  # -25 for tier-1 maintainer
+    funding_score: int = 0  # -15 for GitHub Sponsors
+    org_score: int = 0  # -15 for org with 3+ admins
+    visibility_score: int = 0  # -20 for >50M downloads, -10 for >10M
+    distributed_score: int = 0  # -10 for <40% concentration
+    community_score: int = 0  # -10 for >20 contributors
+    cii_score: int = 0  # -10 for CII badge
+    frustration_score: int = 0  # +20 for detected frustration
+    sentiment_score: int = 0  # -10 to +20 based on sentiment analysis
+    # Evidence for each factor
+    reputation_evidence: Optional[str] = None
+    funding_evidence: Optional[str] = None
+    frustration_evidence: list[str] = field(default_factory=list)
+    sentiment_evidence: list[str] = field(default_factory=list)
+    @property
+    def total(self) -> int:
+        """Calculate total protective factor modifier."""
+        return (
+            self.reputation_score
+            + self.funding_score
+            + self.org_score
+            + self.visibility_score
+            + self.distributed_score
+            + self.community_score
+            + self.cii_score
+            + self.frustration_score
+            + self.sentiment_score
+        )
+    def to_dict(self) -> dict:
+        """Convert to dictionary for JSON serialization."""
+        return {
+            "reputation": {
+                "score": self.reputation_score,
+                "evidence": self.reputation_evidence,
+            },
+            "funding": {"score": self.funding_score, "evidence": self.funding_evidence},
+            "organization": {"score": self.org_score},
+            "visibility": {"score": self.visibility_score},
+            "distributed_governance": {"score": self.distributed_score},
+            "community": {"score": self.community_score},
+            "cii_badge": {"score": self.cii_score},
+            "frustration": {
+                "score": self.frustration_score,
+                "evidence": self.frustration_evidence,
+            },
+            "sentiment": {
+                "score": self.sentiment_score,
+                "evidence": self.sentiment_evidence,
+            },
+            "total": self.total,
+        }
+@dataclass
+class RiskBreakdown:
+    """Complete risk assessment result."""
+    # Package identification
+    package_name: str
+    ecosystem: str
+    repo_url: Optional[str] = None
+    # Core metrics
+    maintainer_concentration: float = 0.0
+    commits_last_year: int = 0
+    unique_contributors: int = 0
+    weekly_downloads: int = 0
+    # Score components
+    base_risk: int = 0
+    activity_modifier: int = 0
+    protective_factors: ProtectiveFactors = field(default_factory=ProtectiveFactors)
+    # Final score
+    final_score: int = 0
+    risk_level: RiskLevel = RiskLevel.VERY_LOW
+    # Explanation
+    explanation: str = ""
+    recommendations: list[str] = field(default_factory=list)
+    # Data completeness tracking
+    data_sources: dict[str, bool] = field(default_factory=dict)
+    warnings: list[str] = field(default_factory=list)
+    def to_dict(self) -> dict:
+        """Convert to dictionary for JSON serialization."""
+        return {
+            "package": {
+                "name": self.package_name,
+                "ecosystem": self.ecosystem,
+                "repo_url": self.repo_url,
+            },
+            "metrics": {
+                "maintainer_concentration": self.maintainer_concentration,
+                "commits_last_year": self.commits_last_year,
+                "unique_contributors": self.unique_contributors,
+                "weekly_downloads": self.weekly_downloads,
+            },
+            "score": {
+                "final": self.final_score,
+                "risk_level": self.risk_level.value,
+                "semaphore": self.risk_level.semaphore,
+                "components": {
+                    "base_risk": self.base_risk,
+                    "activity_modifier": self.activity_modifier,
+                    "protective_factors": self.protective_factors.to_dict(),
+                },
+            },
+            "explanation": self.explanation,
+            "recommendations": self.recommendations,
+            "data_sources": self.data_sources,
+            "warnings": self.warnings,
+        }