PyPI - ossuary-risk - Versions diffs - 0.1.0__py3-none-any.whl - Mend

ossuary-risk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

ossuary/__init__.py +7 -0
ossuary/api/__init__.py +1 -0
ossuary/api/main.py +173 -0
ossuary/cli.py +309 -0
ossuary/collectors/__init__.py +8 -0
ossuary/collectors/base.py +26 -0
ossuary/collectors/git.py +231 -0
ossuary/collectors/github.py +495 -0
ossuary/collectors/npm.py +113 -0
ossuary/collectors/pypi.py +118 -0
ossuary/db/__init__.py +15 -0
ossuary/db/models.py +197 -0
ossuary/db/session.py +49 -0
ossuary/scoring/__init__.py +16 -0
ossuary/scoring/engine.py +318 -0
ossuary/scoring/factors.py +175 -0
ossuary/scoring/reputation.py +326 -0
ossuary/sentiment/__init__.py +5 -0
ossuary/sentiment/analyzer.py +232 -0
ossuary_risk-0.1.0.dist-info/METADATA +241 -0
ossuary_risk-0.1.0.dist-info/RECORD +23 -0
ossuary_risk-0.1.0.dist-info/WHEEL +4 -0
ossuary_risk-0.1.0.dist-info/entry_points.txt +2 -0

ossuary/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""
+Ossuary - OSS Supply Chain Risk Scoring
+Where abandoned packages come to rest.
+"""
+__version__ = "0.1.0"

ossuary/api/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""API module for ossuary."""

ossuary/api/main.py ADDED Viewed

@@ -0,0 +1,173 @@
+"""FastAPI application for ossuary."""
+from datetime import datetime
+from typing import Optional
+from fastapi import FastAPI, HTTPException, Query
+from pydantic import BaseModel
+from ossuary import __version__
+from ossuary.collectors.git import GitCollector
+from ossuary.collectors.github import GitHubCollector
+from ossuary.collectors.npm import NpmCollector
+from ossuary.collectors.pypi import PyPICollector
+from ossuary.scoring.engine import PackageMetrics, RiskScorer
+from ossuary.scoring.factors import RiskLevel
+from ossuary.sentiment.analyzer import SentimentAnalyzer
+# ASGI application object; the @app.get route decorators below register handlers on it.
+# The reported API version is taken from the package's __version__.
+app = FastAPI(
+    title="Ossuary",
+    description="OSS Supply Chain Risk Scoring API - Where abandoned packages come to rest",
+    version=__version__,
+)
+# Response models
+class ScoreResponse(BaseModel):
+    """Response model for score endpoint."""
+    # Package name and ecosystem, echoed back from the request path.
+    package: str
+    ecosystem: str
+    # Repository that was analysed (query parameter or the URL found by the package collector).
+    repo_url: Optional[str]
+    # Final score taken from the scorer's breakdown (breakdown.final_score).
+    score: int
+    # String value of the breakdown's risk level (risk_level.value).
+    risk_level: str
+    # Semaphore marker attached to the risk level (risk_level.semaphore).
+    semaphore: str
+    # Explanation text produced by the scorer.
+    explanation: str
+    # The "components" section of the scorer's breakdown dictionary.
+    breakdown: dict
+    # Recommendations produced by the scorer.
+    recommendations: list[str]
+class HealthResponse(BaseModel):
+    """Health check response."""
+    # Liveness indicator; the /health handler always reports "healthy".
+    status: str
+    # Package version string (ossuary.__version__).
+    version: str
+@app.get("/health", response_model=HealthResponse)
+async def health():
+    """Health check endpoint."""
+    # No dependencies are probed: answering at all is what signals health.
+    payload = HealthResponse(status="healthy", version=__version__)
+    return payload
+@app.get("/score/{ecosystem}/{package:path}", response_model=ScoreResponse)
+async def get_score(
+    ecosystem: str,
+    package: str,
+    repo_url: Optional[str] = Query(None, description="Repository URL (auto-detected if not provided)"),
+    cutoff_date: Optional[str] = Query(None, description="Cutoff date for T-1 analysis (YYYY-MM-DD)"),
+):
+    """
+    Calculate risk score for a package.
+    Args:
+        ecosystem: Package ecosystem (npm or pypi)
+        package: Package name
+        repo_url: Optional repository URL
+        cutoff_date: Optional cutoff date for historical analysis
+    """
+    # Error contract:
+    #   400 - unsupported ecosystem, malformed cutoff_date, or no repository URL available
+    #   500 - any unexpected failure while collecting data or scoring
+    if ecosystem not in ("npm", "pypi"):
+        raise HTTPException(status_code=400, detail=f"Unsupported ecosystem: {ecosystem}")
+    cutoff = None
+    if cutoff_date:
+        try:
+            cutoff = datetime.strptime(cutoff_date, "%Y-%m-%d")
+        except ValueError:
+            # The parsing traceback adds nothing for the client; suppress the chained context.
+            raise HTTPException(status_code=400, detail="Invalid date format. Use YYYY-MM-DD") from None
+    try:
+        # Get package info (ecosystem was validated above, so anything that is not npm is pypi)
+        pkg_collector = NpmCollector() if ecosystem == "npm" else PyPICollector()
+        try:
+            pkg_data = await pkg_collector.collect(package)
+        finally:
+            # Release the collector even when the registry lookup fails.
+            await pkg_collector.close()
+        if not repo_url:
+            repo_url = pkg_data.repository_url
+        weekly_downloads = pkg_data.weekly_downloads
+        if not repo_url:
+            raise HTTPException(
+                status_code=400,
+                detail="Could not find repository URL. Please provide with repo_url query parameter",
+            )
+        # Collect git data
+        git_collector = GitCollector()
+        git_metrics = await git_collector.collect(repo_url, cutoff)
+        # Collect GitHub data
+        github_collector = GitHubCollector()
+        try:
+            github_data = await github_collector.collect(repo_url)
+        finally:
+            # Release the collector even when the GitHub lookup fails.
+            await github_collector.close()
+        # Run sentiment analysis
+        sentiment_analyzer = SentimentAnalyzer()
+        commit_sentiment = sentiment_analyzer.analyze_commits([c.message for c in git_metrics.commits])
+        issue_sentiment = sentiment_analyzer.analyze_issues(
+            [{"title": i.title, "body": i.body, "comments": i.comments} for i in github_data.issues]
+        )
+        # Aggregate sentiment: frustration counts are summed, compound scores are averaged
+        total_frustration = commit_sentiment.frustration_count + issue_sentiment.frustration_count
+        avg_sentiment = (commit_sentiment.average_compound + issue_sentiment.average_compound) / 2
+        # Build metrics
+        metrics = PackageMetrics(
+            maintainer_concentration=git_metrics.maintainer_concentration,
+            commits_last_year=git_metrics.commits_last_year,
+            unique_contributors=git_metrics.unique_contributors,
+            top_contributor_email=git_metrics.top_contributor_email,
+            last_commit_date=git_metrics.last_commit_date,
+            weekly_downloads=weekly_downloads,
+            maintainer_username=github_data.maintainer_username,
+            maintainer_public_repos=github_data.maintainer_public_repos,
+            maintainer_total_stars=github_data.maintainer_total_stars,
+            has_github_sponsors=github_data.has_github_sponsors,
+            is_org_owned=github_data.is_org_owned,
+            org_admin_count=github_data.org_admin_count,
+            average_sentiment=avg_sentiment,
+            frustration_detected=total_frustration > 0,
+            frustration_evidence=commit_sentiment.frustration_evidence + issue_sentiment.frustration_evidence,
+        )
+        # Calculate score
+        scorer = RiskScorer()
+        breakdown = scorer.calculate(package, ecosystem, metrics, repo_url)
+        return ScoreResponse(
+            package=package,
+            ecosystem=ecosystem,
+            repo_url=repo_url,
+            score=breakdown.final_score,
+            risk_level=breakdown.risk_level.value,
+            semaphore=breakdown.risk_level.semaphore,
+            explanation=breakdown.explanation,
+            breakdown=breakdown.to_dict()["score"]["components"],
+            recommendations=breakdown.recommendations,
+        )
+    except HTTPException:
+        # Deliberate HTTP errors (e.g. the 400 for a missing repository URL) must reach
+        # the client unchanged instead of being rewrapped as a 500 by the handler below.
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+@app.get("/")
+async def root():
+    """Root endpoint with API info."""
+    # Route templates advertised to clients.
+    endpoint_map = {
+        "score": "/score/{ecosystem}/{package}",
+        "health": "/health",
+    }
+    api_info = {
+        "name": "Ossuary",
+        "description": "OSS Supply Chain Risk Scoring API",
+        "version": __version__,
+        "docs": "/docs",
+        "endpoints": endpoint_map,
+    }
+    return api_info

ossuary/cli.py ADDED Viewed

@@ -0,0 +1,309 @@
+"""Command-line interface for ossuary."""
+import asyncio
+import json
+import sys
+from datetime import datetime
+from typing import Optional
+import typer
+from rich.console import Console
+from rich.panel import Panel
+from rich.table import Table
+from ossuary import __version__
+from ossuary.collectors.git import GitCollector
+from ossuary.collectors.github import GitHubCollector
+from ossuary.collectors.npm import NpmCollector
+from ossuary.collectors.pypi import PyPICollector
+from ossuary.db.session import init_db
+from ossuary.scoring.engine import PackageMetrics, RiskScorer
+from ossuary.scoring.factors import RiskLevel
+from ossuary.scoring.reputation import ReputationScorer
+from ossuary.sentiment.analyzer import SentimentAnalyzer
+# Root Typer application; the @app.callback() and @app.command() functions below attach to it.
+app = typer.Typer(
+    name="ossuary",
+    help="OSS Supply Chain Risk Scoring - Where abandoned packages come to rest",
+    add_completion=False,
+)
+# Shared Rich console used for the CLI output produced in this module.
+console = Console()
+def version_callback(value: bool):
+    """Print the ossuary version and exit when the version flag is set.
+    Args:
+        value: Value of the ``--version`` option; falsy values make this a no-op.
+    Raises:
+        typer.Exit: After the version has been printed.
+    """
+    if not value:
+        return
+    console.print(f"ossuary version {__version__}")
+    raise typer.Exit()
+@app.callback()
+def main(
+    version: bool = typer.Option(
+        None,
+        "--version",
+        "-v",
+        callback=version_callback,
+        is_eager=True,
+        help="Show version and exit",
+    ),
+):
+    """Ossuary - OSS Supply Chain Risk Scoring."""
+    # Nothing to do here: the function exists only to declare the global --version
+    # option, whose work is done by version_callback. The docstring is the body.
+@app.command()
+def init():
+    """Initialize the database."""
+    # Announce, delegate to ossuary.db.session.init_db, then confirm success.
+    console.print("Initializing database...")
+    init_db()
+    # Only reached when init_db() returned without raising.
+    console.print("[green]Database initialized successfully[/green]")
+@app.command()
+def score(
+    package: str = typer.Argument(..., help="Package name to analyze"),
+    ecosystem: str = typer.Option("npm", "--ecosystem", "-e", help="Package ecosystem (npm, pypi)"),
+    repo_url: Optional[str] = typer.Option(None, "--repo", "-r", help="Repository URL (auto-detected if not provided)"),
+    cutoff_date: Optional[str] = typer.Option(None, "--cutoff", "-c", help="Cutoff date for T-1 analysis (YYYY-MM-DD)"),
+    output_json: bool = typer.Option(False, "--json", "-j", help="Output as JSON"),
+):
+    """Calculate risk score for a package."""
+    # Typer commands are synchronous; run the async implementation on a fresh event loop.
+    scoring_task = _score_package(
+        package=package,
+        ecosystem=ecosystem,
+        repo_url=repo_url,
+        cutoff_date=cutoff_date,
+        output_json=output_json,
+    )
+    asyncio.run(scoring_task)
+async def _score_package(
+    package: str,
+    ecosystem: str,
+    repo_url: Optional[str],
+    cutoff_date: Optional[str],
+    output_json: bool,
+):
+    """Collect data for one package, compute its risk score and print the result.
+    Args:
+        package: Package name to analyze.
+        ecosystem: Package ecosystem; "npm" and "pypi" are supported.
+        repo_url: Repository URL; when empty, the URL reported by the package collector is used.
+        cutoff_date: Optional cutoff date in YYYY-MM-DD format, passed to the git collector.
+        output_json: Print the breakdown as JSON instead of formatted tables.
+    Raises:
+        typer.Exit: With code 1 for an invalid date, an unsupported ecosystem,
+            or when no repository URL could be determined.
+    """
+    cutoff = None
+    if cutoff_date:
+        try:
+            cutoff = datetime.strptime(cutoff_date, "%Y-%m-%d")
+        except ValueError:
+            console.print("[red]Invalid date format. Use YYYY-MM-DD[/red]")
+            raise typer.Exit(1)
+    # Registry collector per supported ecosystem.
+    collector_types = {"npm": NpmCollector, "pypi": PyPICollector}
+    with console.status(f"[bold blue]Analyzing {package}...[/bold blue]"):
+        # Get package info
+        collector_cls = collector_types.get(ecosystem)
+        if collector_cls is None:
+            console.print(f"[red]Unsupported ecosystem: {ecosystem}[/red]")
+            raise typer.Exit(1)
+        pkg_collector = collector_cls()
+        try:
+            pkg_data = await pkg_collector.collect(package)
+        finally:
+            # Release the collector even when the registry lookup fails.
+            await pkg_collector.close()
+        if not repo_url:
+            repo_url = pkg_data.repository_url
+        weekly_downloads = pkg_data.weekly_downloads
+        if not repo_url:
+            console.print("[red]Could not find repository URL. Please provide with --repo[/red]")
+            raise typer.Exit(1)
+        console.print(f"  Repository: {repo_url}")
+        # Collect git data
+        console.print("  Collecting git history...")
+        git_collector = GitCollector()
+        git_metrics = await git_collector.collect(repo_url, cutoff)
+        # Try to find top contributor's GitHub username from git email
+        top_contributor_username = None
+        email = git_metrics.top_contributor_email
+        if email and "noreply.github.com" in email:
+            # Format: username@users.noreply.github.com or 12345+username@users.noreply.github.com
+            local_part = email.split("@")[0]
+            if "+" in local_part:
+                top_contributor_username = local_part.split("+")[1]
+            else:
+                top_contributor_username = local_part
+        # Otherwise we'll rely on the git author name or repo owner
+        # Collect GitHub data (pass top contributor info to get correct maintainer data)
+        console.print("  Collecting GitHub data...")
+        github_collector = GitHubCollector()
+        try:
+            github_data = await github_collector.collect(
+                repo_url,
+                top_contributor_username=top_contributor_username,
+                top_contributor_email=git_metrics.top_contributor_email,
+            )
+        finally:
+            # Release the collector even when the GitHub lookup fails.
+            await github_collector.close()
+        # Parse account created date
+        maintainer_account_created = None
+        if github_data.maintainer_account_created:
+            try:
+                # A trailing "Z" is rewritten to an explicit offset before parsing.
+                maintainer_account_created = datetime.fromisoformat(
+                    github_data.maintainer_account_created.replace("Z", "+00:00")
+                )
+            except ValueError:
+                # Best effort: an unparseable timestamp leaves the creation date unknown.
+                pass
+        # Calculate reputation score
+        console.print("  Calculating reputation...")
+        reputation_scorer = ReputationScorer()
+        reputation = reputation_scorer.calculate(
+            username=github_data.maintainer_username,
+            account_created=maintainer_account_created,
+            repos=github_data.maintainer_repos,
+            sponsor_count=github_data.maintainer_sponsor_count,
+            orgs=github_data.maintainer_orgs,
+            packages_maintained=[package],  # At minimum, they maintain this package
+            ecosystem=ecosystem,
+        )
+        # Run sentiment analysis
+        console.print("  Analyzing sentiment...")
+        sentiment_analyzer = SentimentAnalyzer()
+        commit_sentiment = sentiment_analyzer.analyze_commits([c.message for c in git_metrics.commits])
+        issue_sentiment = sentiment_analyzer.analyze_issues(
+            [{"title": i.title, "body": i.body, "comments": i.comments} for i in github_data.issues]
+        )
+        # Aggregate sentiment: frustration counts are summed, compound scores are averaged
+        total_frustration = commit_sentiment.frustration_count + issue_sentiment.frustration_count
+        avg_sentiment = (commit_sentiment.average_compound + issue_sentiment.average_compound) / 2
+        # Build metrics
+        metrics = PackageMetrics(
+            maintainer_concentration=git_metrics.maintainer_concentration,
+            commits_last_year=git_metrics.commits_last_year,
+            unique_contributors=git_metrics.unique_contributors,
+            top_contributor_email=git_metrics.top_contributor_email,
+            top_contributor_name=git_metrics.top_contributor_name,
+            last_commit_date=git_metrics.last_commit_date,
+            weekly_downloads=weekly_downloads,
+            maintainer_username=github_data.maintainer_username,
+            maintainer_public_repos=github_data.maintainer_public_repos,
+            maintainer_total_stars=github_data.maintainer_total_stars,
+            has_github_sponsors=github_data.has_github_sponsors,
+            maintainer_account_created=maintainer_account_created,
+            maintainer_repos=github_data.maintainer_repos,
+            maintainer_sponsor_count=github_data.maintainer_sponsor_count,
+            maintainer_orgs=github_data.maintainer_orgs,
+            packages_maintained=[package],
+            reputation=reputation,
+            is_org_owned=github_data.is_org_owned,
+            org_admin_count=github_data.org_admin_count,
+            average_sentiment=avg_sentiment,
+            frustration_detected=total_frustration > 0,
+            frustration_evidence=commit_sentiment.frustration_evidence + issue_sentiment.frustration_evidence,
+        )
+        # Calculate score
+        scorer = RiskScorer()
+        breakdown = scorer.calculate(package, ecosystem, metrics, repo_url)
+    # Output results
+    if output_json:
+        # Emit the JSON text verbatim: with Rich's defaults, bracketed text can be taken
+        # for console markup, ":name:" sequences replaced by emoji, and long lines
+        # hard-wrapped, any of which would corrupt the output for machine consumers.
+        console.print(
+            json.dumps(breakdown.to_dict(), indent=2),
+            markup=False,
+            emoji=False,
+            highlight=False,
+            soft_wrap=True,
+        )
+    else:
+        _display_results(breakdown, git_metrics, github_data, commit_sentiment, issue_sentiment)
+def _display_results(breakdown, git_metrics, github_data, commit_sentiment, issue_sentiment):
+    """Render a score breakdown on the console as a panel, tables and text.
+    Args:
+        breakdown: Score breakdown returned by the risk scorer.
+        git_metrics: Collected git metrics (not read by this function).
+        github_data: Collected GitHub data; supplies the org admin count.
+        commit_sentiment: Commit sentiment result (not read by this function).
+        issue_sentiment: Issue sentiment result (not read by this function).
+    """
+    # Colour used for the panel text and border, keyed by risk level.
+    level = breakdown.risk_level
+    palette = {
+        RiskLevel.CRITICAL: "red",
+        RiskLevel.HIGH: "orange1",
+        RiskLevel.MODERATE: "yellow",
+        RiskLevel.LOW: "green",
+        RiskLevel.VERY_LOW: "green",
+    }
+    color = palette[level]
+    # Headline panel: semaphore, final score and risk level name
+    headline = f"[bold {color}]{level.semaphore} {breakdown.final_score} - {level.value}[/bold {color}]"
+    panel_title = f"[bold]{breakdown.package_name}[/bold]"
+    console.print(Panel(headline, title=panel_title, border_style=color))
+    # Score breakdown table
+    summary = Table(title="Score Breakdown")
+    summary.add_column("Component", style="cyan")
+    summary.add_column("Value", style="magenta")
+    summary.add_column("Points", justify="right")
+    summary.add_row(
+        "Base Risk (Concentration)",
+        f"{breakdown.maintainer_concentration:.0f}%",
+        f"{breakdown.base_risk:+d}",
+    )
+    summary.add_row(
+        "Activity Modifier",
+        f"{breakdown.commits_last_year} commits/yr",
+        f"{breakdown.activity_modifier:+d}",
+    )
+    summary.add_row("Protective Factors", "", f"{breakdown.protective_factors.total:+d}")
+    summary.add_section()
+    summary.add_row("[bold]Final Score[/bold]", "", f"[bold]{breakdown.final_score}[/bold]")
+    console.print(summary)
+    # Protective factors detail: only factors with a non-zero score get a row
+    pf = breakdown.protective_factors
+    pf_table = Table(title="Protective Factors Detail")
+    pf_table.add_column("Factor", style="cyan")
+    pf_table.add_column("Points", justify="right")
+    pf_table.add_column("Evidence")
+    # (label, score getter, evidence builder). Both are callables so that scores are read
+    # in row order and evidence is only formatted for rows that are actually shown.
+    plain_factors = (
+        ("Tier-1 Reputation", lambda: pf.reputation_score, lambda: pf.reputation_evidence or ""),
+        ("GitHub Sponsors", lambda: pf.funding_score, lambda: pf.funding_evidence or ""),
+        ("Organization", lambda: pf.org_score, lambda: f"{github_data.org_admin_count} admins"),
+        ("Visibility", lambda: pf.visibility_score, lambda: f"{breakdown.weekly_downloads:,} downloads/wk"),
+        ("Distributed", lambda: pf.distributed_score, lambda: "<40% concentration"),
+        ("Community", lambda: pf.community_score, lambda: f"{breakdown.unique_contributors} contributors"),
+    )
+    for label, get_points, build_evidence in plain_factors:
+        points = get_points()
+        if points != 0:
+            pf_table.add_row(label, f"{points:+d}", build_evidence())
+    # Frustration is the one factor rendered in red; at most two evidence items are shown.
+    if pf.frustration_score != 0:
+        if pf.frustration_evidence:
+            frustration_note = "; ".join(pf.frustration_evidence[:2])
+        else:
+            frustration_note = ""
+        pf_table.add_row(
+            "[red]Frustration[/red]",
+            f"[red]{pf.frustration_score:+d}[/red]",
+            frustration_note,
+        )
+    if pf.sentiment_score != 0:
+        pf_table.add_row("Sentiment", f"{pf.sentiment_score:+d}", "")
+    if pf_table.row_count > 0:
+        console.print(pf_table)
+    # Explanation
+    console.print(f"\n[bold]Explanation:[/bold] {breakdown.explanation}")
+    # Recommendations
+    console.print("\n[bold]Recommendations:[/bold]")
+    for recommendation in breakdown.recommendations:
+        console.print(f"  • {recommendation}")
+@app.command()
+def check(
+    packages_file: str = typer.Argument(..., help="JSON file with packages to check"),
+    output: Optional[str] = typer.Option(None, "--output", "-o", help="Output JSON file"),
+):
+    """Check multiple packages from a JSON file."""
+    # Placeholder command: packages_file and output are accepted but not read yet.
+    # Exit with a non-zero status so scripts do not mistake the stub for success.
+    console.print("[yellow]Batch checking not yet implemented[/yellow]")
+    raise typer.Exit(1)
+# Run the Typer application when this module is executed directly as a script.
+if __name__ == "__main__":
+    app()

ossuary/collectors/__init__.py ADDED Viewed

@@ -0,0 +1,8 @@
+"""Data collectors for various sources."""
+from ossuary.collectors.git import GitCollector
+from ossuary.collectors.github import GitHubCollector
+from ossuary.collectors.npm import NpmCollector
+from ossuary.collectors.pypi import PyPICollector
+__all__ = ["GitCollector", "GitHubCollector", "NpmCollector", "PyPICollector"]

ossuary/collectors/base.py ADDED Viewed

@@ -0,0 +1,26 @@
+"""Base collector interface."""
+from abc import ABC, abstractmethod
+from typing import Any
+class BaseCollector(ABC):
+    """Interface that every data collector implements.
+    Concrete subclasses must override both ``collect`` and ``is_available``;
+    the class itself cannot be instantiated.
+    """
+    @abstractmethod
+    async def collect(self, identifier: str) -> dict[str, Any]:
+        """
+        Gather data for ``identifier``.
+        Args:
+            identifier: Package name, repo URL, or other identifier
+        Returns:
+            Dictionary of collected data
+        """
+    @abstractmethod
+    def is_available(self) -> bool:
+        """Report whether this collector can be used (has required credentials, etc.)."""