ossuary_risk-0.1.0-py3-none-any.whl

This diff shows the content of package versions that have been publicly released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
@@ -0,0 +1,118 @@
+ """PyPI registry collector."""
+
+ import logging
+ from dataclasses import dataclass
+ from typing import Optional
+
+ import httpx
+
+ from ossuary.collectors.base import BaseCollector
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class PyPIData:
+     """Data collected from PyPI."""
+
+     name: str = ""
+     version: str = ""
+     description: str = ""
+     homepage: str = ""
+     repository_url: str = ""
+     weekly_downloads: int = 0
+     maintainers: list[str] = None
+
+     def __post_init__(self):
+         if self.maintainers is None:
+             self.maintainers = []
+
+
+ class PyPICollector(BaseCollector):
+     """Collector for PyPI data."""
+
+     PYPI_URL = "https://pypi.org/pypi"
+     STATS_URL = "https://pypistats.org/api"
+
+     def __init__(self):
+         """Initialize PyPI collector."""
+         self.client = httpx.AsyncClient(timeout=30.0)
+
+     def is_available(self) -> bool:
+         """PyPI collector is always available."""
+         return True
+
+     async def get_package_info(self, package_name: str) -> Optional[dict]:
+         """Get package metadata from PyPI."""
+         try:
+             response = await self.client.get(f"{self.PYPI_URL}/{package_name}/json")
+             if response.status_code == 200:
+                 return response.json()
+         except httpx.HTTPError as e:
+             logger.error(f"PyPI API error: {e}")
+         return None
+
+     async def get_weekly_downloads(self, package_name: str) -> int:
+         """Get approximate weekly download count."""
+         try:
+             response = await self.client.get(f"{self.STATS_URL}/packages/{package_name}/recent")
+             if response.status_code == 200:
+                 data = response.json().get("data", {})
+                 monthly = data.get("last_month", 0)
+                 return monthly // 4  # Approximate weekly
+         except httpx.HTTPError as e:
+             logger.error(f"PyPI stats error: {e}")
+         return 0
+
+     def _extract_repo_url(self, info: dict) -> str:
+         """Extract repository URL from package info."""
+         # Check project_urls first
+         project_urls = info.get("project_urls", {}) or {}
+         for key in ["Repository", "Source", "Source Code", "GitHub", "Code"]:
+             if key in project_urls:
+                 return project_urls[key]
+
+         # Check home_page
+         home_page = info.get("home_page", "") or ""
+         if "github.com" in home_page or "gitlab.com" in home_page:
+             return home_page
+
+         return ""
+
+     async def collect(self, package_name: str) -> PyPIData:
+         """
+         Collect PyPI package data.
+
+         Args:
+             package_name: PyPI package name
+
+         Returns:
+             PyPIData with package information
+         """
+         data = PyPIData(name=package_name)
+
+         # Get package metadata
+         pkg_info = await self.get_package_info(package_name)
+         if pkg_info:
+             info = pkg_info.get("info", {})
+             data.version = info.get("version", "")
+             data.description = info.get("summary", "")
+             data.homepage = info.get("home_page", "")
+             data.repository_url = self._extract_repo_url(info)
+
+             # Get maintainer/author
+             author = info.get("author", "")
+             maintainer = info.get("maintainer", "")
+             if maintainer:
+                 data.maintainers = [maintainer]
+             elif author:
+                 data.maintainers = [author]
+
+         # Get download stats
+         data.weekly_downloads = await self.get_weekly_downloads(package_name)
+
+         return data
+
+     async def close(self):
+         """Close the HTTP client."""
+         await self.client.aclose()
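
For orientation, a minimal usage sketch (not part of the published package): it assumes this module is importable as ossuary.collectors.pypi, a path the diff does not show, and that PyPICollector can be instantiated directly.

    import asyncio

    # Hypothetical import path -- the diff does not show where this file lives in the wheel.
    from ossuary.collectors.pypi import PyPICollector

    async def main() -> None:
        collector = PyPICollector()
        try:
            # collect() combines PyPI JSON metadata with pypistats download counts.
            data = await collector.collect("httpx")
            print(data.version, data.repository_url, data.weekly_downloads)
        finally:
            await collector.close()

    asyncio.run(main())

Note that get_weekly_downloads() returns last_month // 4, so the weekly figure is an approximation rather than a true weekly count.
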
ossuary/db/__init__.py ADDED
@@ -0,0 +1,15 @@
+ """Database models and session management."""
+
+ from ossuary.db.models import Base, Package, Commit, Issue, Score, SentimentRecord
+ from ossuary.db.session import get_session, init_db
+
+ __all__ = [
+     "Base",
+     "Package",
+     "Commit",
+     "Issue",
+     "Score",
+     "SentimentRecord",
+     "get_session",
+     "init_db",
+ ]
ossuary/db/models.py ADDED
@@ -0,0 +1,197 @@
+ """SQLAlchemy models for ossuary."""
+
+ from datetime import datetime
+ from enum import Enum
+ from typing import Optional
+
+ from sqlalchemy import (
+     JSON,
+     DateTime,
+     Float,
+     ForeignKey,
+     Index,
+     Integer,
+     String,
+     Text,
+     UniqueConstraint,
+ )
+ from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
+
+
+ class Base(DeclarativeBase):
+     """Base class for all models."""
+
+     pass
+
+
+ class Ecosystem(str, Enum):
+     """Supported package ecosystems."""
+
+     NPM = "npm"
+     PYPI = "pypi"
+
+
+ class Package(Base):
+     """A package being tracked."""
+
+     __tablename__ = "packages"
+
+     id: Mapped[int] = mapped_column(Integer, primary_key=True)
+     name: Mapped[str] = mapped_column(String(255), nullable=False)
+     ecosystem: Mapped[str] = mapped_column(String(50), nullable=False)
+     repo_url: Mapped[Optional[str]] = mapped_column(String(500))
+
+     # Metadata from registry
+     description: Mapped[Optional[str]] = mapped_column(Text)
+     homepage: Mapped[Optional[str]] = mapped_column(String(500))
+
+     # Tracking
+     created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
+     last_analyzed: Mapped[Optional[datetime]] = mapped_column(DateTime)
+
+     # Relationships
+     commits: Mapped[list["Commit"]] = relationship(back_populates="package", cascade="all, delete-orphan")
+     issues: Mapped[list["Issue"]] = relationship(back_populates="package", cascade="all, delete-orphan")
+     scores: Mapped[list["Score"]] = relationship(back_populates="package", cascade="all, delete-orphan")
+     sentiment_records: Mapped[list["SentimentRecord"]] = relationship(
+         back_populates="package", cascade="all, delete-orphan"
+     )
+
+     __table_args__ = (
+         UniqueConstraint("name", "ecosystem", name="uq_package_name_ecosystem"),
+         Index("ix_package_ecosystem", "ecosystem"),
+     )
+
+
+ class Commit(Base):
+     """A commit from a package's repository."""
+
+     __tablename__ = "commits"
+
+     id: Mapped[int] = mapped_column(Integer, primary_key=True)
+     package_id: Mapped[int] = mapped_column(ForeignKey("packages.id", ondelete="CASCADE"))
+
+     sha: Mapped[str] = mapped_column(String(40), nullable=False)
+     author_name: Mapped[str] = mapped_column(String(255))
+     author_email: Mapped[str] = mapped_column(String(255))
+     authored_date: Mapped[datetime] = mapped_column(DateTime)
+
+     committer_name: Mapped[Optional[str]] = mapped_column(String(255))
+     committer_email: Mapped[Optional[str]] = mapped_column(String(255))
+     committed_date: Mapped[Optional[datetime]] = mapped_column(DateTime)
+
+     message: Mapped[str] = mapped_column(Text)
+
+     # Relationships
+     package: Mapped["Package"] = relationship(back_populates="commits")
+
+     __table_args__ = (
+         UniqueConstraint("package_id", "sha", name="uq_commit_package_sha"),
+         Index("ix_commit_authored_date", "authored_date"),
+         Index("ix_commit_author_email", "author_email"),
+     )
+
+
+ class Issue(Base):
+     """An issue or PR from a package's repository."""
+
+     __tablename__ = "issues"
+
+     id: Mapped[int] = mapped_column(Integer, primary_key=True)
+     package_id: Mapped[int] = mapped_column(ForeignKey("packages.id", ondelete="CASCADE"))
+
+     number: Mapped[int] = mapped_column(Integer)
+     title: Mapped[str] = mapped_column(String(500))
+     body: Mapped[Optional[str]] = mapped_column(Text)
+     state: Mapped[str] = mapped_column(String(20))  # open, closed
+     is_pull_request: Mapped[bool] = mapped_column(default=False)
+
+     author_login: Mapped[Optional[str]] = mapped_column(String(255))
+     created_at: Mapped[datetime] = mapped_column(DateTime)
+     updated_at: Mapped[Optional[datetime]] = mapped_column(DateTime)
+     closed_at: Mapped[Optional[datetime]] = mapped_column(DateTime)
+
+     # Store comments as JSON array
+     comments: Mapped[Optional[dict]] = mapped_column(JSON)
+
+     # Relationships
+     package: Mapped["Package"] = relationship(back_populates="issues")
+
+     __table_args__ = (
+         UniqueConstraint("package_id", "number", name="uq_issue_package_number"),
+         Index("ix_issue_created_at", "created_at"),
+     )
+
+
+ class Score(Base):
+     """A calculated risk score for a package."""
+
+     __tablename__ = "scores"
+
+     id: Mapped[int] = mapped_column(Integer, primary_key=True)
+     package_id: Mapped[int] = mapped_column(ForeignKey("packages.id", ondelete="CASCADE"))
+
+     # Score calculation date and cutoff
+     calculated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
+     cutoff_date: Mapped[datetime] = mapped_column(DateTime)
+
+     # Final score
+     final_score: Mapped[int] = mapped_column(Integer)
+     risk_level: Mapped[str] = mapped_column(String(20))  # CRITICAL, HIGH, MODERATE, LOW, VERY_LOW
+
+     # Score components
+     base_risk: Mapped[int] = mapped_column(Integer)
+     activity_modifier: Mapped[int] = mapped_column(Integer)
+     protective_factors_total: Mapped[int] = mapped_column(Integer)
+     sentiment_modifier: Mapped[int] = mapped_column(Integer, default=0)
+
+     # Detailed breakdown stored as JSON
+     breakdown: Mapped[dict] = mapped_column(JSON)
+
+     # Core metrics at time of scoring
+     maintainer_concentration: Mapped[float] = mapped_column(Float)
+     commits_last_year: Mapped[int] = mapped_column(Integer)
+     unique_contributors: Mapped[int] = mapped_column(Integer)
+     weekly_downloads: Mapped[int] = mapped_column(Integer, default=0)
+
+     # Relationships
+     package: Mapped["Package"] = relationship(back_populates="scores")
+
+     __table_args__ = (Index("ix_score_calculated_at", "calculated_at"),)
+
+
+ class SentimentRecord(Base):
+     """Sentiment analysis result for a piece of text."""
+
+     __tablename__ = "sentiment_records"
+
+     id: Mapped[int] = mapped_column(Integer, primary_key=True)
+     package_id: Mapped[int] = mapped_column(ForeignKey("packages.id", ondelete="CASCADE"))
+
+     # Source of the text
+     source_type: Mapped[str] = mapped_column(String(50))  # commit, issue, comment
+     source_id: Mapped[str] = mapped_column(String(255))  # sha or issue number
+
+     # Text hash for deduplication
+     text_hash: Mapped[str] = mapped_column(String(64))
+
+     # Sentiment scores
+     compound_score: Mapped[float] = mapped_column(Float)  # -1 to 1
+     positive_score: Mapped[float] = mapped_column(Float)
+     negative_score: Mapped[float] = mapped_column(Float)
+     neutral_score: Mapped[float] = mapped_column(Float)
+
+     # Frustration detection
+     frustration_detected: Mapped[bool] = mapped_column(default=False)
+     frustration_keywords: Mapped[Optional[list]] = mapped_column(JSON)
+
+     # Metadata
+     analyzed_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
+
+     # Relationships
+     package: Mapped["Package"] = relationship(back_populates="sentiment_records")
+
+     __table_args__ = (
+         UniqueConstraint("package_id", "text_hash", name="uq_sentiment_package_hash"),
+         Index("ix_sentiment_source_type", "source_type"),
+     )
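
To illustrate the schema above, a small sketch (not part of the package) that exercises the Package/Commit relationship and the name-plus-ecosystem uniqueness constraint against an in-memory SQLite database; the package name and commit values are made up.

    from datetime import datetime

    from sqlalchemy import create_engine, select
    from sqlalchemy.orm import Session

    from ossuary.db.models import Base, Commit, Package

    engine = create_engine("sqlite:///:memory:")
    Base.metadata.create_all(engine)

    with Session(engine) as session:
        # Child rows are attached via the relationship; cascade="all, delete-orphan"
        # means deleting the Package would remove its commits as well.
        pkg = Package(name="httpx", ecosystem="pypi", repo_url="https://github.com/encode/httpx")
        pkg.commits.append(
            Commit(
                sha="0" * 40,
                author_name="Jane Doe",
                author_email="jane@example.com",
                authored_date=datetime(2024, 1, 1),
                message="Initial commit",
            )
        )
        session.add(pkg)
        session.commit()

        stored = session.execute(select(Package).where(Package.name == "httpx")).scalar_one()
        print(stored.id, len(stored.commits))

Adding a second Package with the same name and ecosystem would raise an IntegrityError because of the uq_package_name_ecosystem constraint.
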
ossuary/db/session.py ADDED
@@ -0,0 +1,49 @@
+ """Database session management."""
+
+ import os
+ from contextlib import contextmanager
+ from typing import Generator
+
+ from sqlalchemy import create_engine
+ from sqlalchemy.orm import Session, sessionmaker
+
+ from ossuary.db.models import Base
+
+ # Default to SQLite for development
+ DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./ossuary.db")
+
+ # Handle SQLite URL format for SQLAlchemy
+ if DATABASE_URL.startswith("sqlite"):
+     engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
+ else:
+     engine = create_engine(DATABASE_URL)
+
+ SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+
+
+ def init_db() -> None:
+     """Initialize the database, creating all tables."""
+     Base.metadata.create_all(bind=engine)
+
+
+ def get_session() -> Generator[Session, None, None]:
+     """Get a database session."""
+     session = SessionLocal()
+     try:
+         yield session
+     finally:
+         session.close()
+
+
+ @contextmanager
+ def session_scope() -> Generator[Session, None, None]:
+     """Provide a transactional scope around a series of operations."""
+     session = SessionLocal()
+     try:
+         yield session
+         session.commit()
+     except Exception:
+         session.rollback()
+         raise
+     finally:
+         session.close()
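
A brief sketch (not part of the package) of how these helpers might be used together; it relies only on names defined in this diff and on the default SQLite DATABASE_URL.

    from ossuary.db.models import Package
    from ossuary.db.session import init_db, session_scope

    init_db()  # Base.metadata.create_all against the configured engine

    # session_scope() commits on success and rolls back if the block raises.
    with session_scope() as session:
        session.add(Package(name="requests", ecosystem="pypi"))

get_session() is left as a bare generator, the shape typically consumed as a FastAPI-style dependency, though this diff does not show how it is wired in.
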
@@ -0,0 +1,16 @@
+ """Risk scoring engine."""
+
+ from ossuary.scoring.engine import PackageMetrics, RiskScorer
+ from ossuary.scoring.factors import ProtectiveFactors, RiskBreakdown, RiskLevel
+ from ossuary.scoring.reputation import ReputationBreakdown, ReputationScorer, ReputationTier
+
+ __all__ = [
+     "PackageMetrics",
+     "RiskScorer",
+     "ProtectiveFactors",
+     "RiskBreakdown",
+     "RiskLevel",
+     "ReputationBreakdown",
+     "ReputationScorer",
+     "ReputationTier",
+ ]