ossuary-risk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,318 @@
1
+ """Risk scoring engine implementation."""
2
+
3
+ from dataclasses import dataclass
4
+ from datetime import datetime, timedelta
5
+ from typing import Optional
6
+
7
+ from ossuary.scoring.factors import ProtectiveFactors, RiskBreakdown, RiskLevel
8
+ from ossuary.scoring.reputation import ReputationBreakdown, ReputationScorer
9
+
10
+
11
@dataclass
class PackageMetrics:
    """Raw measurements gathered for one package; the input to risk scoring.

    Every field has a default, so an instance can be created empty and
    filled in as data becomes available.  List-valued fields default to
    ``None`` and are replaced by a fresh empty list in ``__post_init__``.
    """

    # --- Git history ---
    maintainer_concentration: float = 0.0  # top contributor's share of commits (0-100)
    commits_last_year: int = 0
    unique_contributors: int = 0
    top_contributor_email: str = ""
    top_contributor_name: str = ""
    last_commit_date: Optional[datetime] = None

    # --- External API data ---
    weekly_downloads: int = 0

    # --- Basic maintainer info ---
    maintainer_username: Optional[str] = None
    maintainer_public_repos: int = 0
    maintainer_total_stars: int = 0
    has_github_sponsors: bool = False

    # --- Inputs for the composite reputation score ---
    maintainer_account_created: Optional[datetime] = None
    maintainer_repos: list[dict] = None  # Full repo data
    maintainer_sponsor_count: int = 0
    maintainer_orgs: list[str] = None
    packages_maintained: list[str] = None  # Packages by this maintainer

    # --- Pre-computed reputation (when set, the scorer uses it as-is) ---
    reputation: Optional[ReputationBreakdown] = None

    # --- Repository info ---
    is_org_owned: bool = False
    org_admin_count: int = 0
    cii_badge_level: str = "none"

    # --- Sentiment analysis results ---
    average_sentiment: float = 0.0
    frustration_detected: bool = False
    frustration_evidence: list[str] = None

    def __post_init__(self):
        """Swap the ``None`` placeholder of each list-valued field for a new list."""
        for list_field in (
            "frustration_evidence",
            "maintainer_repos",
            "maintainer_orgs",
            "packages_maintained",
        ):
            if getattr(self, list_field) is None:
                # A new list per field and per instance, so nothing is shared.
                setattr(self, list_field, [])
61
+
62
+
63
class RiskScorer:
    """
    Scoring engine that combines package metrics into a single risk score.

    Score = Base Risk + Activity Modifier + Protective Factors
    Range: 0-100 (higher = riskier)
    """

    # Tier-1 maintainer thresholds (not referenced by the methods of this class)
    TIER1_REPOS_THRESHOLD = 500
    TIER1_STARS_THRESHOLD = 100_000

    # Weekly-download thresholds for the visibility factor
    MASSIVE_VISIBILITY_THRESHOLD = 50_000_000
    HIGH_VISIBILITY_THRESHOLD = 10_000_000

    def calculate_base_risk(self, concentration: float) -> int:
        """
        Map maintainer concentration onto the base risk band.

        Args:
            concentration: Percentage of commits from top contributor (0-100)

        Returns:
            20, 40, 60, 80 or 100; the band changes at 30, 50, 70 and 90.
        """
        # Each pair is (exclusive upper bound, risk for values below it).
        for upper_bound, risk in ((30, 20), (50, 40), (70, 60), (90, 80)):
            if concentration < upper_bound:
                return risk
        return 100

    def calculate_activity_modifier(self, commits_last_year: int) -> int:
        """
        Translate commit frequency into a score adjustment.

        Args:
            commits_last_year: Number of commits in the last 12 months

        Returns:
            -30 (more than 50 commits), -15 (12-50), 0 (4-11) or +20 (fewer than 4)
        """
        if commits_last_year > 50:
            return -30  # active project
        if commits_last_year >= 12:
            return -15  # moderately active
        if commits_last_year >= 4:
            return 0  # low activity, neutral
        return 20  # treated as abandoned

    def calculate_protective_factors(
        self, metrics: PackageMetrics, ecosystem: str = "npm"
    ) -> ProtectiveFactors:
        """
        Evaluate every protective (and aggravating) factor for a package.

        Args:
            metrics: Collected package metrics
            ecosystem: Package ecosystem, forwarded to the reputation scorer

        Returns:
            ProtectiveFactors with each individual score and its evidence
        """
        pf = ProtectiveFactors()

        # Factor 1: maintainer reputation. Prefer a pre-computed breakdown and
        # only run the reputation scorer when none was supplied.
        reputation = metrics.reputation
        if not reputation:
            reputation = ReputationScorer().calculate(
                username=metrics.maintainer_username or "",
                account_created=metrics.maintainer_account_created,
                repos=metrics.maintainer_repos,
                sponsor_count=metrics.maintainer_sponsor_count,
                orgs=metrics.maintainer_orgs,
                packages_maintained=metrics.packages_maintained,
                ecosystem=ecosystem,
            )

        pf.reputation_score = reputation.tier.risk_reduction
        if pf.reputation_score != 0:
            # Evidence is recorded only when the tier actually moves the score.
            pf.reputation_evidence = (
                f"{reputation.username}: {reputation.total_score} pts ({reputation.tier.value}) - "
                f"tenure={reputation.tenure_score}, portfolio={reputation.portfolio_score}, "
                f"stars={reputation.stars_score}, sponsors={reputation.sponsors_score}"
            )

        # Factor 2: economic sustainability (-15)
        if metrics.has_github_sponsors:
            pf.funding_score = -15
            pf.funding_evidence = "GitHub Sponsors enabled"

        # Factor 3: organization ownership with at least three admins (-15)
        if metrics.is_org_owned and metrics.org_admin_count >= 3:
            pf.org_score = -15

        # Factor 4: download visibility (-20 above the massive threshold,
        # -10 above the high threshold)
        downloads = metrics.weekly_downloads
        if downloads > self.MASSIVE_VISIBILITY_THRESHOLD:
            pf.visibility_score = -20
        elif downloads > self.HIGH_VISIBILITY_THRESHOLD:
            pf.visibility_score = -10

        # Factor 5: distributed governance (-10)
        if metrics.maintainer_concentration < 40:
            pf.distributed_score = -10

        # Factor 6: active community (-10)
        if metrics.unique_contributors > 20:
            pf.community_score = -10

        # Factor 7: CII best-practices badge of any level (-10)
        if metrics.cii_badge_level in ("gold", "silver", "passing"):
            pf.cii_score = -10

        # Factor 8: economic frustration (+20). The evidence list is the same
        # object held by ``metrics``, not a copy.
        if metrics.frustration_detected:
            pf.frustration_score = 20
            pf.frustration_evidence = metrics.frustration_evidence

        # Factor 9: sentiment. Below -0.3 adds 10 points, above 0.3 removes 5,
        # anything in between leaves the score untouched.
        sentiment = metrics.average_sentiment
        if sentiment < -0.3:
            pf.sentiment_score = 10
            pf.sentiment_evidence = ["Negative sentiment detected in communications"]
        elif sentiment > 0.3:
            pf.sentiment_score = -5

        return pf

    def generate_explanation(self, breakdown: RiskBreakdown) -> str:
        """Build the one-line, human-readable summary of a scored breakdown."""
        # Concentration sentence: exactly one of four wordings is always used.
        conc = breakdown.maintainer_concentration
        if conc >= 90:
            concentration_note = f"Critical concentration ({conc:.0f}%): single person controls nearly all commits"
        elif conc >= 70:
            concentration_note = f"High concentration ({conc:.0f}%): majority of commits from one person"
        elif conc >= 50:
            concentration_note = f"Moderate concentration ({conc:.0f}%): some bus factor risk"
        else:
            concentration_note = f"Distributed commits ({conc:.0f}%): healthy contributor diversity"
        sentences = [concentration_note]

        # Activity sentence, keyed by the modifier values that
        # calculate_activity_modifier can return; other values add nothing.
        activity_notes = {
            20: "Project appears abandoned (<4 commits/year)",
            -30: "Actively maintained (>50 commits/year)",
            -15: "Moderately active (12-50 commits/year)",
            0: "Low activity (4-11 commits/year)",
        }
        activity_note = activity_notes.get(breakdown.activity_modifier)
        if activity_note is not None:
            sentences.append(activity_note)

        # Protective-factor summary; a total of exactly zero is not mentioned.
        pf_total = breakdown.protective_factors.total
        if pf_total < -30:
            sentences.append(f"Strong protective factors ({pf_total:+d} points)")
        elif pf_total < 0:
            sentences.append(f"Some protective factors ({pf_total:+d} points)")
        elif pf_total > 0:
            sentences.append(f"Warning signals present ({pf_total:+d} points)")

        # Frustration gets its own alert in addition to the summary above.
        if breakdown.protective_factors.frustration_score > 0:
            sentences.append("ALERT: Economic frustration signals detected")

        headline = f"{breakdown.risk_level.semaphore} {breakdown.risk_level.value} ({breakdown.final_score}). "
        return headline + ". ".join(sentences)

    def generate_recommendations(self, breakdown: RiskBreakdown) -> list[str]:
        """Return actionable recommendations, most urgent first."""
        score = breakdown.final_score
        if score >= 80:
            general = [
                "IMMEDIATE: Identify alternative packages or prepare to fork",
                "Do not accept new versions without manual code review",
                "Monitor for maintainer changes or ownership transfers",
            ]
        elif score >= 60:
            general = [
                "Review new releases carefully before updating",
                "Consider contributing to reduce maintainer concentration",
                "Monitor project health metrics monthly",
            ]
        elif score >= 40:
            general = [
                "Standard monitoring recommended",
                "Keep dependencies updated",
            ]
        else:
            general = ["Low risk - standard dependency management practices apply"]

        frustrated = breakdown.protective_factors.frustration_score > 0
        takeover_target = breakdown.maintainer_concentration > 90 and breakdown.commits_last_year < 10

        # Specific warnings go ahead of the general advice; the takeover
        # warning ranks above the frustration warning when both apply.
        urgent = []
        if takeover_target:
            urgent.append("HIGH PRIORITY: Single maintainer + low activity = prime takeover target")
        if frustrated:
            urgent.append("URGENT: Maintainer frustration detected - elevated sabotage risk")

        return urgent + general

    def calculate(
        self,
        package_name: str,
        ecosystem: str,
        metrics: PackageMetrics,
        repo_url: Optional[str] = None,
    ) -> RiskBreakdown:
        """
        Produce the full risk assessment for one package.

        Args:
            package_name: Name of the package
            ecosystem: Package ecosystem (npm, pypi)
            metrics: Collected package metrics
            repo_url: Repository URL (optional)

        Returns:
            RiskBreakdown holding the copied metrics, the score components,
            the clamped final score, the risk level, the explanation and the
            recommendations
        """
        # Identification and the raw metrics are carried over unchanged.
        breakdown = RiskBreakdown(
            package_name=package_name,
            ecosystem=ecosystem,
            repo_url=repo_url,
            maintainer_concentration=metrics.maintainer_concentration,
            commits_last_year=metrics.commits_last_year,
            unique_contributors=metrics.unique_contributors,
            weekly_downloads=metrics.weekly_downloads,
        )

        # Score components
        breakdown.base_risk = self.calculate_base_risk(metrics.maintainer_concentration)
        breakdown.activity_modifier = self.calculate_activity_modifier(metrics.commits_last_year)
        breakdown.protective_factors = self.calculate_protective_factors(metrics, ecosystem)

        # The sum can leave the 0-100 range, so clamp it.
        unclamped = breakdown.base_risk + breakdown.activity_modifier + breakdown.protective_factors.total
        breakdown.final_score = max(0, min(100, unclamped))

        breakdown.risk_level = RiskLevel.from_score(breakdown.final_score)

        # Both texts read the fields set above, so they must be generated last.
        breakdown.explanation = self.generate_explanation(breakdown)
        breakdown.recommendations = self.generate_recommendations(breakdown)

        return breakdown
@@ -0,0 +1,175 @@
1
+ """Risk scoring factors and data structures."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from enum import Enum
5
+ from typing import Optional
6
+
7
+
8
class RiskLevel(str, Enum):
    """Five-step classification of a numeric risk score."""

    CRITICAL = "CRITICAL"
    HIGH = "HIGH"
    MODERATE = "MODERATE"
    LOW = "LOW"
    VERY_LOW = "VERY_LOW"

    @classmethod
    def from_score(cls, score: int) -> "RiskLevel":
        """Return the level whose band contains ``score``.

        Bands start at 80 (CRITICAL), 60 (HIGH), 40 (MODERATE) and 20 (LOW);
        anything lower is VERY_LOW.
        """
        # Ordered from the highest floor down so the first match wins.
        bands = (
            (80, cls.CRITICAL),
            (60, cls.HIGH),
            (40, cls.MODERATE),
            (20, cls.LOW),
        )
        for floor, level in bands:
            if score >= floor:
                return level
        return cls.VERY_LOW

    @property
    def semaphore(self) -> str:
        """Traffic-light emoji for this level (LOW and VERY_LOW share green)."""
        if self is RiskLevel.CRITICAL:
            return "🔴"
        if self is RiskLevel.HIGH:
            return "🟠"
        if self is RiskLevel.MODERATE:
            return "🟡"
        return "🟢"

    @property
    def description(self) -> str:
        """Short human-readable description of this level."""
        descriptions = {
            RiskLevel.CRITICAL: "Immediate risk - action required",
            RiskLevel.HIGH: "Elevated risk - intervention recommended",
            RiskLevel.MODERATE: "Requires active monitoring",
            RiskLevel.LOW: "Minor concerns, generally stable",
            RiskLevel.VERY_LOW: "Safe, well-governed package",
        }
        return descriptions[self]
52
+
53
+
54
@dataclass
class ProtectiveFactors:
    """Per-factor score adjustments together with the evidence behind them."""

    # Signed adjustments: below zero lowers the risk score, above zero raises it.
    # The values noted here are the ones RiskScorer assigns.
    reputation_score: int = 0  # taken from the maintainer's reputation tier
    funding_score: int = 0  # -15 when GitHub Sponsors is enabled
    org_score: int = 0  # -15 for an org-owned repo with 3+ admins
    visibility_score: int = 0  # -20 above 50M weekly downloads, -10 above 10M
    distributed_score: int = 0  # -10 when concentration is below 40%
    community_score: int = 0  # -10 for more than 20 contributors
    cii_score: int = 0  # -10 for any CII badge level
    frustration_score: int = 0  # +20 when frustration was detected
    sentiment_score: int = 0  # +10 for negative sentiment, -5 for positive

    # Supporting evidence (only some factors record any)
    reputation_evidence: Optional[str] = None
    funding_evidence: Optional[str] = None
    frustration_evidence: list[str] = field(default_factory=list)
    sentiment_evidence: list[str] = field(default_factory=list)

    @property
    def total(self) -> int:
        """Sum of all nine factor scores."""
        return sum(
            (
                self.reputation_score,
                self.funding_score,
                self.org_score,
                self.visibility_score,
                self.distributed_score,
                self.community_score,
                self.cii_score,
                self.frustration_score,
                self.sentiment_score,
            )
        )

    def to_dict(self) -> dict:
        """Return a JSON-serializable mapping of every factor plus the total."""
        result = {}
        result["reputation"] = {
            "score": self.reputation_score,
            "evidence": self.reputation_evidence,
        }
        result["funding"] = {"score": self.funding_score, "evidence": self.funding_evidence}
        # These four factors carry a score only.
        result["organization"] = {"score": self.org_score}
        result["visibility"] = {"score": self.visibility_score}
        result["distributed_governance"] = {"score": self.distributed_score}
        result["community"] = {"score": self.community_score}
        result["cii_badge"] = {"score": self.cii_score}
        result["frustration"] = {
            "score": self.frustration_score,
            "evidence": self.frustration_evidence,
        }
        result["sentiment"] = {
            "score": self.sentiment_score,
            "evidence": self.sentiment_evidence,
        }
        result["total"] = self.total
        return result
113
+
114
+
115
@dataclass
class RiskBreakdown:
    """Full result of a risk assessment for one package."""

    # Which package this result is about
    package_name: str
    ecosystem: str
    repo_url: Optional[str] = None

    # Metrics carried over from the collected data
    maintainer_concentration: float = 0.0
    commits_last_year: int = 0
    unique_contributors: int = 0
    weekly_downloads: int = 0

    # Individual parts of the score
    base_risk: int = 0
    activity_modifier: int = 0
    protective_factors: ProtectiveFactors = field(default_factory=ProtectiveFactors)

    # Combined result
    final_score: int = 0
    risk_level: RiskLevel = RiskLevel.VERY_LOW

    # Human-readable output
    explanation: str = ""
    recommendations: list[str] = field(default_factory=list)

    # Data completeness tracking
    data_sources: dict[str, bool] = field(default_factory=dict)
    warnings: list[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the assessment as a nested, JSON-serializable mapping."""
        package_section = {
            "name": self.package_name,
            "ecosystem": self.ecosystem,
            "repo_url": self.repo_url,
        }
        metrics_section = {
            "maintainer_concentration": self.maintainer_concentration,
            "commits_last_year": self.commits_last_year,
            "unique_contributors": self.unique_contributors,
            "weekly_downloads": self.weekly_downloads,
        }
        score_section = {
            "final": self.final_score,
            # The enum is flattened to its string value and emoji.
            "risk_level": self.risk_level.value,
            "semaphore": self.risk_level.semaphore,
            "components": {
                "base_risk": self.base_risk,
                "activity_modifier": self.activity_modifier,
                "protective_factors": self.protective_factors.to_dict(),
            },
        }
        return {
            "package": package_section,
            "metrics": metrics_section,
            "score": score_section,
            "explanation": self.explanation,
            "recommendations": self.recommendations,
            "data_sources": self.data_sources,
            "warnings": self.warnings,
        }