ossuary-risk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ossuary/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ """
2
+ Ossuary - OSS Supply Chain Risk Scoring
3
+
4
+ Where abandoned packages come to rest.
5
+ """
6
+
7
+ __version__ = "0.1.0"
@@ -0,0 +1 @@
1
+ """API module for ossuary."""
ossuary/api/main.py ADDED
@@ -0,0 +1,173 @@
1
+ """FastAPI application for ossuary."""
2
+
3
+ from datetime import datetime
4
+ from typing import Optional
5
+
6
+ from fastapi import FastAPI, HTTPException, Query
7
+ from pydantic import BaseModel
8
+
9
+ from ossuary import __version__
10
+ from ossuary.collectors.git import GitCollector
11
+ from ossuary.collectors.github import GitHubCollector
12
+ from ossuary.collectors.npm import NpmCollector
13
+ from ossuary.collectors.pypi import PyPICollector
14
+ from ossuary.scoring.engine import PackageMetrics, RiskScorer
15
+ from ossuary.scoring.factors import RiskLevel
16
+ from ossuary.sentiment.analyzer import SentimentAnalyzer
17
+
18
# ASGI application object; the version shown in the generated docs tracks the package version.
app = FastAPI(
    version=__version__,
    title="Ossuary",
    description="OSS Supply Chain Risk Scoring API - Where abandoned packages come to rest",
)
23
+
24
+
25
+ # Response models
26
class ScoreResponse(BaseModel):
    """Response model for score endpoint."""

    # Identity of the scored package, echoed back from the request path.
    package: str
    ecosystem: str
    # Repository that was analysed: caller-supplied or taken from registry metadata.
    repo_url: Optional[str]

    # Headline result; get_score fills these from the scorer's breakdown
    # (final_score, risk_level.value and risk_level.semaphore respectively).
    score: int
    risk_level: str
    semaphore: str
    explanation: str

    # get_score passes the "score" -> "components" section of breakdown.to_dict() here.
    breakdown: dict
    recommendations: list[str]
38
+
39
+
40
class HealthResponse(BaseModel):
    """Health check response."""

    # The health endpoint in this module always reports "healthy".
    status: str
    # Package version string (ossuary.__version__).
    version: str
45
+
46
+
47
@app.get("/health", response_model=HealthResponse)
async def health():
    """Health check endpoint."""
    # Liveness only: nothing but the package version is consulted.
    payload = HealthResponse(status="healthy", version=__version__)
    return payload
51
+
52
+
53
@app.get("/score/{ecosystem}/{package:path}", response_model=ScoreResponse)
async def get_score(
    ecosystem: str,
    package: str,
    repo_url: Optional[str] = Query(None, description="Repository URL (auto-detected if not provided)"),
    cutoff_date: Optional[str] = Query(None, description="Cutoff date for T-1 analysis (YYYY-MM-DD)"),
):
    """
    Calculate risk score for a package.

    Args:
        ecosystem: Package ecosystem (npm or pypi)
        package: Package name
        repo_url: Optional repository URL
        cutoff_date: Optional cutoff date for historical analysis
    """
    # Error contract:
    #   400 - unsupported ecosystem, malformed cutoff_date, or no repository
    #         URL could be determined;
    #   500 - any other failure while collecting data or scoring.

    # Registry collector class for each supported ecosystem.
    registry_collectors = {"npm": NpmCollector, "pypi": PyPICollector}
    if ecosystem not in registry_collectors:
        raise HTTPException(status_code=400, detail=f"Unsupported ecosystem: {ecosystem}")

    cutoff = None
    if cutoff_date:
        try:
            cutoff = datetime.strptime(cutoff_date, "%Y-%m-%d")
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid date format. Use YYYY-MM-DD") from None

    try:
        # Get package info
        pkg_collector = registry_collectors[ecosystem]()
        try:
            pkg_data = await pkg_collector.collect(package)
        finally:
            # Release the collector even when the registry lookup fails.
            await pkg_collector.close()

        if not repo_url:
            repo_url = pkg_data.repository_url
        weekly_downloads = pkg_data.weekly_downloads

        if not repo_url:
            raise HTTPException(
                status_code=400,
                detail="Could not find repository URL. Please provide with repo_url query parameter",
            )

        # Collect git data
        git_collector = GitCollector()
        git_metrics = await git_collector.collect(repo_url, cutoff)

        # Collect GitHub data
        github_collector = GitHubCollector()
        try:
            github_data = await github_collector.collect(repo_url)
        finally:
            await github_collector.close()

        # Run sentiment analysis
        sentiment_analyzer = SentimentAnalyzer()
        commit_sentiment = sentiment_analyzer.analyze_commits([c.message for c in git_metrics.commits])
        issue_sentiment = sentiment_analyzer.analyze_issues(
            [{"title": i.title, "body": i.body, "comments": i.comments} for i in github_data.issues]
        )

        # Aggregate sentiment: frustration hits are summed, compound scores are
        # averaged with equal weight for commits and issues.
        total_frustration = commit_sentiment.frustration_count + issue_sentiment.frustration_count
        avg_sentiment = (commit_sentiment.average_compound + issue_sentiment.average_compound) / 2

        # Build metrics
        metrics = PackageMetrics(
            maintainer_concentration=git_metrics.maintainer_concentration,
            commits_last_year=git_metrics.commits_last_year,
            unique_contributors=git_metrics.unique_contributors,
            top_contributor_email=git_metrics.top_contributor_email,
            last_commit_date=git_metrics.last_commit_date,
            weekly_downloads=weekly_downloads,
            maintainer_username=github_data.maintainer_username,
            maintainer_public_repos=github_data.maintainer_public_repos,
            maintainer_total_stars=github_data.maintainer_total_stars,
            has_github_sponsors=github_data.has_github_sponsors,
            is_org_owned=github_data.is_org_owned,
            org_admin_count=github_data.org_admin_count,
            average_sentiment=avg_sentiment,
            frustration_detected=total_frustration > 0,
            frustration_evidence=commit_sentiment.frustration_evidence + issue_sentiment.frustration_evidence,
        )

        # Calculate score
        scorer = RiskScorer()
        breakdown = scorer.calculate(package, ecosystem, metrics, repo_url)

        return ScoreResponse(
            package=package,
            ecosystem=ecosystem,
            repo_url=repo_url,
            score=breakdown.final_score,
            risk_level=breakdown.risk_level.value,
            semaphore=breakdown.risk_level.semaphore,
            explanation=breakdown.explanation,
            breakdown=breakdown.to_dict()["score"]["components"],
            recommendations=breakdown.recommendations,
        )

    except HTTPException:
        # Client errors raised on purpose above must keep their own status
        # code instead of being rewrapped as a 500 by the handler below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
159
+
160
+
161
@app.get("/")
async def root():
    """Root endpoint with API info."""
    # Route templates advertised to clients.
    endpoints = {
        "score": "/score/{ecosystem}/{package}",
        "health": "/health",
    }
    info = {
        "name": "Ossuary",
        "description": "OSS Supply Chain Risk Scoring API",
        "version": __version__,
        "docs": "/docs",
        "endpoints": endpoints,
    }
    return info
ossuary/cli.py ADDED
@@ -0,0 +1,309 @@
1
+ """Command-line interface for ossuary."""
2
+
3
+ import asyncio
4
+ import json
5
+ import sys
6
+ from datetime import datetime
7
+ from typing import Optional
8
+
9
+ import typer
10
+ from rich.console import Console
11
+ from rich.panel import Panel
12
+ from rich.table import Table
13
+
14
+ from ossuary import __version__
15
+ from ossuary.collectors.git import GitCollector
16
+ from ossuary.collectors.github import GitHubCollector
17
+ from ossuary.collectors.npm import NpmCollector
18
+ from ossuary.collectors.pypi import PyPICollector
19
+ from ossuary.db.session import init_db
20
+ from ossuary.scoring.engine import PackageMetrics, RiskScorer
21
+ from ossuary.scoring.factors import RiskLevel
22
+ from ossuary.scoring.reputation import ReputationScorer
23
+ from ossuary.sentiment.analyzer import SentimentAnalyzer
24
+
25
# Shared Rich console used for all CLI output in this module.
console = Console()

# Root Typer application that the commands below register on.
app = typer.Typer(
    name="ossuary",
    help="OSS Supply Chain Risk Scoring - Where abandoned packages come to rest",
    add_completion=False,
)
31
+
32
+
33
def version_callback(value: bool):
    """Print the package version and exit when the version flag is set.

    Args:
        value: Parsed value of the ``--version`` option; falsy values do nothing.

    Raises:
        typer.Exit: After printing the version, to stop further processing.
    """
    if not value:
        return
    console.print(f"ossuary version {__version__}")
    raise typer.Exit()
37
+
38
+
39
@app.callback()
def main(
    version: bool = typer.Option(
        None,
        "--version",
        "-v",
        callback=version_callback,
        is_eager=True,
        help="Show version and exit",
    ),
):
    """Ossuary - OSS Supply Chain Risk Scoring."""
    # Intentionally empty: the only root-level option is --version, and
    # version_callback handles it before this body would run.
52
+
53
+
54
@app.command()
def init():
    """Initialize the database."""
    # Announce, delegate to the db session module, then confirm.
    console.print("Initializing database...")
    init_db()
    console.print("[green]Database initialized successfully[/green]")
60
+
61
+
62
@app.command()
def score(
    package: str = typer.Argument(..., help="Package name to analyze"),
    ecosystem: str = typer.Option("npm", "--ecosystem", "-e", help="Package ecosystem (npm, pypi)"),
    repo_url: Optional[str] = typer.Option(None, "--repo", "-r", help="Repository URL (auto-detected if not provided)"),
    cutoff_date: Optional[str] = typer.Option(None, "--cutoff", "-c", help="Cutoff date for T-1 analysis (YYYY-MM-DD)"),
    output_json: bool = typer.Option(False, "--json", "-j", help="Output as JSON"),
):
    """Calculate risk score for a package."""
    # The command itself is synchronous; the scoring pipeline is a coroutine,
    # so it is run to completion on a fresh event loop.
    scoring_run = _score_package(package, ecosystem, repo_url, cutoff_date, output_json)
    asyncio.run(scoring_run)
72
+
73
+
74
def _github_username_from_email(email: Optional[str]) -> Optional[str]:
    """Return the login embedded in a GitHub noreply address, or None.

    Handles both ``username@users.noreply.github.com`` and
    ``12345+username@users.noreply.github.com``.
    """
    if not email or "noreply.github.com" not in email:
        return None
    local_part = email.split("@")[0]
    if "+" in local_part:
        return local_part.split("+")[1]
    return local_part


async def _score_package(
    package: str,
    ecosystem: str,
    repo_url: Optional[str],
    cutoff_date: Optional[str],
    output_json: bool,
):
    """Internal async function to score a package.

    Collects registry, git and GitHub data, derives reputation and sentiment,
    runs the risk scorer and prints the result.

    Args:
        package: Package name to analyze.
        ecosystem: Package ecosystem ("npm" or "pypi").
        repo_url: Repository URL; taken from registry metadata when falsy.
        cutoff_date: Optional YYYY-MM-DD cutoff passed to the git collector.
        output_json: Print the breakdown as JSON instead of formatted tables.

    Raises:
        typer.Exit: With code 1 on a malformed cutoff date, an unsupported
            ecosystem, or when no repository URL can be determined.
    """
    cutoff = None
    if cutoff_date:
        try:
            cutoff = datetime.strptime(cutoff_date, "%Y-%m-%d")
        except ValueError:
            console.print("[red]Invalid date format. Use YYYY-MM-DD[/red]")
            raise typer.Exit(1) from None

    # Registry collector class for each supported ecosystem.
    registry_collectors = {"npm": NpmCollector, "pypi": PyPICollector}
    if ecosystem not in registry_collectors:
        console.print(f"[red]Unsupported ecosystem: {ecosystem}[/red]")
        raise typer.Exit(1)

    def progress(message: str) -> None:
        # Progress lines are suppressed in JSON mode so the output stays parseable.
        if not output_json:
            console.print(message)

    with console.status(f"[bold blue]Analyzing {package}...[/bold blue]"):
        # Get package info
        pkg_collector = registry_collectors[ecosystem]()
        try:
            pkg_data = await pkg_collector.collect(package)
        finally:
            # Release the collector even when the registry lookup fails.
            await pkg_collector.close()

        if not repo_url:
            repo_url = pkg_data.repository_url
        weekly_downloads = pkg_data.weekly_downloads

        if not repo_url:
            console.print("[red]Could not find repository URL. Please provide with --repo[/red]")
            raise typer.Exit(1)

        progress(f" Repository: {repo_url}")

        # Collect git data
        progress(" Collecting git history...")
        git_collector = GitCollector()
        git_metrics = await git_collector.collect(repo_url, cutoff)

        # Try to find the top contributor's GitHub username from the git email.
        # Only noreply addresses carry it; otherwise None is passed on and the
        # GitHub collector receives just the raw email.
        top_contributor_username = _github_username_from_email(git_metrics.top_contributor_email)

        # Collect GitHub data (pass top contributor info to get correct maintainer data)
        progress(" Collecting GitHub data...")
        github_collector = GitHubCollector()
        try:
            github_data = await github_collector.collect(
                repo_url,
                top_contributor_username=top_contributor_username,
                top_contributor_email=git_metrics.top_contributor_email,
            )
        finally:
            await github_collector.close()

        # Parse account created date
        maintainer_account_created = None
        if github_data.maintainer_account_created:
            try:
                maintainer_account_created = datetime.fromisoformat(
                    github_data.maintainer_account_created.replace("Z", "+00:00")
                )
            except ValueError:
                # Best effort: an unparseable timestamp leaves the date as None.
                pass

        # Calculate reputation score
        progress(" Calculating reputation...")
        reputation_scorer = ReputationScorer()
        reputation = reputation_scorer.calculate(
            username=github_data.maintainer_username,
            account_created=maintainer_account_created,
            repos=github_data.maintainer_repos,
            sponsor_count=github_data.maintainer_sponsor_count,
            orgs=github_data.maintainer_orgs,
            packages_maintained=[package],  # At minimum, they maintain this package
            ecosystem=ecosystem,
        )

        # Run sentiment analysis
        progress(" Analyzing sentiment...")
        sentiment_analyzer = SentimentAnalyzer()
        commit_sentiment = sentiment_analyzer.analyze_commits([c.message for c in git_metrics.commits])
        issue_sentiment = sentiment_analyzer.analyze_issues(
            [{"title": i.title, "body": i.body, "comments": i.comments} for i in github_data.issues]
        )

        # Aggregate sentiment: frustration hits are summed, compound scores are
        # averaged with equal weight for commits and issues.
        total_frustration = commit_sentiment.frustration_count + issue_sentiment.frustration_count
        avg_sentiment = (commit_sentiment.average_compound + issue_sentiment.average_compound) / 2

        # Build metrics
        metrics = PackageMetrics(
            maintainer_concentration=git_metrics.maintainer_concentration,
            commits_last_year=git_metrics.commits_last_year,
            unique_contributors=git_metrics.unique_contributors,
            top_contributor_email=git_metrics.top_contributor_email,
            top_contributor_name=git_metrics.top_contributor_name,
            last_commit_date=git_metrics.last_commit_date,
            weekly_downloads=weekly_downloads,
            maintainer_username=github_data.maintainer_username,
            maintainer_public_repos=github_data.maintainer_public_repos,
            maintainer_total_stars=github_data.maintainer_total_stars,
            has_github_sponsors=github_data.has_github_sponsors,
            maintainer_account_created=maintainer_account_created,
            maintainer_repos=github_data.maintainer_repos,
            maintainer_sponsor_count=github_data.maintainer_sponsor_count,
            maintainer_orgs=github_data.maintainer_orgs,
            packages_maintained=[package],
            reputation=reputation,
            is_org_owned=github_data.is_org_owned,
            org_admin_count=github_data.org_admin_count,
            average_sentiment=avg_sentiment,
            frustration_detected=total_frustration > 0,
            frustration_evidence=commit_sentiment.frustration_evidence + issue_sentiment.frustration_evidence,
        )

        # Calculate score
        scorer = RiskScorer()
        breakdown = scorer.calculate(package, ecosystem, metrics, repo_url)

    # Output results once the status spinner has been torn down.
    if output_json:
        # Emit the JSON text verbatim: Rich markup and emoji substitution would
        # rewrite bracketed or colon-delimited text inside string values, and
        # hard wrapping would insert newlines into long strings.
        console.print(
            json.dumps(breakdown.to_dict(), indent=2),
            markup=False,
            emoji=False,
            soft_wrap=True,
        )
    else:
        _display_results(breakdown, git_metrics, github_data, commit_sentiment, issue_sentiment)
215
+
216
+
217
def _display_results(breakdown, git_metrics, github_data, commit_sentiment, issue_sentiment):
    """Display results in a formatted way.

    Prints, in order: a coloured score panel, the score breakdown table, the
    protective-factor detail table (only when it has rows), the explanation
    and the recommendations.

    Args:
        breakdown: Score breakdown produced by the risk scorer.
        git_metrics: Accepted for call-site symmetry; not read here.
        github_data: GitHub data; only ``org_admin_count`` is read.
        commit_sentiment: Accepted for call-site symmetry; not read here.
        issue_sentiment: Accepted for call-site symmetry; not read here.
    """
    level = breakdown.risk_level

    # Semaphore color
    level_colors = {
        RiskLevel.CRITICAL: "red",
        RiskLevel.HIGH: "orange1",
        RiskLevel.MODERATE: "yellow",
        RiskLevel.LOW: "green",
        RiskLevel.VERY_LOW: "green",
    }
    color = level_colors[level]

    # Main score panel
    headline = f"{level.semaphore} {breakdown.final_score} - {level.value}"
    score_text = f"[bold {color}]{headline}[/bold {color}]"
    panel_title = f"[bold]{breakdown.package_name}[/bold]"
    console.print(Panel(score_text, title=panel_title, border_style=color))

    # Score breakdown table
    factors = breakdown.protective_factors
    table = Table(title="Score Breakdown")
    table.add_column("Component", style="cyan")
    table.add_column("Value", style="magenta")
    table.add_column("Points", justify="right")

    component_rows = [
        (
            "Base Risk (Concentration)",
            f"{breakdown.maintainer_concentration:.0f}%",
            f"{breakdown.base_risk:+d}",
        ),
        (
            "Activity Modifier",
            f"{breakdown.commits_last_year} commits/yr",
            f"{breakdown.activity_modifier:+d}",
        ),
        (
            "Protective Factors",
            "",
            f"{factors.total:+d}",
        ),
    ]
    for component_row in component_rows:
        table.add_row(*component_row)
    table.add_section()
    table.add_row("[bold]Final Score[/bold]", "", f"[bold]{breakdown.final_score}[/bold]")

    console.print(table)

    # Protective factors detail
    pf = factors
    pf_table = Table(title="Protective Factors Detail")
    pf_table.add_column("Factor", style="cyan")
    pf_table.add_column("Points", justify="right")
    pf_table.add_column("Evidence")

    def add_factor(label, points, evidence):
        # Only non-zero factors get a row; evidence is a callable so its text
        # is built solely for rows that are actually shown.
        if points != 0:
            pf_table.add_row(label, f"{points:+d}", evidence())

    add_factor("Tier-1 Reputation", pf.reputation_score, lambda: pf.reputation_evidence or "")
    add_factor("GitHub Sponsors", pf.funding_score, lambda: pf.funding_evidence or "")
    add_factor("Organization", pf.org_score, lambda: f"{github_data.org_admin_count} admins")
    add_factor("Visibility", pf.visibility_score, lambda: f"{breakdown.weekly_downloads:,} downloads/wk")
    add_factor("Distributed", pf.distributed_score, lambda: "<40% concentration")
    add_factor("Community", pf.community_score, lambda: f"{breakdown.unique_contributors} contributors")

    # Frustration is rendered in red, so it does not go through add_factor.
    if pf.frustration_score != 0:
        if pf.frustration_evidence:
            frustration_note = "; ".join(pf.frustration_evidence[:2])
        else:
            frustration_note = ""
        pf_table.add_row(
            "[red]Frustration[/red]",
            f"[red]{pf.frustration_score:+d}[/red]",
            frustration_note,
        )

    add_factor("Sentiment", pf.sentiment_score, lambda: "")

    if pf_table.row_count > 0:
        console.print(pf_table)

    # Explanation
    console.print(f"\n[bold]Explanation:[/bold] {breakdown.explanation}")

    # Recommendations
    console.print("\n[bold]Recommendations:[/bold]")
    for rec in breakdown.recommendations:
        console.print(f" • {rec}")
296
+
297
+
298
@app.command()
def check(
    packages_file: str = typer.Argument(..., help="JSON file with packages to check"),
    output: Optional[str] = typer.Option(None, "--output", "-o", help="Output JSON file"),
):
    """Check multiple packages from a JSON file."""
    # Placeholder: neither argument is read yet; the command always reports
    # that it is unimplemented and exits with status 1.
    console.print("[yellow]Batch checking not yet implemented[/yellow]")
    raise typer.Exit(code=1)
306
+
307
+
308
+ if __name__ == "__main__":
309
+ app()
@@ -0,0 +1,8 @@
1
+ """Data collectors for various sources."""
2
+
3
+ from ossuary.collectors.git import GitCollector
4
+ from ossuary.collectors.github import GitHubCollector
5
+ from ossuary.collectors.npm import NpmCollector
6
+ from ossuary.collectors.pypi import PyPICollector
7
+
8
+ __all__ = ["GitCollector", "GitHubCollector", "NpmCollector", "PyPICollector"]
@@ -0,0 +1,26 @@
1
+ """Base collector interface."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any
5
+
6
+
7
class BaseCollector(ABC):
    """Abstract interface for data collectors.

    Subclasses must implement both ``collect`` and ``is_available`` before
    they can be instantiated.
    """

    @abstractmethod
    async def collect(self, identifier: str) -> dict[str, Any]:
        """
        Gather data for ``identifier``.

        Args:
            identifier: Package name, repo URL, or other identifier

        Returns:
            Dictionary of collected data
        """

    @abstractmethod
    def is_available(self) -> bool:
        """Report whether this collector is usable (has required credentials, etc.)."""