pipguard-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pipguard/__init__.py +1 -0
- pipguard/analyzer.py +116 -0
- pipguard/cache.py +51 -0
- pipguard/display.py +90 -0
- pipguard/github.py +66 -0
- pipguard/main.py +322 -0
- pipguard/osv.py +19 -0
- pipguard/pypi.py +84 -0
- pipguard/scorer.py +84 -0
- pipguard_cli-0.1.0.dist-info/METADATA +8 -0
- pipguard_cli-0.1.0.dist-info/RECORD +13 -0
- pipguard_cli-0.1.0.dist-info/WHEEL +4 -0
- pipguard_cli-0.1.0.dist-info/entry_points.txt +2 -0
pipguard/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
pipguard/analyzer.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import ast
|
|
2
|
+
import io
|
|
3
|
+
import tarfile
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
|
|
8
|
+
# Files that execute automatically during install or first import
|
|
9
|
+
TARGET_FILES = {"setup.py", "pyproject.toml", "__init__.py"}
|
|
10
|
+
|
|
11
|
+
_NETWORK_PATTERNS = [
|
|
12
|
+
"requests.", "urllib.", "httpx.", "http.client",
|
|
13
|
+
"ftplib", "smtplib", "socket.",
|
|
14
|
+
]
|
|
15
|
+
_SHELL_PATTERNS = ["os.system", "subprocess.", "commands.getoutput"]
|
|
16
|
+
_HOME_STRINGS = ["~/.ssh", "~/.aws", "~/.config", "~/.gnupg", "~/.netrc"]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class _FlagVisitor(ast.NodeVisitor):
|
|
20
|
+
def __init__(self):
|
|
21
|
+
self.flags: dict[str, bool] = {
|
|
22
|
+
"network_call": False,
|
|
23
|
+
"env_access": False,
|
|
24
|
+
"home_dir_access": False,
|
|
25
|
+
"shell_exec": False,
|
|
26
|
+
"base64_obfuscation": False,
|
|
27
|
+
"dynamic_exec": False,
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
def visit_Call(self, node: ast.Call):
|
|
31
|
+
func_str = ast.unparse(node)
|
|
32
|
+
|
|
33
|
+
if any(p in func_str for p in _NETWORK_PATTERNS):
|
|
34
|
+
self.flags["network_call"] = True
|
|
35
|
+
|
|
36
|
+
# curl/wget buried in string args
|
|
37
|
+
for child in ast.walk(node):
|
|
38
|
+
if isinstance(child, ast.Constant) and isinstance(child.value, str):
|
|
39
|
+
if any(s in child.value for s in ("curl ", "wget ", "http://", "https://")):
|
|
40
|
+
self.flags["network_call"] = True
|
|
41
|
+
|
|
42
|
+
if "os.environ" in func_str or "os.getenv" in func_str:
|
|
43
|
+
self.flags["env_access"] = True
|
|
44
|
+
|
|
45
|
+
if any(p in func_str for p in _SHELL_PATTERNS):
|
|
46
|
+
self.flags["shell_exec"] = True
|
|
47
|
+
|
|
48
|
+
if func_str.startswith("eval(") or func_str.startswith("exec("):
|
|
49
|
+
self.flags["dynamic_exec"] = True
|
|
50
|
+
self.flags["shell_exec"] = True
|
|
51
|
+
|
|
52
|
+
if "base64.b64decode" in func_str or "base64.decodebytes" in func_str:
|
|
53
|
+
self.flags["base64_obfuscation"] = True
|
|
54
|
+
|
|
55
|
+
# getattr-based obfuscation: getattr(os, 'sys'+'tem')(...)
|
|
56
|
+
if (
|
|
57
|
+
isinstance(node.func, ast.Call)
|
|
58
|
+
and isinstance(node.func.func, ast.Name)
|
|
59
|
+
and node.func.func.id == "getattr"
|
|
60
|
+
):
|
|
61
|
+
self.flags["dynamic_exec"] = True
|
|
62
|
+
|
|
63
|
+
self.generic_visit(node)
|
|
64
|
+
|
|
65
|
+
def visit_Attribute(self, node: ast.Attribute):
|
|
66
|
+
full = ast.unparse(node)
|
|
67
|
+
if "expanduser" in full or ".home()" in full:
|
|
68
|
+
self.flags["home_dir_access"] = True
|
|
69
|
+
self.generic_visit(node)
|
|
70
|
+
|
|
71
|
+
def visit_Constant(self, node: ast.Constant):
|
|
72
|
+
if isinstance(node.value, str):
|
|
73
|
+
if any(node.value.startswith(p) for p in _HOME_STRINGS):
|
|
74
|
+
self.flags["home_dir_access"] = True
|
|
75
|
+
self.generic_visit(node)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _analyze_source(source: str) -> dict[str, bool]:
    """Parse *source* and return its suspicious-behavior flags.

    Unparseable code yields an empty dict so callers treat it as "nothing
    detected" instead of an error.
    """
    try:
        parsed = ast.parse(source)
    except SyntaxError:
        return {}

    scanner = _FlagVisitor()
    scanner.visit(parsed)
    return scanner.flags
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
async def analyze_tarball(tarball_url: str) -> dict[str, bool]:
    """Download a source tarball and run AST analysis on install-time files.

    Returns a dict with every flag key present (all False when nothing
    suspicious was found). Flags from individual files are OR-ed together.
    """
    async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client:
        r = await client.get(tarball_url)
        r.raise_for_status()

    # Start from an all-False flag set so callers always see every key.
    combined: dict[str, bool] = dict.fromkeys(
        (
            "network_call",
            "env_access",
            "home_dir_access",
            "shell_exec",
            "base64_obfuscation",
            "dynamic_exec",
        ),
        False,
    )

    buf = io.BytesIO(r.content)
    # BUGFIX: "r:*" auto-detects the compression (gz/bz2/xz) instead of
    # assuming gzip — sdists are not guaranteed to be .tar.gz.
    with tarfile.open(fileobj=buf, mode="r:*") as tar:
        for member in tar.getmembers():
            # Only inspect files that run at install or first import.
            if Path(member.name).name not in TARGET_FILES:
                continue
            f = tar.extractfile(member)
            if f is None:  # directories / special members
                continue
            source = f.read().decode("utf-8", errors="ignore")
            for key, val in _analyze_source(source).items():
                if val:
                    combined[key] = True

    return combined
|
pipguard/cache.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import sqlite3
|
|
3
|
+
import time
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
# On-disk cache location: one SQLite database under the user's home dir.
CACHE_DIR = Path.home() / ".pipguard"
CACHE_DB = CACHE_DIR / "cache.db"

# Time-to-live per entry class, in seconds.
TTL_TRUST = 86_400  # 24 hours
TTL_VULN = 21_600  # 6 hours


def _conn() -> sqlite3.Connection:
    """Open (and lazily initialize) the cache database.

    Creates the cache directory and the ``cache`` table on first use, so
    callers never need a separate setup step.

    NOTE(review): each call opens a fresh connection and callers rely on
    GC to close it — ``with con`` only scopes transactions in sqlite3.
    """
    CACHE_DIR.mkdir(exist_ok=True)
    con = sqlite3.connect(CACHE_DB)
    con.execute("""
        CREATE TABLE IF NOT EXISTS cache (
            key TEXT PRIMARY KEY,
            value TEXT NOT NULL,
            expires_at REAL NOT NULL
        )
    """)
    con.commit()
    return con
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get(key: str) -> dict | None:
    """Return the cached value for *key*, or None when absent or expired.

    Expired rows are treated as misses but left in place; the next ``set``
    for the same key overwrites them (INSERT OR REPLACE).
    """
    con = _conn()
    try:
        with con:  # transaction scope only — sqlite3's `with` does not close
            row = con.execute(
                "SELECT value, expires_at FROM cache WHERE key = ?", (key,)
            ).fetchone()
        if row is None:
            return None
        value, expires_at = row
        if time.time() > expires_at:
            return None
        return json.loads(value)
    finally:
        # BUGFIX: the original never closed the connection, leaking one
        # open sqlite3 connection (and file handle) per cache lookup.
        con.close()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def set(key: str, value: dict, ttl: int) -> None:
    """Store *value* under *key*, expiring ``ttl`` seconds from now.

    NOTE: intentionally shadows the builtin ``set`` inside this module;
    callers use it as ``cache.set(...)``, so the public name is kept.
    """
    con = _conn()
    try:
        with con:  # commit on success, rollback on error
            con.execute(
                "INSERT OR REPLACE INTO cache (key, value, expires_at) VALUES (?, ?, ?)",
                (key, json.dumps(value), time.time() + ttl),
            )
    finally:
        con.close()  # BUGFIX: the original leaked the connection
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def clear_vuln() -> None:
    """Wipe cached vulnerability data (for ``pipguard update --force``).

    BUGFIX: nothing in this package ever writes ``osv:``-prefixed keys —
    OSV results are embedded inside the ``full:`` report entries — so
    deleting only ``osv:%`` made --force a no-op. Clearing ``full:%`` as
    well forces fresh CVE data on the next scan, as documented.
    """
    con = _conn()
    try:
        with con:
            con.execute(
                "DELETE FROM cache WHERE key LIKE 'osv:%' OR key LIKE 'full:%'"
            )
    finally:
        con.close()  # BUGFIX: the original leaked the connection
|
pipguard/display.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from rich.console import Console
|
|
2
|
+
from rich.table import Table
|
|
3
|
+
|
|
4
|
+
console = Console()
|
|
5
|
+
|
|
6
|
+
_VERDICT_COLOR = {"LOW": "green", "MEDIUM": "yellow", "HIGH": "red"}
|
|
7
|
+
_VERDICT_LABEL = {"LOW": "LOW RISK", "MEDIUM": "MEDIUM RISK", "HIGH": "HIGH RISK"}
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def show_report(
    package: str,
    metadata: dict,
    download_stats: dict,
    vulns: list[dict],
    analysis_flags: dict,
    breakdown: dict,
    cached: bool = False,
) -> None:
    """Render the full risk report for one package to the terminal.

    Args:
        package: package name as the user typed it.
        metadata: ``pypi.fetch_metadata`` output (``age_days``, ``github_url``, ...).
        download_stats: ``pypi.fetch_download_stats`` output (``spike_pct``, ``last_month``).
        vulns: OSV results; only the first ID appears in the summary row.
        analysis_flags: AST flags from the analyzer; empty dict means no sdist.
        breakdown: scorer output with ``score`` and ``verdict`` keys.
        cached: when True, a "(cached)" note is added to the header.
    """
    cache_note = " [dim](cached)[/dim]" if cached else ""
    console.print(f"\nAnalyzing [bold]{package}[/bold]{cache_note}...\n")

    # ── Trust score ──────────────────────────────────────────────
    console.rule("[bold]TRUST SCORE[/bold]")
    t = Table(box=None, show_header=False, padding=(0, 2))
    t.add_column(width=28)
    t.add_column()

    # Age icon thresholds mirror the scorer's "new package" cutoff (30d),
    # with an extra caution band up to 90d for display only.
    age = metadata.get("age_days")
    age_str = f"{age}d" if age is not None else "unknown"
    if age is None:
        age_icon = "❓"
    elif age < 30:
        age_icon = "🔴"
    elif age < 90:
        age_icon = "⚠️"
    else:
        age_icon = "✅"
    t.add_row("Package age:", f"{age_str} {age_icon}")

    t.add_row(
        "GitHub repo:",
        "✅ linked" if metadata.get("github_url") else "🔴 none",
    )

    # Spike condition mirrors scorer.compute — keep the two in sync.
    spike = download_stats.get("spike_pct")
    last_month = download_stats.get("last_month") or 0
    if spike and spike > 300 and last_month < 50_000:
        t.add_row("Download spike:", f"+{spike:.0f}% ⚠️")
    else:
        t.add_row("Download spike:", "normal ✅")

    if vulns:
        t.add_row("Known vulns:", f"🔴 {len(vulns)} found ({vulns[0]['id']})")
    else:
        t.add_row("Known vulns:", "✅ none")

    console.print(t)

    # ── Code analysis ────────────────────────────────────────────
    console.rule("[bold]CODE ANALYSIS[/bold]")

    # Empty flags dict means the analyzer never ran (no sdist published).
    if not analysis_flags:
        console.print(" [dim]No source tarball available — code analysis skipped[/dim]")
    else:
        a = Table(box=None, show_header=False, padding=(0, 2))
        a.add_column(width=28)
        a.add_column()

        def flag_row(label: str, key: str):
            # One row per analyzer flag; missing keys read as "not found".
            found = analysis_flags.get(key, False)
            a.add_row(label, "🔴 FOUND" if found else "✅ NOT FOUND")

        flag_row("Network requests:", "network_call")
        flag_row("Env var access:", "env_access")
        flag_row("Shell execution:", "shell_exec")
        flag_row("Base64 obfuscation:", "base64_obfuscation")
        flag_row("Home dir access:", "home_dir_access")
        console.print(a)

    # ── Verdict ──────────────────────────────────────────────────
    verdict = breakdown["verdict"]
    score = breakdown["score"]
    color = _VERDICT_COLOR[verdict]
    console.rule()
    console.print(
        f" VERDICT: [{color}]{_VERDICT_LABEL[verdict]}[/{color}]"
        f" (Score: [bold]{score}[/bold])"
    )
    console.rule()
    console.print()
|
pipguard/github.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
import httpx
|
|
4
|
+
|
|
5
|
+
# Trove classifiers suggesting a package should never need the network at
# install time.
PURE_PYTHON_CLASSIFIERS = {
    "Programming Language :: Python :: Implementation :: CPython",
    "Topic :: Utilities",
    "Topic :: Text Processing",
    "Topic :: Software Development :: Libraries :: Python Modules",
}

# Trove classifiers where install-time network calls are plausible.
NETWORK_CLASSIFIERS = {
    "Topic :: Internet :: WWW/HTTP",
    "Topic :: System :: Networking",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Software Development :: Build Tools",
}

_NETWORK_KEYWORDS = [
    "download", "binary", "binaries", "pre-built", "native extension",
    "model weights", "fetches", "auto-update", "update check",
]
_PURE_KEYWORDS = ["pure python", "pure-python", "zero dependencies", "no network", "offline"]


def classify_from_classifiers(classifiers: list[str]) -> str:
    """Classify a package from its PyPI trove classifiers.

    Returns 'pure_python', 'network_expected', or 'ambiguous'. Costs
    nothing extra: classifiers ride along in the PyPI JSON response.
    A network-flavored classifier wins over a pure-Python one.
    """
    tags = set(classifiers)
    if tags & NETWORK_CLASSIFIERS:
        return "network_expected"
    if tags & PURE_PYTHON_CLASSIFIERS:
        return "pure_python"
    return "ambiguous"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
async def fetch_readme_classification(github_url: str) -> str:
    """Classify a package by scanning its GitHub README for keywords.

    Only called when the classifiers are ambiguous or network_expected.
    Returns 'network_expected', 'pure_python', or 'unknown'.
    """
    # BUGFIX: accept http:// links as well as https://, both of which occur
    # in real PyPI metadata.
    match = re.match(r"https?://github\.com/([^/]+/[^/\s]+)", github_url)
    if not match:
        return "unknown"

    # BUGFIX: strip a trailing ".git" — raw.githubusercontent.com 404s on
    # "owner/repo.git" paths, which previously made this always "unknown"
    # for clone-style URLs.
    repo = match.group(1).rstrip("/").removesuffix(".git")
    readme_url = f"https://raw.githubusercontent.com/{repo}/HEAD/README.md"

    async with httpx.AsyncClient(timeout=10, follow_redirects=True) as client:
        r = await client.get(readme_url)
        if r.status_code != 200:
            return "unknown"
        text = r.text.lower()

    # Network keywords win over pure keywords when both appear.
    if any(kw in text for kw in _NETWORK_KEYWORDS):
        return "network_expected"
    if any(kw in text for kw in _PURE_KEYWORDS):
        return "pure_python"
    return "unknown"
|
pipguard/main.py
ADDED
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import subprocess
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
from rich.prompt import Confirm
|
|
9
|
+
|
|
10
|
+
from pipguard import analyzer, cache, display, github, osv, pypi, scorer
|
|
11
|
+
from pipguard.display import console
|
|
12
|
+
|
|
13
|
+
app = typer.Typer(
|
|
14
|
+
help="pipguard — supply chain attack prevention for pip installs.",
|
|
15
|
+
add_completion=False,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
async def _analyze(package: str, version: str | None, no_cache: bool) -> tuple[dict, bool]:
    """Core analysis pipeline. Returns (result_dict, was_cached).

    Pipeline order:
      1. cache lookup (skipped with --no-cache)
      2. PyPI metadata fetch (resolves the exact version)
      3. download stats + OSV vulnerability check, run concurrently
      4. classifier / README context gating for the network-call signal
      5. AST scan of the sdist, when one exists
      6. scoring, then cache write
    """
    cache_key = f"full:{package}:{version or 'latest'}"

    if not no_cache:
        cached = cache.get(cache_key)
        if cached:
            return cached, True

    # Fetch metadata first to resolve the exact version, then query OSV with it
    metadata = await pypi.fetch_metadata(package, version)
    download_stats, vulns = await asyncio.gather(
        pypi.fetch_download_stats(package),
        osv.check_vulns(package, metadata["version"]),
    )

    # Classifier gating: only hit the GitHub README when the (free)
    # classifier signal is not already conclusive.
    classifier_context = github.classify_from_classifiers(metadata["classifiers"])
    readme_context = "unknown"

    if classifier_context in ("ambiguous", "network_expected") and metadata.get("github_url"):
        readme_context = await github.fetch_readme_classification(metadata["github_url"])

    # Layer 2: AST analysis — only if a source tarball exists
    analysis_flags: dict = {}
    if metadata.get("tarball_url"):
        analysis_flags = await analyzer.analyze_tarball(metadata["tarball_url"])

    breakdown = scorer.compute(
        metadata, download_stats, vulns, analysis_flags, classifier_context, readme_context
    )

    result = {
        "metadata": metadata,
        "download_stats": download_stats,
        "vulns": vulns,
        "analysis_flags": analysis_flags,
        "breakdown": breakdown,
    }

    # The fresh result is written back even on --no-cache runs, so the flag
    # acts as a forced refresh rather than a pure cache bypass.
    cache.set(cache_key, result, cache.TTL_TRUST)
    return result, False
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@app.command()
def install(
    package: str = typer.Argument(..., help="Package to analyze and install"),
    version: Optional[str] = typer.Option(None, "--version", "-v", help="Specific version"),
    no_cache: bool = typer.Option(False, "--no-cache", help="Bypass cache for this run"),
    yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation prompt"),
):
    """Analyze a package for supply chain risks, then install it.

    HIGH-risk verdicts require explicit confirmation (defaults to No),
    MEDIUM asks with a default of Yes, LOW installs without prompting.
    --yes suppresses every prompt, including for HIGH-risk packages.
    """
    try:
        result, cached = asyncio.run(_analyze(package, version, no_cache))
    except Exception as e:
        # Any pipeline failure (network error, PyPI 404, bad JSON) aborts.
        console.print(f"[red]Analysis failed: {e}[/red]")
        raise typer.Exit(1)

    display.show_report(
        package,
        result["metadata"],
        result["download_stats"],
        result["vulns"],
        result["analysis_flags"],
        result["breakdown"],
        cached=cached,
    )

    verdict = result["breakdown"]["verdict"]

    # Gate the actual install on the verdict unless --yes was given.
    if verdict == "HIGH" and not yes:
        if not Confirm.ask("Proceed anyway?", default=False):
            raise typer.Exit(1)
    elif verdict == "MEDIUM" and not yes:
        if not Confirm.ask("Proceed anyway?", default=True):
            raise typer.Exit(1)

    # Pin to the analyzed version so what gets installed is what was scanned.
    pkg_spec = f"{package}=={result['metadata']['version']}"
    console.print(f"Installing [bold]{pkg_spec}[/bold]...")
    # NOTE(review): a failing pip surfaces as an uncaught CalledProcessError
    # traceback here — consider catching it for a cleaner exit.
    subprocess.run([sys.executable, "-m", "pip", "install", pkg_spec], check=True)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@app.command()
def info(
    package: str = typer.Argument(..., help="Package to inspect"),
    version: Optional[str] = typer.Option(None, "--version", "-v"),
    no_cache: bool = typer.Option(False, "--no-cache"),
):
    """Show a risk report without installing."""
    # Run the shared pipeline; any failure becomes a clean non-zero exit.
    try:
        result, from_cache = asyncio.run(_analyze(package, version, no_cache))
    except Exception as exc:
        console.print(f"[red]Analysis failed: {exc}[/red]")
        raise typer.Exit(1)

    # Render the same report `install` shows, minus the pip step.
    display.show_report(
        package,
        result["metadata"],
        result["download_stats"],
        result["vulns"],
        result["analysis_flags"],
        result["breakdown"],
        cached=from_cache,
    )
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
@app.command()
def scan(
    file: str = typer.Option("requirements.txt", "--file", "-f", help="Requirements file"),
    ci: bool = typer.Option(False, "--ci", help="Non-interactive CI mode"),
    fail_on: str = typer.Option("high", "--fail-on", help="Fail threshold: medium or high"),
    no_cache: bool = typer.Option(False, "--no-cache"),
):
    """Scan all packages in a requirements file.

    Packages at or above the --fail-on score threshold get a full report;
    the rest get a one-line verdict. With --ci, any package over the
    threshold makes the command exit 1. Unscannable packages are warned
    about and skipped, never fatal.
    """
    req_path = Path(file)
    if not req_path.exists():
        console.print(f"[red]File not found: {file}[/red]")
        raise typer.Exit(1)

    # Minimal requirements parsing: blank/comment lines skipped, "==" pins
    # honored, everything else treated as an unpinned name.
    # NOTE(review): other specifiers (>=, extras, markers) are passed to the
    # PyPI lookup verbatim and will likely fail — confirm desired handling.
    packages: list[tuple[str, str | None]] = []
    for line in req_path.read_text().splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        if "==" in line:
            name, ver = line.split("==", 1)
            packages.append((name.strip(), ver.strip()))
        else:
            packages.append((line, None))

    # Score thresholds match the scorer's verdict bands (MEDIUM starts at
    # 31, HIGH at 61); unknown --fail-on values silently fall back to high.
    fail_score = {"medium": 31, "high": 61}.get(fail_on.lower(), 61)
    exit_code = 0

    for name, ver in packages:
        console.print(f"[dim]Scanning {name}...[/dim]")
        try:
            result, cached = asyncio.run(_analyze(name, ver, no_cache))
            bd = result["breakdown"]
            if bd["score"] >= fail_score:
                display.show_report(
                    name,
                    result["metadata"],
                    result["download_stats"],
                    result["vulns"],
                    result["analysis_flags"],
                    bd,
                    cached=cached,
                )
                if ci:
                    exit_code = 1
            else:
                verdict = bd["verdict"]
                color = {"LOW": "green", "MEDIUM": "yellow", "HIGH": "red"}[verdict]
                console.print(f" [{color}]{verdict}[/{color}] {name} (score: {bd['score']})")
        except Exception as e:
            # Best-effort: one broken package must not abort the whole scan.
            console.print(f"[yellow] Warning: could not scan {name}: {e}[/yellow]")

    if ci and exit_code:
        raise typer.Exit(exit_code)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
@app.command()
def history():
    """Show recent scan results from the local cache."""
    import json
    import sqlite3

    if not cache.CACHE_DB.exists():
        console.print("No scan history yet.")
        return

    # Read the 20 most recently written full-report entries straight from
    # the cache database.
    con = sqlite3.connect(cache.CACHE_DB)
    try:
        rows = con.execute(
            "SELECT key, value FROM cache WHERE key LIKE 'full:%' ORDER BY expires_at DESC LIMIT 20"
        ).fetchall()
    finally:
        con.close()

    if not rows:
        console.print("No scan history yet.")
        return

    from rich.table import Table

    table = Table(title="Recent Scans", show_lines=False)
    table.add_column("Package")
    table.add_column("Version")
    table.add_column("Score", justify="right")
    table.add_column("Verdict")

    palette = {"LOW": "green", "MEDIUM": "yellow", "HIGH": "red"}
    for key, raw in rows:
        summary = json.loads(raw).get("breakdown", {})
        # Keys look like "full:<package>:<version-or-latest>".
        _, pkg_name, pkg_version = key.split(":", 2)
        verdict = summary.get("verdict", "?")
        color = palette.get(verdict, "white")
        table.add_row(
            pkg_name,
            pkg_version,
            str(summary.get("score", "?")),
            f"[{color}]{verdict}[/{color}]",
        )

    console.print(table)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
@app.command()
def update(
    force: bool = typer.Option(False, "--force", help="Wipe and refresh all CVE cache entries"),
):
    """Manage the pipguard cache."""
    # Without --force this is informational only.
    if not force:
        console.print("Use [bold]pipguard update --force[/bold] to refresh CVE cache immediately.")
        return

    cache.clear_vuln()
    console.print(
        "[green]CVE cache cleared.[/green] Fresh vulnerability data will be fetched on next scan."
    )
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
@app.command()
def configure():
    """Set up automatic pip interception for your shell.

    Appends a `pip` wrapper function to the detected shell's startup file
    (zsh, fish, bash, or the Windows PowerShell profile). The wrapper
    redirects `pip install ...` through `pipguard install` and passes
    every other pip subcommand straight through.
    """
    import os
    import platform

    # Wrapper for bash/zsh: "${@:2}" forwards everything after "install".
    BASH_ZSH_FUNC = """
# pipguard — intercept pip install
pip() {
    if [ "$1" = "install" ]; then
        pipguard install "${@:2}"
    else
        command pip "$@"
    fi
}
"""

    # Fish uses 1-based argv and slice syntax instead of "${@:2}".
    FISH_FUNC = """
# pipguard — intercept pip install
function pip
    if test "$argv[1]" = "install"
        pipguard install $argv[2..]
    else
        command pip $argv
    end
end
"""

    # PowerShell: resolve the real pip binary to avoid recursing into this
    # function when forwarding non-install commands.
    POWERSHELL_FUNC = """
# pipguard — intercept pip install
function pip {
    if ($args[0] -eq "install") {
        pipguard install @($args | Select-Object -Skip 1)
    } else {
        & (Get-Command pip -CommandType Application | Select-Object -First 1).Source @args
    }
}
"""

    # The marker comment doubles as the idempotency check below.
    MARKER = "# pipguard — intercept pip install"

    def already_configured(path: Path) -> bool:
        # True when the marker comment is already in the config file.
        return path.exists() and MARKER in path.read_text()

    def append_to(path: Path, content: str):
        # Append-only: never rewrites existing user configuration.
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "a") as f:
            f.write(content)

    # Detect shell and config file
    if platform.system() == "Windows":
        # PowerShell
        # NOTE(review): this hardcodes the Windows PowerShell 5.x profile
        # path; PowerShell 7 uses Documents\PowerShell instead. Reading
        # $PROFILE from the running shell would cover both — confirm.
        ps_profile = Path(os.environ.get("USERPROFILE", str(Path.home()))) / "Documents" / "WindowsPowerShell" / "Microsoft.PowerShell_profile.ps1"
        if already_configured(ps_profile):
            console.print("[yellow]pipguard is already configured in your PowerShell profile.[/yellow]")
            return
        append_to(ps_profile, POWERSHELL_FUNC)
        console.print(f"[green]Done![/green] Added pip interceptor to:\n  {ps_profile}")
        console.print("\nReload your shell or run:")
        console.print("  [bold]. $PROFILE[/bold]")

    else:
        # POSIX: pick the rc file from $SHELL, defaulting to bash.
        shell = os.environ.get("SHELL", "")
        if "zsh" in shell:
            config = Path.home() / ".zshrc"
            func = BASH_ZSH_FUNC
        elif "fish" in shell:
            config = Path.home() / ".config" / "fish" / "config.fish"
            func = FISH_FUNC
        else:
            config = Path.home() / ".bashrc"
            func = BASH_ZSH_FUNC

        if already_configured(config):
            console.print(f"[yellow]pipguard is already configured in {config}[/yellow]")
            return

        append_to(config, func)
        console.print(f"[green]Done![/green] Added pip interceptor to:\n  {config}")
        console.print("\nReload your shell or run:")
        console.print(f"  [bold]source {config}[/bold]")

    console.print("\nFrom now on, [bold]pip install <package>[/bold] will automatically run through pipguard.")
    console.print("To remove, delete the pip() function from the config file shown above.")
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
# Allow `python -m pipguard.main` in addition to the console entry point.
if __name__ == "__main__":
    app()
|
pipguard/osv.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import httpx
|
|
2
|
+
|
|
3
|
+
# OSV (Open Source Vulnerabilities) query endpoint.
OSV_URL = "https://api.osv.dev/v1/query"


async def check_vulns(package: str, version: str) -> list[dict]:
    """Query OSV for known vulnerabilities affecting ``package==version``.

    Returns one ``{"id", "summary"}`` dict per advisory; empty list when
    none are known. Raises on HTTP errors.
    """
    payload = {
        "version": version,
        "package": {"name": package, "ecosystem": "PyPI"},
    }

    async with httpx.AsyncClient(timeout=10) as client:
        resp = await client.post(OSV_URL, json=payload)
        resp.raise_for_status()
        body = resp.json()

    findings = []
    for advisory in body.get("vulns", []):
        findings.append(
            {"id": advisory["id"], "summary": advisory.get("summary", "No description")}
        )
    return findings
|
pipguard/pypi.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
from datetime import datetime, timezone
|
|
2
|
+
|
|
3
|
+
import httpx
|
|
4
|
+
|
|
5
|
+
PYPI_URL = "https://pypi.org/pypi/{package}/json"
|
|
6
|
+
PYPI_VERSION_URL = "https://pypi.org/pypi/{package}/{version}/json"
|
|
7
|
+
PYPISTATS_URL = "https://pypistats.org/api/packages/{package}/recent"
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
async def fetch_metadata(package: str, version: str | None = None) -> dict:
    """Fetch and normalize PyPI JSON metadata for a package.

    Returns a dict with: name, version (resolved), age_days, classifiers,
    github_url, maintainer, tarball_url (sdist or None), release_count.
    Raises httpx.HTTPStatusError for unknown packages/versions.
    """
    url = (
        PYPI_VERSION_URL.format(package=package, version=version)
        if version
        else PYPI_URL.format(package=package)
    )
    async with httpx.AsyncClient(timeout=10) as client:
        r = await client.get(url)
        r.raise_for_status()
        data = r.json()

    info = data["info"]
    # NOTE(review): the version-specific endpoint may not include a
    # "releases" mapping, in which case age_days is None and release_count
    # is 0 for pinned lookups — confirm against the PyPI JSON API docs.
    releases = data.get("releases", {})

    # Earliest release date across all versions
    # upload_time is naive ISO-8601; PyPI times are UTC, so tzinfo is
    # attached explicitly to allow aware arithmetic below.
    all_dates = []
    for files in releases.values():
        for f in files:
            if f.get("upload_time"):
                all_dates.append(
                    datetime.fromisoformat(f["upload_time"]).replace(tzinfo=timezone.utc)
                )
    first_release = min(all_dates) if all_dates else None
    age_days = (datetime.now(timezone.utc) - first_release).days if first_release else None

    # GitHub repo from project_urls or home_page
    project_urls = info.get("project_urls") or {}
    candidates = list(project_urls.values()) + [info.get("home_page") or ""]
    github_url = next((u for u in candidates if u and "github.com" in u), None)

    # Source tarball URL — version-specific endpoint puts files under data["urls"],
    # the non-version endpoint puts them under releases[version]
    target_version = version or info["version"]
    tarball_url = None
    candidate_files = data.get("urls") or releases.get(target_version, [])
    for f in candidate_files:
        if f.get("packagetype") == "sdist":
            tarball_url = f["url"]
            break

    return {
        "name": info["name"],
        "version": target_version,
        "age_days": age_days,
        "classifiers": info.get("classifiers") or [],
        "github_url": github_url,
        "maintainer": info.get("maintainer") or info.get("author"),
        "tarball_url": tarball_url,
        "release_count": len(releases),
    }
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
async def fetch_download_stats(package: str) -> dict:
    """Fetch recent download counts from pypistats.org.

    Returns last_week, last_month and a spike percentage; all three are
    None when the stats service has no data for the package.
    """
    async with httpx.AsyncClient(timeout=10) as client:
        resp = await client.get(PYPISTATS_URL.format(package=package))
        if resp.status_code != 200:
            return {"last_week": None, "last_month": None, "spike_pct": None}
        stats = resp.json()["data"]

    weekly = stats.get("last_week") or 0
    monthly = stats.get("last_month") or 0

    # A "normal" week is a quarter of the month; spike is the % excess
    # over that baseline (None when there is no baseline to compare to).
    baseline = monthly / 4 if monthly else 0
    if baseline > 0:
        spike = (weekly - baseline) / baseline * 100
    else:
        spike = None

    return {
        "last_week": weekly,
        "last_month": monthly,
        "spike_pct": spike,
    }
|
pipguard/scorer.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# Points each risk signal contributes to the total score.
_WEIGHTS = {
    "known_cve": 50,
    "shell_exec": 40,
    "base64_obfuscation": 35,
    "package_new": 30,
    "home_dir_access": 30,
    "network_call_full": 25,  # pure Python package making network calls
    "network_call_discounted": 8,  # network calls expected for this package type
    "env_access": 20,
    "maintainer_new": 20,
    "download_spike": 15,
    "no_github": 10,
}


def compute(
    metadata: dict,
    download_stats: dict,
    vulns: list[dict],
    analysis_flags: dict,
    classifier_context: str,  # 'pure_python' | 'network_expected' | 'ambiguous'
    readme_context: str,  # 'network_expected' | 'pure_python' | 'unknown'
) -> dict:
    """Aggregate all risk signals into a score and a LOW/MEDIUM/HIGH verdict.

    Returns {"score": int, "verdict": str, "signals": {label: points}}.
    Verdict bands: 0-30 LOW, 31-60 MEDIUM, 61+ HIGH.
    """
    signals: dict[str, int] = {}

    if vulns:
        signals[f"Known CVE ({vulns[0]['id']})"] = _WEIGHTS["known_cve"]

    age_days = metadata.get("age_days")
    if age_days is not None and age_days < 30:
        signals[f"Package < 30 days old ({age_days}d)"] = _WEIGHTS["package_new"]

    # A spike only counts for low-traffic packages (< 50k/month) — popular
    # packages see large legitimate swings.
    spike_pct = download_stats.get("spike_pct")
    last_month = download_stats.get("last_month") or 0
    if spike_pct and spike_pct > 300 and last_month < 50_000:
        signals[f"Download spike +{spike_pct:.0f}%"] = _WEIGHTS["download_spike"]

    if not metadata.get("github_url"):
        signals["No GitHub repo linked"] = _WEIGHTS["no_github"]

    # Straightforward AST flags map 1:1 onto weights.
    for flag, label in (
        ("shell_exec", "Shell execution in setup.py"),
        ("base64_obfuscation", "Base64 obfuscation"),
        ("home_dir_access", "Home directory access"),
        ("env_access", "Env variable access"),
    ):
        if analysis_flags.get(flag):
            signals[label] = _WEIGHTS[flag]

    # Network calls are context-sensitive: full weight for a pure-Python
    # package, a discount when classifiers or the README say networking is
    # expected, full weight otherwise.
    if analysis_flags.get("network_call"):
        if classifier_context == "pure_python":
            signals["Network call in setup.py (unexpected for pure Python)"] = (
                _WEIGHTS["network_call_full"]
            )
        elif "network_expected" in (classifier_context, readme_context):
            signals["Network call in setup.py (expected for this package type)"] = (
                _WEIGHTS["network_call_discounted"]
            )
        else:
            signals["Network call in setup.py"] = _WEIGHTS["network_call_full"]

    score = sum(signals.values())
    if score > 60:
        verdict = "HIGH"
    elif score > 30:
        verdict = "MEDIUM"
    else:
        verdict = "LOW"

    return {"score": score, "verdict": verdict, "signals": signals}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
pipguard/__init__.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
|
|
2
|
+
pipguard/analyzer.py,sha256=Tz_AfXvee91-Ru_U-tPR3bxK6YOpAmFLqNUkjL6DSzo,3828
|
|
3
|
+
pipguard/cache.py,sha256=oxMN7yuoXnaO3UYbRjpp8uvmY8cJhtDHtzgj0qb3JaY,1318
|
|
4
|
+
pipguard/display.py,sha256=yjRPjlcLnm2vkkixQEKAbGxVjcNow3RM3jVfPSn2kps,3155
|
|
5
|
+
pipguard/github.py,sha256=s9YObjmj_ZxScG76seevB868HOxxI0g6IEpwPPj7O9o,2222
|
|
6
|
+
pipguard/main.py,sha256=ns5sUpLdg4cx-X2nDswsWhxwWMG8RCAYpjbgB9LfZHY,10536
|
|
7
|
+
pipguard/osv.py,sha256=tm8pDKrP88DoXgqjEDRHNeGRdZzEI55N9vlThMkoqjI,523
|
|
8
|
+
pipguard/pypi.py,sha256=OtZih1HQ-O_lussiac7Ft35bOw6BFGyUaxcoADz-j1Q,2965
|
|
9
|
+
pipguard/scorer.py,sha256=9P329MUvC3_YtGzIxluccVeXxedP3wZF699JsqP0w7w,2727
|
|
10
|
+
pipguard_cli-0.1.0.dist-info/METADATA,sha256=lswJJCw0HDZq3M_UDKeEDTcOCMSI9x8EXentebMLbFo,222
|
|
11
|
+
pipguard_cli-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
12
|
+
pipguard_cli-0.1.0.dist-info/entry_points.txt,sha256=WBKw7YrjmXETv7L4TiFS-EfmVPcIdznX7y4c_hi2TJc,47
|
|
13
|
+
pipguard_cli-0.1.0.dist-info/RECORD,,
|