pipguard-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pipguard/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
pipguard/analyzer.py ADDED
@@ -0,0 +1,116 @@
1
+ import ast
2
+ import io
3
+ import tarfile
4
+ from pathlib import Path
5
+
6
+ import httpx
7
+
8
# Files that execute automatically during install or first import
TARGET_FILES = {"setup.py", "pyproject.toml", "__init__.py"}

_NETWORK_PATTERNS = [
    "requests.", "urllib.", "httpx.", "http.client",
    "ftplib", "smtplib", "socket.",
]
_SHELL_PATTERNS = ["os.system", "subprocess.", "commands.getoutput"]
_HOME_STRINGS = ["~/.ssh", "~/.aws", "~/.config", "~/.gnupg", "~/.netrc"]


class _FlagVisitor(ast.NodeVisitor):
    """AST walker that flips boolean risk flags when suspicious constructs appear.

    Detection is substring matching over the unparsed node text — cheap and
    deliberately over-approximate (one sighting is enough to set a flag).
    """

    def __init__(self):
        # All flags start False; visiting never clears a flag once set.
        self.flags: dict[str, bool] = dict.fromkeys(
            (
                "network_call",
                "env_access",
                "home_dir_access",
                "shell_exec",
                "base64_obfuscation",
                "dynamic_exec",
            ),
            False,
        )

    def visit_Call(self, node: ast.Call):
        text = ast.unparse(node)

        if any(pat in text for pat in _NETWORK_PATTERNS):
            self.flags["network_call"] = True

        # curl/wget (or raw URLs) buried inside string arguments
        for sub in ast.walk(node):
            if not (isinstance(sub, ast.Constant) and isinstance(sub.value, str)):
                continue
            if any(tok in sub.value for tok in ("curl ", "wget ", "http://", "https://")):
                self.flags["network_call"] = True

        if "os.environ" in text or "os.getenv" in text:
            self.flags["env_access"] = True

        if any(pat in text for pat in _SHELL_PATTERNS):
            self.flags["shell_exec"] = True

        # Bare eval()/exec() counts as both dynamic and shell-grade execution.
        if text.startswith(("eval(", "exec(")):
            self.flags["dynamic_exec"] = True
            self.flags["shell_exec"] = True

        if "base64.b64decode" in text or "base64.decodebytes" in text:
            self.flags["base64_obfuscation"] = True

        # getattr-based obfuscation: getattr(os, 'sys' + 'tem')(...)
        inner = node.func
        if (
            isinstance(inner, ast.Call)
            and isinstance(inner.func, ast.Name)
            and inner.func.id == "getattr"
        ):
            self.flags["dynamic_exec"] = True

        self.generic_visit(node)

    def visit_Attribute(self, node: ast.Attribute):
        rendered = ast.unparse(node)
        # os.path.expanduser / Path.home() style probes of the user's home dir
        if "expanduser" in rendered or ".home()" in rendered:
            self.flags["home_dir_access"] = True
        self.generic_visit(node)

    def visit_Constant(self, node: ast.Constant):
        # String literals pointing straight at credential directories
        if isinstance(node.value, str) and any(
            node.value.startswith(prefix) for prefix in _HOME_STRINGS
        ):
            self.flags["home_dir_access"] = True
        self.generic_visit(node)
76
+
77
+
78
def _analyze_source(source: str) -> dict[str, bool]:
    """Parse *source* and return the visitor's risk flags.

    Returns an empty dict when the text is not valid Python — callers treat
    that as "nothing detected" rather than an error.
    """
    try:
        tree = ast.parse(source)
    except SyntaxError:
        # Not parseable (e.g. a TOML file caught by the name filter).
        return {}
    walker = _FlagVisitor()
    walker.visit(tree)
    return walker.flags
86
+
87
+
88
async def analyze_tarball(tarball_url: str) -> dict[str, bool]:
    """Download a source tarball and run AST analysis on install-time files.

    Only files that execute automatically (see TARGET_FILES) are scanned;
    the result is the OR-merge of every scanned file's risk flags.

    Raises httpx.HTTPStatusError on a failed download and tarfile.TarError
    when the payload is not a readable tar archive.
    """
    async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client:
        r = await client.get(tarball_url)
        r.raise_for_status()

    combined: dict[str, bool] = {
        "network_call": False,
        "env_access": False,
        "home_dir_access": False,
        "shell_exec": False,
        "base64_obfuscation": False,
        "dynamic_exec": False,
    }

    buf = io.BytesIO(r.content)
    # "r:*" auto-detects the compression (gz/bz2/xz) instead of assuming
    # gzip — sdists are normally .tar.gz, but tarfile reads the rest too.
    with tarfile.open(fileobj=buf, mode="r:*") as tar:
        for member in tar.getmembers():
            if Path(member.name).name not in TARGET_FILES:
                continue
            f = tar.extractfile(member)
            if f is None:
                # Directories / special members have no file object.
                continue
            source = f.read().decode("utf-8", errors="ignore")
            for key, val in _analyze_source(source).items():
                combined[key] = combined[key] or val

    return combined
pipguard/cache.py ADDED
@@ -0,0 +1,51 @@
1
+ import json
2
+ import sqlite3
3
+ import time
4
+ from pathlib import Path
5
+
6
# On-disk cache location: a per-user SQLite database under ~/.pipguard.
CACHE_DIR = Path.home() / ".pipguard"
CACHE_DB = CACHE_DIR / "cache.db"

# Time-to-live per entry class, in seconds.
TTL_TRUST = 86_400  # 24 hours — full trust-score analysis results
TTL_VULN = 21_600  # 6 hours — vulnerability lookups go stale faster
11
+
12
+
13
def _conn() -> sqlite3.Connection:
    """Open (and lazily initialize) the cache database.

    Creates the cache directory and schema on first use. The caller owns
    the returned connection and is responsible for closing it.
    """
    # parents=True: also create missing intermediate directories instead of
    # failing with FileNotFoundError on unusual home layouts.
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    con = sqlite3.connect(CACHE_DB)
    con.execute("""
        CREATE TABLE IF NOT EXISTS cache (
            key TEXT PRIMARY KEY,
            value TEXT NOT NULL,
            expires_at REAL NOT NULL
        )
    """)
    con.commit()
    return con
25
+
26
+
27
def get(key: str) -> dict | None:
    """Return the cached dict for *key*, or None when absent or expired."""
    con = _conn()
    try:
        row = con.execute(
            "SELECT value, expires_at FROM cache WHERE key = ?", (key,)
        ).fetchone()
    finally:
        # sqlite3's `with connection` only manages the transaction, not the
        # handle — close explicitly so repeated calls don't leak connections.
        con.close()
    if row is None:
        return None
    value, expires_at = row
    if time.time() > expires_at:
        # Stale entry: treat as a miss (the row is overwritten on next set()).
        return None
    return json.loads(value)
38
+
39
+
40
def set(key: str, value: dict, ttl: int) -> None:
    """Store *value* under *key*, expiring *ttl* seconds from now.

    NOTE: intentionally shadows the builtin `set` inside this module —
    callers use the dict-style `cache.set(...)` API, so the name stays.
    """
    con = _conn()
    try:
        # `with con` opens a transaction: commit on success, rollback on error.
        with con:
            con.execute(
                "INSERT OR REPLACE INTO cache (key, value, expires_at) VALUES (?, ?, ?)",
                (key, json.dumps(value), time.time() + ttl),
            )
    finally:
        # The transaction context manager above does not close the handle.
        con.close()
46
+
47
+
48
def clear_vuln() -> None:
    """Wipe all CVE/vulnerability cache entries (for pipguard update --force)."""
    con = _conn()
    try:
        # `with con` commits the DELETE; it does NOT close the connection.
        with con:
            con.execute("DELETE FROM cache WHERE key LIKE 'osv:%'")
    finally:
        con.close()  # avoid leaking the sqlite handle
pipguard/display.py ADDED
@@ -0,0 +1,90 @@
1
+ from rich.console import Console
2
+ from rich.table import Table
3
+
4
# Shared Rich console: every command in the CLI prints through this object.
console = Console()

# Verdict → Rich color / human-readable label for the final banner.
_VERDICT_COLOR = {"LOW": "green", "MEDIUM": "yellow", "HIGH": "red"}
_VERDICT_LABEL = {"LOW": "LOW RISK", "MEDIUM": "MEDIUM RISK", "HIGH": "HIGH RISK"}
8
+
9
+
10
def show_report(
    package: str,
    metadata: dict,
    download_stats: dict,
    vulns: list[dict],
    analysis_flags: dict,
    breakdown: dict,
    cached: bool = False,
) -> None:
    """Render the full risk report for *package* to the shared console.

    Args:
        package: distribution name being reported on.
        metadata: condensed PyPI metadata (age_days, github_url, ...).
        download_stats: recent download counts plus spike percentage.
        vulns: OSV results as {"id", "summary"} dicts.
        analysis_flags: AST risk flags; an empty dict means no sdist was
            available and the code-analysis section is skipped.
        breakdown: scorer output — {"score", "verdict", "signals"}.
        cached: annotate the header when the result came from the local cache.
    """
    cache_note = " [dim](cached)[/dim]" if cached else ""
    console.print(f"\nAnalyzing [bold]{package}[/bold]{cache_note}...\n")

    # ── Trust score ──────────────────────────────────────────────
    console.rule("[bold]TRUST SCORE[/bold]")
    t = Table(box=None, show_header=False, padding=(0, 2))
    t.add_column(width=28)
    t.add_column()

    age = metadata.get("age_days")
    age_str = f"{age}d" if age is not None else "unknown"
    if age is None:
        age_icon = "❓"
    elif age < 30:
        age_icon = "🔴"  # brand-new packages are a classic typosquat signal
    elif age < 90:
        age_icon = "⚠️"
    else:
        age_icon = "✅"
    t.add_row("Package age:", f"{age_str} {age_icon}")

    t.add_row(
        "GitHub repo:",
        "✅ linked" if metadata.get("github_url") else "🔴 none",
    )

    # A spike is only alarming for low-volume packages (same rule the scorer uses).
    spike = download_stats.get("spike_pct")
    last_month = download_stats.get("last_month") or 0
    if spike and spike > 300 and last_month < 50_000:
        t.add_row("Download spike:", f"+{spike:.0f}% ⚠️")
    else:
        t.add_row("Download spike:", "normal ✅")

    if vulns:
        t.add_row("Known vulns:", f"🔴 {len(vulns)} found ({vulns[0]['id']})")
    else:
        t.add_row("Known vulns:", "✅ none")

    console.print(t)

    # ── Code analysis ────────────────────────────────────────────
    console.rule("[bold]CODE ANALYSIS[/bold]")

    if not analysis_flags:
        console.print(" [dim]No source tarball available — code analysis skipped[/dim]")
    else:
        a = Table(box=None, show_header=False, padding=(0, 2))
        a.add_column(width=28)
        a.add_column()

        def flag_row(label: str, key: str):
            found = analysis_flags.get(key, False)
            a.add_row(label, "🔴 FOUND" if found else "✅ NOT FOUND")

        flag_row("Network requests:", "network_call")
        flag_row("Env var access:", "env_access")
        flag_row("Shell execution:", "shell_exec")
        flag_row("Base64 obfuscation:", "base64_obfuscation")
        flag_row("Home dir access:", "home_dir_access")
        # Previously missing: the analyzer also reports eval/exec/getattr
        # tricks under "dynamic_exec" — surface it like the other flags.
        flag_row("Dynamic execution:", "dynamic_exec")
        console.print(a)

    # ── Verdict ──────────────────────────────────────────────────
    verdict = breakdown["verdict"]
    score = breakdown["score"]
    color = _VERDICT_COLOR[verdict]
    console.rule()
    console.print(
        f" VERDICT: [{color}]{_VERDICT_LABEL[verdict]}[/{color}]"
        f" (Score: [bold]{score}[/bold])"
    )
    console.rule()
    console.print()
pipguard/github.py ADDED
@@ -0,0 +1,66 @@
1
+ import re
2
+
3
+ import httpx
4
+
5
# Classifiers that indicate a package has no business making network calls at install time
PURE_PYTHON_CLASSIFIERS = {
    "Programming Language :: Python :: Implementation :: CPython",
    "Topic :: Utilities",
    "Topic :: Text Processing",
    "Topic :: Software Development :: Libraries :: Python Modules",
}

# Classifiers where network calls are plausible
NETWORK_CLASSIFIERS = {
    "Topic :: Internet :: WWW/HTTP",
    "Topic :: System :: Networking",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Software Development :: Build Tools",
}

_NETWORK_KEYWORDS = [
    "download", "binary", "binaries", "pre-built", "native extension",
    "model weights", "fetches", "auto-update", "update check",
]
_PURE_KEYWORDS = ["pure python", "pure-python", "zero dependencies", "no network", "offline"]


def classify_from_classifiers(classifiers: list[str]) -> str:
    """
    Bucket a package by its PyPI trove classifiers.

    Returns 'pure_python', 'network_expected', or 'ambiguous'. This check is
    free — classifiers arrive in the PyPI JSON response we already fetched.
    When both kinds of classifier are present, network-expected wins.
    """
    present = set(classifiers)
    if present & NETWORK_CLASSIFIERS:
        return "network_expected"
    if present & PURE_PYTHON_CLASSIFIERS:
        return "pure_python"
    return "ambiguous"
42
+
43
+
44
async def fetch_readme_classification(github_url: str) -> str:
    """
    Classify a project from keywords in its GitHub README.

    Only called when classifiers are ambiguous or network_expected.
    Returns 'network_expected', 'pure_python', or 'unknown'.
    """
    match = re.match(r"https://github\.com/([^/]+/[^/\s]+)", github_url)
    if not match:
        return "unknown"

    # Repo slugs copied from PyPI metadata often carry a trailing ".git";
    # raw.githubusercontent.com rejects that form, so strip it. (The old
    # rstrip("/") was dead code: the regex group cannot contain a slash.)
    repo = match.group(1).removesuffix(".git")
    readme_url = f"https://raw.githubusercontent.com/{repo}/HEAD/README.md"

    async with httpx.AsyncClient(timeout=10) as client:
        r = await client.get(readme_url)
        if r.status_code != 200:
            return "unknown"
        text = r.text.lower()

    if any(kw in text for kw in _NETWORK_KEYWORDS):
        return "network_expected"
    if any(kw in text for kw in _PURE_KEYWORDS):
        return "pure_python"
    return "unknown"
pipguard/main.py ADDED
@@ -0,0 +1,322 @@
1
+ import asyncio
2
+ import subprocess
3
+ import sys
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+ import typer
8
+ from rich.prompt import Confirm
9
+
10
+ from pipguard import analyzer, cache, display, github, osv, pypi, scorer
11
+ from pipguard.display import console
12
+
13
# Typer application object: each @app.command() below becomes a
# `pipguard <command>` subcommand (wired up via the console_scripts
# entry point `pipguard = pipguard.main:app`).
app = typer.Typer(
    help="pipguard — supply chain attack prevention for pip installs.",
    add_completion=False,  # hide the --install-completion options from --help
)
17
+
18
+
19
async def _analyze(package: str, version: str | None, no_cache: bool) -> tuple[dict, bool]:
    """Core analysis pipeline. Returns (result_dict, was_cached)."""
    cache_key = f"full:{package}:{version or 'latest'}"

    # Serve a cached result unless the caller explicitly opted out.
    if not no_cache:
        hit = cache.get(cache_key)
        if hit:
            return hit, True

    # Metadata first (it resolves the exact version), then stats + OSV in parallel.
    metadata = await pypi.fetch_metadata(package, version)
    download_stats, vulns = await asyncio.gather(
        pypi.fetch_download_stats(package),
        osv.check_vulns(package, metadata["version"]),
    )

    # Classifier gating: only hit the GitHub README when classifiers alone
    # can't rule network activity out (i.e. anything but pure_python).
    classifier_context = github.classify_from_classifiers(metadata["classifiers"])
    readme_context = "unknown"
    if metadata.get("github_url") and classifier_context != "pure_python":
        readme_context = await github.fetch_readme_classification(metadata["github_url"])

    # Layer 2: AST analysis — only when a source tarball exists.
    analysis_flags: dict = {}
    tarball = metadata.get("tarball_url")
    if tarball:
        analysis_flags = await analyzer.analyze_tarball(tarball)

    breakdown = scorer.compute(
        metadata, download_stats, vulns, analysis_flags, classifier_context, readme_context
    )

    result = {
        "metadata": metadata,
        "download_stats": download_stats,
        "vulns": vulns,
        "analysis_flags": analysis_flags,
        "breakdown": breakdown,
    }
    cache.set(cache_key, result, cache.TTL_TRUST)
    return result, False
61
+
62
+
63
@app.command()
def install(
    package: str = typer.Argument(..., help="Package to analyze and install"),
    version: Optional[str] = typer.Option(None, "--version", "-v", help="Specific version"),
    no_cache: bool = typer.Option(False, "--no-cache", help="Bypass cache for this run"),
    yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation prompt"),
):
    """Analyze a package for supply chain risks, then install it."""
    try:
        result, cached = asyncio.run(_analyze(package, version, no_cache))
    except Exception as e:
        console.print(f"[red]Analysis failed: {e}[/red]")
        raise typer.Exit(1)

    display.show_report(
        package,
        result["metadata"],
        result["download_stats"],
        result["vulns"],
        result["analysis_flags"],
        result["breakdown"],
        cached=cached,
    )

    # Gate on the verdict: HIGH prompts with default "no", MEDIUM with
    # default "yes"; --yes bypasses the prompt entirely.
    verdict = result["breakdown"]["verdict"]
    if not yes and verdict in ("HIGH", "MEDIUM"):
        if not Confirm.ask("Proceed anyway?", default=(verdict == "MEDIUM")):
            raise typer.Exit(1)

    pkg_spec = f"{package}=={result['metadata']['version']}"
    console.print(f"Installing [bold]{pkg_spec}[/bold]...")
    # List-form argv (shell=False): the spec is never shell-interpreted.
    subprocess.run([sys.executable, "-m", "pip", "install", pkg_spec], check=True)
99
+
100
+
101
@app.command()
def info(
    package: str = typer.Argument(..., help="Package to inspect"),
    version: Optional[str] = typer.Option(None, "--version", "-v"),
    no_cache: bool = typer.Option(False, "--no-cache"),
):
    """Show a risk report without installing."""
    try:
        result, cached = asyncio.run(_analyze(package, version, no_cache))
    except Exception as e:
        console.print(f"[red]Analysis failed: {e}[/red]")
        raise typer.Exit(1)

    # Same report as `install`, minus the prompt and the pip invocation.
    display.show_report(
        package,
        result["metadata"],
        result["download_stats"],
        result["vulns"],
        result["analysis_flags"],
        result["breakdown"],
        cached=cached,
    )
123
+
124
+
125
@app.command()
def scan(
    file: str = typer.Option("requirements.txt", "--file", "-f", help="Requirements file"),
    ci: bool = typer.Option(False, "--ci", help="Non-interactive CI mode"),
    fail_on: str = typer.Option("high", "--fail-on", help="Fail threshold: medium or high"),
    no_cache: bool = typer.Option(False, "--no-cache"),
):
    """Scan all packages in a requirements file."""
    req_path = Path(file)
    if not req_path.exists():
        console.print(f"[red]File not found: {file}[/red]")
        raise typer.Exit(1)

    packages = _parse_requirements(req_path.read_text())

    # Thresholds mirror the scorer's verdict bands (MEDIUM > 30, HIGH > 60).
    fail_score = {"medium": 31, "high": 61}.get(fail_on.lower(), 61)
    exit_code = 0

    for name, ver in packages:
        console.print(f"[dim]Scanning {name}...[/dim]")
        try:
            result, cached = asyncio.run(_analyze(name, ver, no_cache))
            bd = result["breakdown"]
            if bd["score"] >= fail_score:
                display.show_report(
                    name,
                    result["metadata"],
                    result["download_stats"],
                    result["vulns"],
                    result["analysis_flags"],
                    bd,
                    cached=cached,
                )
                if ci:
                    exit_code = 1
            else:
                verdict = bd["verdict"]
                color = {"LOW": "green", "MEDIUM": "yellow", "HIGH": "red"}[verdict]
                console.print(f" [{color}]{verdict}[/{color}] {name} (score: {bd['score']})")
        except Exception as e:
            # Best-effort: one unscannable requirement must not abort the run.
            console.print(f"[yellow] Warning: could not scan {name}: {e}[/yellow]")

    if ci and exit_code:
        raise typer.Exit(exit_code)


def _parse_requirements(text: str) -> list[tuple[str, str | None]]:
    """Extract (name, pinned_version_or_None) pairs from requirements text.

    Handles cases the old inline parser missed: inline '#' comments,
    environment markers (after ';'), extras ('pkg[extra]'), and non-==
    specifiers (>=, <=, ~=, !=, >, <) — those are analyzed as "latest".
    Option lines (-r, -e, --hash, ...) and URL/path requirements are skipped.
    """
    packages: list[tuple[str, str | None]] = []
    for raw in text.splitlines():
        line = raw.split("#", 1)[0].strip()  # drop comments and blanks
        if not line or line.startswith("-"):
            continue  # pip options such as -r/-e/--hash
        line = line.split(";", 1)[0].strip()  # drop environment markers
        if "://" in line or line.startswith("."):
            continue  # URL or local-path requirement — nothing to look up
        if "==" in line:
            name, ver = line.split("==", 1)
            ver = ver.strip() or None
        else:
            # Cut the name at the first specifier character, if any.
            cut = len(line)
            for ch in "<>!~=":
                idx = line.find(ch)
                if idx != -1:
                    cut = min(cut, idx)
            name, ver = line[:cut], None
        name = name.split("[", 1)[0].strip()  # drop extras
        if name:
            packages.append((name, ver))
    return packages
178
+
179
+
180
@app.command()
def history():
    """Show recent scan results from the local cache."""
    import json
    import sqlite3

    if not cache.CACHE_DB.exists():
        console.print("No scan history yet.")
        return

    con = sqlite3.connect(cache.CACHE_DB)
    try:
        rows = con.execute(
            "SELECT key, value FROM cache WHERE key LIKE 'full:%' ORDER BY expires_at DESC LIMIT 20"
        ).fetchall()
    finally:
        con.close()

    if not rows:
        console.print("No scan history yet.")
        return

    from rich.table import Table

    table = Table(title="Recent Scans", show_lines=False)
    table.add_column("Package")
    table.add_column("Version")
    table.add_column("Score", justify="right")
    table.add_column("Verdict")

    for key, value in rows:
        breakdown = json.loads(value).get("breakdown", {})
        # Cache keys look like "full:<package>:<version-or-latest>".
        _, pkg, ver = key.split(":", 2)
        verdict = breakdown.get("verdict", "?")
        score = breakdown.get("score", "?")
        color = {"LOW": "green", "MEDIUM": "yellow", "HIGH": "red"}.get(verdict, "white")
        table.add_row(pkg, ver, str(score), f"[{color}]{verdict}[/{color}]")

    console.print(table)
218
+
219
+
220
@app.command()
def update(
    force: bool = typer.Option(False, "--force", help="Wipe and refresh all CVE cache entries"),
):
    """Manage the pipguard cache."""
    # Without --force this command is purely informational.
    if not force:
        console.print("Use [bold]pipguard update --force[/bold] to refresh CVE cache immediately.")
        return
    cache.clear_vuln()
    console.print(
        "[green]CVE cache cleared.[/green] Fresh vulnerability data will be fetched on next scan."
    )
232
+
233
+
234
@app.command()
def configure():
    """Set up automatic pip interception for your shell.

    Appends a `pip` wrapper function to the user's shell config
    (bashrc / zshrc / fish config / PowerShell profile) that reroutes
    `pip install ...` through `pipguard install` and forwards every other
    pip subcommand to the real pip. Idempotent: a marker comment guards
    against appending the wrapper twice.
    """
    import os
    import platform

    # POSIX (bash/zsh) wrapper: "${@:2}" forwards every arg after "install".
    BASH_ZSH_FUNC = """
# pipguard — intercept pip install
pip() {
    if [ "$1" = "install" ]; then
        pipguard install "${@:2}"
    else
        command pip "$@"
    fi
}
"""

    # fish uses its own function syntax; $argv[2..] is the remaining args.
    FISH_FUNC = """
# pipguard — intercept pip install
function pip
    if test "$argv[1]" = "install"
        pipguard install $argv[2..]
    else
        command pip $argv
    end
end
"""

    # PowerShell wrapper: resolves the real pip executable explicitly since
    # PowerShell has no `command` builtin to bypass a function by that name.
    POWERSHELL_FUNC = """
# pipguard — intercept pip install
function pip {
    if ($args[0] -eq "install") {
        pipguard install @($args | Select-Object -Skip 1)
    } else {
        & (Get-Command pip -CommandType Application | Select-Object -First 1).Source @args
    }
}
"""

    # First line of each snippet above — used to detect prior installation.
    MARKER = "# pipguard — intercept pip install"

    def already_configured(path: Path) -> bool:
        # True when the marker already appears in the config file.
        return path.exists() and MARKER in path.read_text()

    def append_to(path: Path, content: str):
        # Create parent directories as needed (fish config may not exist yet).
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "a") as f:
            f.write(content)

    # Detect shell and config file
    if platform.system() == "Windows":
        # PowerShell
        # Use $PROFILE directly — it resolves to the correct path for the running PowerShell version
        # NOTE(review): despite the comment above, the code below hardcodes the
        # Windows-PowerShell-5 profile path; PowerShell 7 uses
        # Documents\PowerShell instead — confirm which shell is targeted.
        ps_profile = Path(os.environ.get("USERPROFILE", str(Path.home()))) / "Documents" / "WindowsPowerShell" / "Microsoft.PowerShell_profile.ps1"
        if already_configured(ps_profile):
            console.print("[yellow]pipguard is already configured in your PowerShell profile.[/yellow]")
            return
        append_to(ps_profile, POWERSHELL_FUNC)
        console.print(f"[green]Done![/green] Added pip interceptor to:\n {ps_profile}")
        console.print("\nReload your shell or run:")
        console.print(" [bold]. $PROFILE[/bold]")

    else:
        # POSIX: pick the rc file from $SHELL; unknown shells default to bash.
        shell = os.environ.get("SHELL", "")
        if "zsh" in shell:
            config = Path.home() / ".zshrc"
            func = BASH_ZSH_FUNC
        elif "fish" in shell:
            config = Path.home() / ".config" / "fish" / "config.fish"
            func = FISH_FUNC
        else:
            config = Path.home() / ".bashrc"
            func = BASH_ZSH_FUNC

        if already_configured(config):
            console.print(f"[yellow]pipguard is already configured in {config}[/yellow]")
            return

        append_to(config, func)
        console.print(f"[green]Done![/green] Added pip interceptor to:\n {config}")
        console.print("\nReload your shell or run:")
        console.print(f" [bold]source {config}[/bold]")

    # Reached by both branches after a successful append.
    console.print("\nFrom now on, [bold]pip install <package>[/bold] will automatically run through pipguard.")
    console.print("To remove, delete the pip() function from the config file shown above.")
319
+
320
+
321
+ if __name__ == "__main__":
322
+ app()
pipguard/osv.py ADDED
@@ -0,0 +1,19 @@
1
+ import httpx
2
+
3
OSV_URL = "https://api.osv.dev/v1/query"


async def check_vulns(package: str, version: str) -> list[dict]:
    """Query OSV for known vulnerabilities affecting package==version.

    Returns a list of {"id", "summary"} dicts; empty when nothing is known.
    Raises httpx.HTTPStatusError when the OSV API rejects the request.
    """
    query = {
        "version": version,
        "package": {"name": package, "ecosystem": "PyPI"},
    }
    async with httpx.AsyncClient(timeout=10) as client:
        response = await client.post(OSV_URL, json=query)
        response.raise_for_status()
        payload = response.json()

    return [
        {"id": v["id"], "summary": v.get("summary", "No description")}
        for v in payload.get("vulns", [])
    ]
pipguard/pypi.py ADDED
@@ -0,0 +1,84 @@
1
+ from datetime import datetime, timezone
2
+
3
+ import httpx
4
+
5
PYPI_URL = "https://pypi.org/pypi/{package}/json"
PYPI_VERSION_URL = "https://pypi.org/pypi/{package}/{version}/json"
PYPISTATS_URL = "https://pypistats.org/api/packages/{package}/recent"


async def fetch_metadata(package: str, version: str | None = None) -> dict:
    """Fetch and condense PyPI JSON metadata for one package.

    *version* of None means "latest". Returns a flat dict with name,
    version, age_days, classifiers, github_url, maintainer, tarball_url,
    and release_count.
    """
    if version:
        url = PYPI_VERSION_URL.format(package=package, version=version)
    else:
        url = PYPI_URL.format(package=package)

    async with httpx.AsyncClient(timeout=10) as client:
        r = await client.get(url)
        r.raise_for_status()
        data = r.json()

    info = data["info"]
    releases = data.get("releases", {})

    # Package age = days since the earliest upload of ANY release.
    # upload_time is naive ISO-8601; PyPI timestamps are in UTC.
    upload_times = [
        datetime.fromisoformat(f["upload_time"]).replace(tzinfo=timezone.utc)
        for files in releases.values()
        for f in files
        if f.get("upload_time")
    ]
    if upload_times:
        age_days = (datetime.now(timezone.utc) - min(upload_times)).days
    else:
        age_days = None

    # GitHub repo from project_urls or home_page
    project_urls = info.get("project_urls") or {}
    candidates = list(project_urls.values()) + [info.get("home_page") or ""]
    github_url = next((u for u in candidates if u and "github.com" in u), None)

    # Source tarball URL — the version-specific endpoint lists files under
    # data["urls"]; the latest endpoint nests them under releases[version].
    target_version = version or info["version"]
    candidate_files = data.get("urls") or releases.get(target_version, [])
    tarball_url = next(
        (f["url"] for f in candidate_files if f.get("packagetype") == "sdist"),
        None,
    )

    return {
        "name": info["name"],
        "version": target_version,
        "age_days": age_days,
        "classifiers": info.get("classifiers") or [],
        "github_url": github_url,
        "maintainer": info.get("maintainer") or info.get("author"),
        "tarball_url": tarball_url,
        "release_count": len(releases),
    }
60
+
61
+
62
async def fetch_download_stats(package: str) -> dict:
    """Fetch recent download counts from pypistats.org.

    Returns {"last_week", "last_month", "spike_pct"} — all None when the
    stats service has no data for the package (e.g. brand new uploads).
    """
    async with httpx.AsyncClient(timeout=10) as client:
        r = await client.get(PYPISTATS_URL.format(package=package))
        if r.status_code != 200:
            return {"last_week": None, "last_month": None, "spike_pct": None}
        data = r.json()["data"]

    last_week = data.get("last_week") or 0
    last_month = data.get("last_month") or 0

    # Expected weekly = monthly / 4; spike = % by which this week exceeds it.
    spike_pct = None
    expected_weekly = last_month / 4 if last_month else 0
    if expected_weekly > 0:
        spike_pct = (last_week - expected_weekly) / expected_weekly * 100

    return {
        "last_week": last_week,
        "last_month": last_month,
        "spike_pct": spike_pct,
    }
pipguard/scorer.py ADDED
@@ -0,0 +1,84 @@
1
# Additive point weights per risk signal. The summed score is bucketed into
# verdicts: LOW (<= 30), MEDIUM (31-60), HIGH (> 60).
_WEIGHTS = {
    "known_cve": 50,
    "shell_exec": 40,
    "base64_obfuscation": 35,
    "package_new": 30,
    "home_dir_access": 30,
    "network_call_full": 25,  # pure Python package making network calls
    "network_call_discounted": 8,  # network calls expected for this package type
    "env_access": 20,
    "maintainer_new": 20,
    "download_spike": 15,
    "no_github": 10,
}


def compute(
    metadata: dict,
    download_stats: dict,
    vulns: list[dict],
    analysis_flags: dict,
    classifier_context: str,  # 'pure_python' | 'network_expected' | 'ambiguous'
    readme_context: str,  # 'network_expected' | 'pure_python' | 'unknown'
) -> dict:
    """Fold every signal into {"score", "verdict", "signals"}.

    "signals" maps a human-readable label to the points it contributed.
    """
    signals: dict[str, int] = {}

    if vulns:
        signals[f"Known CVE ({vulns[0]['id']})"] = _WEIGHTS["known_cve"]

    age_days = metadata.get("age_days")
    if age_days is not None and age_days < 30:
        signals[f"Package < 30 days old ({age_days}d)"] = _WEIGHTS["package_new"]

    # Spikes only count against low-volume packages — popular ones trend
    # for legitimate reasons.
    spike_pct = download_stats.get("spike_pct")
    last_month = download_stats.get("last_month") or 0
    if spike_pct and spike_pct > 300 and last_month < 50_000:
        signals[f"Download spike +{spike_pct:.0f}%"] = _WEIGHTS["download_spike"]

    if not metadata.get("github_url"):
        signals["No GitHub repo linked"] = _WEIGHTS["no_github"]

    # Flat per-flag signals from the AST analysis.
    for flag, label in (
        ("shell_exec", "Shell execution in setup.py"),
        ("base64_obfuscation", "Base64 obfuscation"),
        ("home_dir_access", "Home directory access"),
        ("env_access", "Env variable access"),
    ):
        if analysis_flags.get(flag):
            signals[label] = _WEIGHTS[flag]

    # Network calls are weighted by context: damning for a "pure python"
    # package, discounted when the package type plausibly needs the network.
    if analysis_flags.get("network_call"):
        if classifier_context == "pure_python":
            signals["Network call in setup.py (unexpected for pure Python)"] = (
                _WEIGHTS["network_call_full"]
            )
        elif (
            classifier_context == "network_expected"
            or readme_context == "network_expected"
        ):
            signals["Network call in setup.py (expected for this package type)"] = (
                _WEIGHTS["network_call_discounted"]
            )
        else:
            signals["Network call in setup.py"] = _WEIGHTS["network_call_full"]

    score = sum(signals.values())
    if score > 60:
        verdict = "HIGH"
    elif score > 30:
        verdict = "MEDIUM"
    else:
        verdict = "LOW"

    return {"score": score, "verdict": verdict, "signals": signals}
@@ -0,0 +1,8 @@
1
+ Metadata-Version: 2.4
2
+ Name: pipguard-cli
3
+ Version: 0.1.0
4
+ Summary: Supply chain attack prevention for pip installs
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: httpx>=0.27.0
7
+ Requires-Dist: rich>=13.0.0
8
+ Requires-Dist: typer>=0.9.0
@@ -0,0 +1,13 @@
1
+ pipguard/__init__.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
2
+ pipguard/analyzer.py,sha256=Tz_AfXvee91-Ru_U-tPR3bxK6YOpAmFLqNUkjL6DSzo,3828
3
+ pipguard/cache.py,sha256=oxMN7yuoXnaO3UYbRjpp8uvmY8cJhtDHtzgj0qb3JaY,1318
4
+ pipguard/display.py,sha256=yjRPjlcLnm2vkkixQEKAbGxVjcNow3RM3jVfPSn2kps,3155
5
+ pipguard/github.py,sha256=s9YObjmj_ZxScG76seevB868HOxxI0g6IEpwPPj7O9o,2222
6
+ pipguard/main.py,sha256=ns5sUpLdg4cx-X2nDswsWhxwWMG8RCAYpjbgB9LfZHY,10536
7
+ pipguard/osv.py,sha256=tm8pDKrP88DoXgqjEDRHNeGRdZzEI55N9vlThMkoqjI,523
8
+ pipguard/pypi.py,sha256=OtZih1HQ-O_lussiac7Ft35bOw6BFGyUaxcoADz-j1Q,2965
9
+ pipguard/scorer.py,sha256=9P329MUvC3_YtGzIxluccVeXxedP3wZF699JsqP0w7w,2727
10
+ pipguard_cli-0.1.0.dist-info/METADATA,sha256=lswJJCw0HDZq3M_UDKeEDTcOCMSI9x8EXentebMLbFo,222
11
+ pipguard_cli-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
12
+ pipguard_cli-0.1.0.dist-info/entry_points.txt,sha256=WBKw7YrjmXETv7L4TiFS-EfmVPcIdznX7y4c_hi2TJc,47
13
+ pipguard_cli-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ pipguard = pipguard.main:app