slopguard-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
slopguard/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ """SlopGuard — defend against slopsquatting (LLM-hallucinated package names)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ __version__ = "0.1.0"
6
+
7
+ __all__ = ["__version__"]
slopguard/__main__.py ADDED
@@ -0,0 +1,13 @@
1
+ """Enables ``python -m slopguard``."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from slopguard.cli import app
6
+
7
+
8
def main() -> None:
    """Hand control to the Typer application object imported from ``slopguard.cli``."""
    app()


# Only run the CLI when this module is executed directly (``python -m slopguard``).
if __name__ == "__main__":
    main()
slopguard/cli.py ADDED
@@ -0,0 +1,321 @@
1
+ """SlopGuard CLI — ``scan`` + ``version`` subcommands."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ import sys
8
+ import time
9
+ import uuid
10
+ from datetime import UTC, datetime
11
+ from enum import StrEnum
12
+ from pathlib import Path
13
+ from typing import Annotated
14
+
15
+ import typer
16
+ from rich.console import Console
17
+
18
+ from slopguard import __version__
19
+ from slopguard.config import (
20
+ ConfigError,
21
+ ResolvedConfig,
22
+ default_config_path,
23
+ load_file_config,
24
+ resolve,
25
+ )
26
+ from slopguard.models import (
27
+ Dependency,
28
+ Ecosystem,
29
+ Finding,
30
+ ManifestInfo,
31
+ RiskTier,
32
+ ScanReport,
33
+ ScanSummary,
34
+ Signal,
35
+ )
36
+ from slopguard.parsers.base import Parser, ParserError
37
+ from slopguard.parsers.npm import NpmParser
38
+ from slopguard.parsers.python import PythonParser
39
+ from slopguard.registry.base import RegistryClient
40
+ from slopguard.registry.npm import NpmRegistryClient
41
+ from slopguard.registry.pypi import PypiRegistryClient
42
+ from slopguard.report.json import write_json_report
43
+ from slopguard.report.terminal import render_terminal_report
44
+ from slopguard.scoring.engine import ScoringConfig, ScoringEngine
45
+
46
+ app = typer.Typer(
47
+ name="slopguard",
48
+ help="Defend developers and AI coding agents against slopsquatting (LLM-hallucinated packages).",
49
+ add_completion=False,
50
+ no_args_is_help=True,
51
+ )
52
+
53
+ logger = logging.getLogger("slopguard")
54
+
55
+
56
+ class OutputFormat(StrEnum):
57
+ TERMINAL = "terminal"
58
+ JSON = "json"
59
+
60
+
61
+ class FailOn(StrEnum):
62
+ ANY = "any"
63
+ HALLUCINATED = "hallucinated"
64
+ SUSPICIOUS = "suspicious"
65
+ NONE = "none"
66
+
67
+
68
# Lookup from a manifest's basename to the parser class that handles it.
# Membership in this mapping is also what makes a file count as a manifest.
MANIFEST_PARSERS: dict[str, type[Parser]] = {
    # npm manifests
    "package.json": NpmParser,
    "package-lock.json": NpmParser,
    # Python manifests
    "requirements.txt": PythonParser,
    "pyproject.toml": PythonParser,
    "Pipfile": PythonParser,
}
75
+
76
+
77
def _ecosystem_for(filename: str) -> Ecosystem:
    """Classify a manifest basename: the two npm manifests map to npm, anything else to PyPI."""
    if filename in ("package.json", "package-lock.json"):
        return Ecosystem.NPM
    return Ecosystem.PYPI
79
+
80
+
81
def discover_manifests(path: Path) -> list[Path]:
    """Find supported manifests at ``path``.

    A file target is returned as a one-element list when its name is a key of
    ``MANIFEST_PARSERS`` and as an empty list otherwise. A directory target is
    searched through ``_iter_at_depth`` at depths 0, 1 and 2. Any other kind of
    path yields an empty list.

    A ``package-lock.json`` is dropped whenever a ``package.json`` file sits in
    the same directory: lockfiles tend to be far larger and the manifest gives
    the directly-declared dependencies.

    Returns:
        The accepted manifest paths, sorted for stable ordering.
    """
    if path.is_file():
        return [path] if path.name in MANIFEST_PARSERS else []
    if not path.is_dir():
        return []

    found: list[Path] = []
    seen: set[Path] = set()
    # depth 0 = entries directly inside ``path``; 1 and 2 = one and two directory levels deeper.
    for depth in range(3):
        for entry in _iter_at_depth(path, depth):
            if entry.name not in MANIFEST_PARSERS or entry in seen or not entry.is_file():
                continue
            # Check the sibling on disk rather than in ``seen``: directory iteration order is
            # arbitrary, so the lockfile is frequently visited before its package.json and a
            # "was it already accepted?" test would let both through.
            if entry.name == "package-lock.json" and entry.with_name("package.json").is_file():
                continue
            seen.add(entry)
            found.append(entry)

    # Stable ordering.
    found.sort()
    return found
103
+
104
+
105
+ def _iter_at_depth(root: Path, depth: int) -> list[Path]:
106
+ if depth == 0:
107
+ return list(root.iterdir())
108
+ out: list[Path] = []
109
+ for sub in root.iterdir():
110
+ if not sub.is_dir():
111
+ continue
112
+ if sub.name in {"node_modules", ".venv", "venv", "__pycache__", ".git", "dist", "build"}:
113
+ continue
114
+ out.extend(_iter_at_depth(sub, depth - 1))
115
+ return out
116
+
117
+
118
def _build_clients(cfg: ResolvedConfig) -> tuple[RegistryClient | None, RegistryClient | None]:
    """Create the ``(npm, pypi)`` registry clients, or ``(None, None)`` when networking is off.

    Both clients receive ``cfg.timeout_seconds`` as their timeout.
    """
    if cfg.network_enabled:
        return (
            NpmRegistryClient(timeout=cfg.timeout_seconds),
            PypiRegistryClient(timeout=cfg.timeout_seconds),
        )
    return (None, None)
124
+
125
+
126
+ def _should_fail(summary: ScanSummary, fail_on: str) -> bool:
127
+ if fail_on == "none":
128
+ return False
129
+ if fail_on in {"any", "hallucinated"} and summary.hallucinated > 0:
130
+ return True
131
+ if fail_on == "any" and (summary.suspicious > 0 or summary.errors > 0):
132
+ return True
133
+ return fail_on == "suspicious" and (summary.hallucinated > 0 or summary.suspicious > 0)
134
+
135
+
136
def _summarise(findings: list[Finding]) -> ScanSummary:
    """Count findings per risk tier and wrap the counts in a ``ScanSummary``."""

    def tally(tier: RiskTier) -> int:
        # Identity comparison, matching how tiers are compared elsewhere in this module.
        return len([finding for finding in findings if finding.risk is tier])

    return ScanSummary(
        total=len(findings),
        clean=tally(RiskTier.CLEAN),
        suspicious=tally(RiskTier.SUSPICIOUS),
        hallucinated=tally(RiskTier.HALLUCINATED),
        errors=tally(RiskTier.ERROR),
    )
148
+
149
+
150
def _ignored_finding(dep: Dependency) -> Finding:
    """Build the clean, zero-score finding recorded for a dependency matched by an ignore rule."""
    marker = Signal(
        type="ignored_by_config",
        weight=0.0,
        detail="Matched an .slopguard.yaml ignore rule.",
    )
    return Finding(
        name=dep.name,
        version=dep.version,
        ecosystem=dep.ecosystem,
        manifest=dep.manifest,
        risk=RiskTier.CLEAN,
        score=0.0,
        signals=[marker],
        remediation="No action required (ignored).",
    )
167
+
168
+
169
async def _scan_async(
    target: Path,
    cfg: ResolvedConfig,
    *,
    verbose: bool,
) -> tuple[ScanReport, float]:
    """Run a full scan of ``target`` and assemble the report.

    Steps: discover manifests, parse each with the parser registered in
    ``MANIFEST_PARSERS``, split dependencies into ignored (per ``cfg.ignore``)
    and to-score, score the latter with ``ScoringEngine``, then build a
    ``ScanReport`` whose ``exit_code`` is 1 when ``_should_fail`` says so and 0
    otherwise.

    Args:
        target: Directory or manifest file to scan.
        cfg: Merged configuration.
        verbose: Forwarded to ``ScoringConfig``.

    Returns:
        ``(report, elapsed_seconds)`` where the duration is measured with
        ``time.monotonic``.

    Raises:
        typer.Exit: Built by ``_error_exit`` (code 2) when no manifest is found
            or a manifest cannot be parsed.
    """
    started = time.monotonic()
    manifests = discover_manifests(target)
    if not manifests:
        raise _error_exit("No supported manifests found at the given path.")

    parsed: list[tuple[ManifestInfo, list[Dependency]]] = []
    for m in manifests:
        parser_cls = MANIFEST_PARSERS[m.name]
        parser = parser_cls()
        try:
            deps = parser.parse(m)
        except ParserError as exc:
            raise _error_exit(f"Could not parse {m}: {exc}") from exc
        ecosystem = _ecosystem_for(m.name)
        rel = m.relative_to(target) if target.is_dir() else Path(m.name)
        manifest_info = ManifestInfo(path=str(rel), ecosystem=ecosystem, dependency_count=len(deps))
        # Rewrite each Dependency's manifest path to be the relative one (parsers default to basename).
        deps = [d.model_copy(update={"manifest": str(rel)}) for d in deps]
        parsed.append((manifest_info, deps))

    all_deps = [d for _, deps in parsed for d in deps]
    npm_client, pypi_client = _build_clients(cfg)

    try:
        engine = ScoringEngine(
            npm_client=npm_client,
            pypi_client=pypi_client,
            config=ScoringConfig(
                suspicious_min=cfg.suspicious_min,
                hallucinated_min=cfg.hallucinated_min,
                verbose=verbose,
            ),
            no_network=not cfg.network_enabled,
            concurrency=cfg.concurrency,
        )
        # Pre-split deps into ignored vs to-score.
        to_score: list[Dependency] = []
        ignored: list[Finding] = []
        for dep in all_deps:
            if cfg.ignore.matches(dep.name):
                ignored.append(_ignored_finding(dep))
            else:
                to_score.append(dep)
        scored = await engine.score_all(to_score)
    finally:
        # Nested so that a failure while closing the npm client cannot skip
        # closing the PyPI client.
        try:
            if npm_client is not None:
                await npm_client.aclose()
        finally:
            if pypi_client is not None:
                await pypi_client.aclose()

    # Ignored findings are listed ahead of scored ones.
    findings = ignored + scored
    summary = _summarise(findings)
    exit_code = 1 if _should_fail(summary, cfg.fail_on) else 0
    report = ScanReport(
        slopguard_version=__version__,
        scan_id=str(uuid.uuid4()),
        scanned_at=datetime.now(UTC),
        path=str(target.resolve()),
        manifests=[mi for mi, _ in parsed],
        summary=summary,
        findings=findings,
        exit_code=exit_code,  # type: ignore[arg-type]  # Literal[0,1,2] — validated by Pydantic
    )
    return report, time.monotonic() - started
239
+
240
+
241
def _error_exit(message: str) -> typer.Exit:
    """Print ``message`` as an error on stderr and return a ``typer.Exit`` with code 2.

    The exception is returned rather than raised so that call sites can write
    ``raise _error_exit(...)`` (optionally with ``from exc``).

    Args:
        message: Plain text to show after the red ``error:`` prefix.
    """
    from rich.markup import escape

    # ``message`` often embeds file paths or parser errors; escape it so that
    # square brackets in it are printed literally instead of being interpreted
    # as Rich markup tags.
    Console(stderr=True).print(f"[red]error:[/red] {escape(message)}")
    return typer.Exit(code=2)
244
+
245
+
246
@app.command("scan")
def scan_cmd(
    path: Annotated[
        Path | None, typer.Argument(help="Project directory or manifest file. Defaults to CWD.")
    ] = None,
    format_: Annotated[
        OutputFormat, typer.Option("--format", help="Output format.")
    ] = OutputFormat.TERMINAL,
    output: Annotated[
        Path | None,
        typer.Option("--output", help="Write JSON output to a file (--format=json only)."),
    ] = None,
    config: Annotated[
        Path | None, typer.Option("--config", help="Path to .slopguard.yaml.")
    ] = None,
    fail_on: Annotated[
        FailOn | None, typer.Option("--fail-on", help="Risk level that causes a non-zero exit.")
    ] = None,
    no_network: Annotated[bool, typer.Option("--no-network", help="Skip registry probes.")] = False,
    timeout: Annotated[
        float | None,
        typer.Option("--timeout", help="Per-request timeout for registry probes (seconds)."),
    ] = None,
    concurrency: Annotated[
        int | None, typer.Option("--concurrency", help="Maximum concurrent registry probes.")
    ] = None,
    verbose: Annotated[bool, typer.Option("--verbose", "-v", help="Show debug logs.")] = False,
) -> None:
    """Scan a project for slopsquatted / hallucinated dependencies."""
    # NOTE: the docstring above is kept verbatim because Typer shows it as the command help.
    if verbose:
        logging.basicConfig(level=logging.DEBUG, format="%(levelname)s %(name)s: %(message)s")

    target = (path or Path.cwd()).expanduser()
    if not target.exists():
        raise _error_exit(f"path does not exist: {target}")

    # A config file named explicitly must exist. ``load_file_config`` returns
    # defaults for a missing path, which would silently discard the ignore
    # rules and fail-on policy the user asked for.
    if config is not None and not config.exists():
        raise _error_exit(f"config file does not exist: {config}")

    # Without --config, look for .slopguard.yaml starting from the scanned directory.
    config_path = config or default_config_path(target if target.is_dir() else target.parent)
    try:
        file_cfg = load_file_config(config_path)
        resolved = resolve(
            file_cfg,
            cli_fail_on=fail_on.value if fail_on is not None else None,
            cli_no_network=no_network,
            cli_timeout=timeout,
            cli_concurrency=concurrency,
        )
    except ConfigError as exc:
        raise _error_exit(str(exc)) from exc

    if format_ is OutputFormat.JSON and output is None and verbose:
        logger.warning("--format=json without --output; printing to stdout")

    report, duration = asyncio.run(_scan_async(target, resolved, verbose=verbose))

    if format_ is OutputFormat.JSON:
        text = write_json_report(report, path=output)
        if output is None:
            sys.stdout.write(text)
    else:
        render_terminal_report(report, duration_seconds=duration)

    # The process exit status is whatever the report computed.
    raise typer.Exit(code=report.exit_code)
308
+
309
+
310
@app.command("version")
def version_cmd() -> None:
    """Print the SlopGuard version and exit 0."""
    # Emit the version followed by a newline on standard output.
    sys.stdout.write(f"{__version__}\n")
314
+
315
+
316
@app.command("update", hidden=True)
def update_cmd() -> None:
    """(Stub) refresh the embedded hallucination database. See TODO(v0.2)."""
    # Imported only when the command actually runs.
    from slopguard.update import run as run_update

    # The value returned by the updater becomes the process exit status.
    status = run_update()
    raise typer.Exit(code=status)
slopguard/config.py ADDED
@@ -0,0 +1,139 @@
1
+ """``.slopguard.yaml`` loader and merged config."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from dataclasses import dataclass, field
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ import yaml
11
+ from pydantic import BaseModel, Field, ValidationError
12
+
13
+ from slopguard.scoring.engine import DEFAULT_HALLUCINATED_MIN, DEFAULT_SUSPICIOUS_MIN
14
+
15
+
16
class IgnoreConfig(BaseModel):
    # Names listed here feed the exact-match set of the resolved ignore rules.
    packages: list[str] = Field(default_factory=list)
    # Strings listed here are compiled as regular expressions when the config is resolved.
    patterns: list[str] = Field(default_factory=list)
19
+
20
+
21
class NetworkConfig(BaseModel):
    # Setting this to False disables networking regardless of the CLI flag.
    enabled: bool = True
    # File-level default; a CLI timeout, when given, takes precedence.
    timeout_seconds: float = 5.0
    # File-level default; a CLI concurrency, when given, takes precedence.
    concurrency: int = 16
25
+
26
+
27
class ScoringFileConfig(BaseModel):
    # Both thresholds default to the constants exported by the scoring engine.
    suspicious_min_score: float = DEFAULT_SUSPICIOUS_MIN
    hallucinated_min_score: float = DEFAULT_HALLUCINATED_MIN
30
+
31
+
32
class FileConfig(BaseModel):
    """The schema of ``.slopguard.yaml``. All fields optional."""

    # Every section has a default, so an empty mapping validates.
    ignore: IgnoreConfig = Field(default_factory=IgnoreConfig)
    # Kept as a plain string here; the allowed values are checked when the config is resolved.
    fail_on: str = "suspicious"
    network: NetworkConfig = Field(default_factory=NetworkConfig)
    scoring: ScoringFileConfig = Field(default_factory=ScoringFileConfig)
39
+
40
+
41
class ConfigError(Exception):
    """Signals a problem with ``.slopguard.yaml`` or with the settings merged from it."""
43
+
44
+
45
@dataclass(frozen=True)
class IgnoreRules:
    """Immutable ignore rules: a set of exact names plus compiled regex patterns."""

    names: frozenset[str]
    patterns: tuple[re.Pattern[str], ...]

    def matches(self, name: str) -> bool:
        """Return True when ``name`` is listed exactly or any pattern finds a match within it."""
        listed = name in self.names
        return listed or any(rx.search(name) for rx in self.patterns)
54
+
55
+
56
@dataclass(frozen=True)
class ResolvedConfig:
    """Effective settings consumed by the CLI, with precedence CLI flags > file > defaults."""

    fail_on: str
    network_enabled: bool
    timeout_seconds: float
    concurrency: int
    suspicious_min: float
    hallucinated_min: float
    # When not supplied, an empty rule set is used, i.e. nothing is ignored.
    ignore: IgnoreRules = field(default_factory=lambda: IgnoreRules(frozenset(), ()))
67
+
68
+
69
def load_file_config(path: Path | None) -> FileConfig:
    """Load and validate the YAML config file.

    Args:
        path: Location of the config file, or ``None``.

    Returns:
        The validated ``FileConfig``. Defaults are returned when ``path`` is
        ``None``, does not exist, or the document is empty.

    Raises:
        ConfigError: If the file cannot be read or decoded as UTF-8, is not
            valid YAML, is not a mapping at the top level, or does not match
            the ``FileConfig`` schema.
    """
    if path is None or not path.exists():
        return FileConfig()

    # Reading can fail independently of parsing (path is a directory, permission
    # denied, bytes that are not UTF-8); report those as configuration errors too
    # instead of letting a raw traceback escape.
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        raise ConfigError(f"could not read {path}: {exc}") from exc

    try:
        raw = yaml.safe_load(text)
    except yaml.YAMLError as exc:
        raise ConfigError(f"invalid YAML in {path}: {exc}") from exc

    # Only an empty document (parsed as None) means "use defaults". Other falsy
    # values such as [] or false must still fail the mapping check below.
    if raw is None:
        raw = {}
    if not isinstance(raw, dict):
        raise ConfigError(f"expected a YAML mapping at top level of {path}")

    try:
        return FileConfig.model_validate(raw)
    except ValidationError as exc:
        raise ConfigError(f"invalid .slopguard.yaml schema: {exc}") from exc
83
+
84
+
85
def resolve(
    file_cfg: FileConfig,
    *,
    cli_fail_on: str | None,
    cli_no_network: bool,
    cli_timeout: float | None,
    cli_concurrency: int | None,
) -> ResolvedConfig:
    """Overlay CLI options on the file config and return the effective settings.

    A CLI value wins whenever it was supplied. Networking stays enabled only if
    the file enables it and ``cli_no_network`` is false. Scoring thresholds and
    ignore rules always come from the file config.

    Raises:
        ConfigError: If the effective ``fail_on`` is not one of ``any``,
            ``hallucinated``, ``suspicious`` or ``none``, or if an ignore
            pattern fails to compile.
    """
    chosen_fail_on = cli_fail_on if cli_fail_on else file_cfg.fail_on
    if chosen_fail_on not in {"any", "hallucinated", "suspicious", "none"}:
        raise ConfigError(f"invalid fail_on value: {chosen_fail_on!r}")

    net = file_cfg.network
    scoring = file_cfg.scoring
    rules = IgnoreRules(
        names=frozenset(file_cfg.ignore.packages),
        patterns=tuple(_compile_patterns(file_cfg.ignore.patterns)),
    )
    return ResolvedConfig(
        fail_on=chosen_fail_on,
        network_enabled=net.enabled and not cli_no_network,
        timeout_seconds=net.timeout_seconds if cli_timeout is None else cli_timeout,
        concurrency=net.concurrency if cli_concurrency is None else cli_concurrency,
        suspicious_min=scoring.suspicious_min_score,
        hallucinated_min=scoring.hallucinated_min_score,
        ignore=rules,
    )
112
+
113
+
114
+ def _compile_patterns(patterns: list[str]) -> list[re.Pattern[str]]:
115
+ compiled: list[re.Pattern[str]] = []
116
+ for raw in patterns:
117
+ try:
118
+ compiled.append(re.compile(raw))
119
+ except re.error as exc:
120
+ raise ConfigError(f"invalid ignore pattern {raw!r}: {exc}") from exc
121
+ return compiled
122
+
123
+
124
+ def default_config_path(start: Path) -> Path | None:
125
+ """Search for ``.slopguard.yaml`` in ``start`` and its parents (up to 3 levels)."""
126
+ cur = start.resolve()
127
+ for _ in range(3):
128
+ candidate = cur / ".slopguard.yaml"
129
+ if candidate.exists():
130
+ return candidate
131
+ if cur.parent == cur:
132
+ break
133
+ cur = cur.parent
134
+ return None
135
+
136
+
137
def _ensure_yaml_safe(data: Any) -> None:  # pragma: no cover — diagnostic only
    """Sanity check used by tests: pass ``data`` through ``yaml.safe_dump`` and discard the output."""
    _ = yaml.safe_dump(data)
@@ -0,0 +1,40 @@
1
+ """Embedded data: hallucination seed DB + popular-package lists."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from functools import lru_cache
7
+ from importlib.resources import files
8
+
9
+ from slopguard.models import Ecosystem, HallucinationDB, HallucinationEntry, PopularPackages
10
+
11
+
12
@lru_cache(maxsize=1)
def load_hallucination_db() -> HallucinationDB:
    """Read ``hallucinations_seed.json`` from the ``slopguard.data`` package and validate it.

    The result is memoised by ``lru_cache`` after the first successful call.
    """
    resource = files("slopguard.data").joinpath("hallucinations_seed.json")
    document = json.loads(resource.read_text(encoding="utf-8"))
    # Only these top-level keys reach the schema; anything else (operator notes, etc.)
    # is dropped before validation.
    allowed = {"schema_version", "updated", "entries"}
    trimmed = {key: value for key, value in document.items() if key in allowed}
    return HallucinationDB.model_validate(trimmed)
21
+
22
+
23
@lru_cache(maxsize=1)
def load_popular_packages() -> PopularPackages:
    """Read ``popular_packages.json`` from the ``slopguard.data`` package and validate it.

    The result is memoised by ``lru_cache`` after the first successful call.
    """
    resource = files("slopguard.data").joinpath("popular_packages.json")
    text = resource.read_text(encoding="utf-8")
    return PopularPackages.model_validate_json(text)
28
+
29
+
30
def hallucination_index() -> dict[tuple[Ecosystem, str], HallucinationEntry]:
    """Build a fresh lookup keyed by ``(ecosystem, lowercased name)`` over the seed entries.

    If two entries share a key, the later one in the database wins.
    """
    index: dict[tuple[Ecosystem, str], HallucinationEntry] = {}
    for record in load_hallucination_db().entries:
        index[(record.ecosystem, record.name.lower())] = record
    return index
34
+
35
+
36
def popular_set(ecosystem: Ecosystem) -> frozenset[str]:
    """Return the lowercased popular-package names for ``ecosystem``.

    The npm list is used for ``Ecosystem.NPM``; every other ecosystem gets the PyPI list.
    """
    packages = load_popular_packages()
    if ecosystem is Ecosystem.NPM:
        source = packages.npm_top_1000
    else:
        source = packages.pypi_top_1000
    return frozenset(entry.lower() for entry in source)