PyPI - slopguard-cli - Versions diffs - 0.1.0__py3-none-any.whl - Mend

slopguard-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

slopguard/__init__.py +7 -0
slopguard/__main__.py +13 -0
slopguard/cli.py +321 -0
slopguard/config.py +139 -0
slopguard/data/__init__.py +40 -0
slopguard/data/hallucinations_seed.json +5603 -0
slopguard/data/popular_packages.json +2007 -0
slopguard/models.py +133 -0
slopguard/parsers/__init__.py +9 -0
slopguard/parsers/base.py +28 -0
slopguard/parsers/npm.py +146 -0
slopguard/parsers/python.py +269 -0
slopguard/registry/__init__.py +14 -0
slopguard/registry/base.py +107 -0
slopguard/registry/npm.py +78 -0
slopguard/registry/pypi.py +99 -0
slopguard/report/__init__.py +8 -0
slopguard/report/json.py +17 -0
slopguard/report/terminal.py +87 -0
slopguard/scoring/__init__.py +7 -0
slopguard/scoring/engine.py +235 -0
slopguard/scoring/signals.py +183 -0
slopguard/update.py +15 -0
slopguard_cli-0.1.0.dist-info/METADATA +197 -0
slopguard_cli-0.1.0.dist-info/RECORD +28 -0
slopguard_cli-0.1.0.dist-info/WHEEL +4 -0
slopguard_cli-0.1.0.dist-info/entry_points.txt +2 -0
slopguard_cli-0.1.0.dist-info/licenses/LICENSE +21 -0

slopguard/models.py ADDED Viewed

@@ -0,0 +1,133 @@
+"""Pydantic models — wire contract for the JSON report and internal types."""
+from __future__ import annotations
+from datetime import datetime
+from enum import StrEnum
+from typing import Literal
+from pydantic import BaseModel, ConfigDict, Field
class Ecosystem(StrEnum):
    """Supported package ecosystems."""
    # Members are ``StrEnum`` values, so each one is also a ``str`` equal to its
    # lowercase value.
    NPM = "npm"  # tag the npm parser attaches to package.json / package-lock.json entries
    PYPI = "pypi"  # tag the Python parser attaches to requirements/pyproject/Pipfile entries
+class RiskTier(StrEnum):
+    """Risk tier emitted for each scanned dependency.
+    Ordering (low → high): ``clean`` < ``error`` < ``suspicious`` < ``hallucinated``.
+    ``error`` sits below ``suspicious`` because it usually reflects a transient
+    network or parse problem rather than evidence of risk.
+    """
+    CLEAN = "clean"
+    ERROR = "error"
+    SUSPICIOUS = "suspicious"
+    HALLUCINATED = "hallucinated"
class DependencySource(StrEnum):
    """Where a dependency reference came from.
    ``REGISTRY`` is the default on ``Dependency``; parsers pick another member
    when a spec points at something other than a registry release.
    """
    REGISTRY = "registry"  # ordinary registry reference (the default)
    FILE = "file"  # e.g. npm ``file:`` specs, Poetry/Pipfile ``path`` tables
    LINK = "link"  # e.g. npm ``link:`` specs, lockfile entries flagged ``link``
    GIT = "git"  # e.g. ``git+`` / ``github:`` specs, Poetry ``git`` tables
    URL = "url"  # e.g. plain ``http(s)://`` specs, Poetry ``url`` tables
class Dependency(BaseModel):
    """A single dependency entry parsed from a manifest."""
    # frozen=True: pydantic rejects attribute assignment after construction.
    model_config = ConfigDict(frozen=True)
    # Package name as the parser extracted it (no case normalisation is applied here).
    name: str
    # Raw version/specifier text from the manifest; parsers pass None when the
    # entry has no version or is not a registry reference.
    version: str | None = None
    ecosystem: Ecosystem
    # Parsers in this package pass the manifest's basename (``path.name``).
    manifest: str
    source: DependencySource = DependencySource.REGISTRY
    # The npm parser sets this when the name starts with "@"; Python entries leave it False.
    scoped: bool = False
class Signal(BaseModel):
    """One contributing signal toward a finding's risk score."""
    # Signal identifier; the set of valid values is defined by the scoring code,
    # which is not part of this module.
    type: str
    # Validated to lie in the closed interval [0.0, 1.0].
    weight: float = Field(ge=0.0, le=1.0)
    # Human-readable explanation of why the signal fired.
    detail: str
class Finding(BaseModel):
    """The result of scoring a single dependency."""
    # name / version / ecosystem / manifest mirror the fields of ``Dependency``.
    name: str
    version: str | None = None
    ecosystem: Ecosystem
    manifest: str
    risk: RiskTier
    # Validated to lie in the closed interval [0.0, 1.0].
    score: float = Field(ge=0.0, le=1.0)
    # default_factory gives every Finding its own list instead of a shared default.
    signals: list[Signal] = Field(default_factory=list)
    remediation: str
    # Optional error text; presumably populated for ``RiskTier.ERROR`` findings —
    # confirm against the scoring engine.
    error: str | None = None
class ManifestInfo(BaseModel):
    """Summary of a single manifest scanned during this run."""
    path: str
    ecosystem: Ecosystem
    # Validated to be non-negative.
    dependency_count: int = Field(ge=0)
class ScanSummary(BaseModel):
    """Aggregate counts over all findings in a scan."""
    # Every counter is validated to be non-negative. The model itself does not
    # enforce that the per-tier counts add up to ``total``.
    total: int = Field(ge=0)
    clean: int = Field(ge=0)
    suspicious: int = Field(ge=0)
    hallucinated: int = Field(ge=0)
    errors: int = Field(ge=0)
class ScanReport(BaseModel):
    """Top-level scan report. This is the JSON wire format — treat as a public API."""
    # Field names and declaration order shape the serialized report; avoid
    # renaming or reordering them.
    slopguard_version: str
    scan_id: str
    scanned_at: datetime
    path: str
    manifests: list[ManifestInfo]
    summary: ScanSummary
    findings: list[Finding]
    # Restricted to 0, 1 or 2; the meaning of each code is assigned by the CLI,
    # which is outside this module.
    exit_code: Literal[0, 1, 2]
class HallucinationEntry(BaseModel):
    """One row in the embedded hallucination database."""
    name: str
    ecosystem: Ecosystem
    # Stored as a plain string; presumably a date — confirm against the seed data.
    first_seen: str
    # Validated to lie in the closed interval [0.0, 1.0].
    recurrence_rate: float = Field(ge=0.0, le=1.0)
    models_observed: list[str]
    notes: str
class HallucinationDB(BaseModel):
    """Loaded hallucination database."""
    schema_version: int
    # Kept as a plain string; no date parsing happens at the model level.
    updated: str
    entries: list[HallucinationEntry]
class PopularPackages(BaseModel):
    """Top-N popularity lists per ecosystem (used for Levenshtein typosquat checks)."""
    schema_version: int
    # The field names say "1000", but the model does not validate list length.
    npm_top_1000: list[str]
    pypi_top_1000: list[str]

slopguard/parsers/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""Manifest parsers for npm and Python ecosystems."""
+from __future__ import annotations
+from slopguard.parsers.base import Parser, ParserError
+from slopguard.parsers.npm import NpmParser
+from slopguard.parsers.python import PythonParser
+__all__ = ["NpmParser", "Parser", "ParserError", "PythonParser"]

slopguard/parsers/base.py ADDED Viewed

@@ -0,0 +1,28 @@
+"""Parser base class."""
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from pathlib import Path
+from slopguard.models import Dependency
class ParserError(Exception):
    """Raised when a manifest cannot be parsed.
    Parsers in this package raise it for missing files and malformed content,
    chaining the underlying exception with ``raise ... from``.
    """
class Parser(ABC):
    """Abstract parser for a single manifest file.
    Concrete subclasses must implement both ``supported_filenames`` and ``parse``.
    """
    @classmethod
    @abstractmethod
    def supported_filenames(cls) -> tuple[str, ...]:
        """Filenames (basename match) this parser handles."""
    @abstractmethod
    def parse(self, path: Path) -> list[Dependency]:
        """Parse the manifest at ``path`` and return dependencies.
        Raises ``ParserError`` if the file is malformed or unreadable.
        """

slopguard/parsers/npm.py ADDED Viewed

@@ -0,0 +1,146 @@
+"""npm manifest parsers: ``package.json`` and ``package-lock.json``."""
+from __future__ import annotations
+import json
+from pathlib import Path
+from typing import Any
+from slopguard.models import Dependency, DependencySource, Ecosystem
+from slopguard.parsers.base import Parser, ParserError
def _classify_npm_spec(spec: str) -> DependencySource:
    """Classify the source of an npm dependency spec string.

    Recognised forms, checked in this order:

    * ``file:`` and bare local paths (``./x``, ``../x``, ``~/x``, ``/x``) → ``FILE``
    * ``link:`` → ``LINK``
    * git URLs, hosted-git prefixes (``github:``, ``gitlab:``, ``bitbucket:``,
      ``gist:``), scp-style ``git@host:...`` and the bare ``owner/repo[#ref]``
      GitHub shorthand → ``GIT``
    * ``http://`` / ``https://`` tarball URLs → ``URL``
    * anything else (semver ranges, dist-tags, ``npm:`` aliases, ...) → ``REGISTRY``

    Args:
        spec: The raw value from a ``package.json`` dependency table.

    Returns:
        The ``DependencySource`` member describing where npm would fetch from.
    """
    if spec.startswith("file:"):
        return DependencySource.FILE
    if spec.startswith("link:"):
        return DependencySource.LINK
    # npm also accepts bare filesystem paths as local directory dependencies.
    if spec.startswith(("./", "../", "~/", "/")):
        return DependencySource.FILE
    git_prefixes = ("git:", "git+", "github:", "gitlab:", "bitbucket:", "gist:")
    if spec.startswith(git_prefixes) or "git@" in spec:
        return DependencySource.GIT
    if spec.startswith(("http://", "https://")):
        return DependencySource.URL
    # GitHub shorthand: "owner/repo" optionally followed by "#committish".
    # Registry specs (ranges, tags, "npm:" aliases) never have this shape: they
    # either contain no "/" or carry a ":" / "@" that is excluded here.
    repo_part = spec.split("#", 1)[0]
    owner, slash, repo = repo_part.partition("/")
    if (
        slash
        and owner
        and repo
        and "/" not in repo
        and not any(ch in repo_part for ch in ":@ \t")
    ):
        return DependencySource.GIT
    return DependencySource.REGISTRY
class NpmParser(Parser):
    """Parse ``package.json`` and ``package-lock.json``.

    ``package.json`` contributes every entry of the four dependency tables.
    ``package-lock.json`` contributes every locked package: the ``packages``
    map for lockfile v2/v3, or the (nested) ``dependencies`` tree for v1.
    """

    @classmethod
    def supported_filenames(cls) -> tuple[str, ...]:
        """Return the manifest basenames this parser handles."""
        return ("package.json", "package-lock.json")

    def parse(self, path: Path) -> list[Dependency]:
        """Parse the npm manifest or lockfile at ``path``.

        Args:
            path: Location of a ``package.json`` or ``package-lock.json``.

        Returns:
            The dependencies found, in file order.

        Raises:
            ParserError: If the file is missing, unreadable, not valid JSON, or
                its top-level value is not an object.
        """
        try:
            with path.open("rb") as fh:
                data = json.load(fh)
        except FileNotFoundError as exc:
            raise ParserError(f"manifest not found: {path}") from exc
        except json.JSONDecodeError as exc:
            raise ParserError(f"invalid JSON in {path}: {exc.msg}") from exc
        except (OSError, UnicodeDecodeError) as exc:
            # Permission problems, directories, bad encodings: the base-class
            # contract promises ParserError for anything unreadable.
            raise ParserError(f"cannot read {path}: {exc}") from exc
        if not isinstance(data, dict):
            raise ParserError(f"expected JSON object at top level of {path}")
        if path.name == "package-lock.json":
            return self._parse_lockfile(data, path)
        return self._parse_manifest(data, path)

    @staticmethod
    def _dependency(
        name: str,
        version: str | None,
        source: DependencySource,
        manifest: str,
    ) -> Dependency:
        """Build an npm ``Dependency`` record for ``name``."""
        return Dependency(
            name=name,
            version=version,
            ecosystem=Ecosystem.NPM,
            manifest=manifest,
            source=source,
            scoped=name.startswith("@"),
        )

    @staticmethod
    def _resolve_alias(alias: str, spec: str) -> tuple[str, str]:
        """Split an ``npm:<real-name>[@<range>]`` alias spec into (real name, range)."""
        target = spec[len("npm:") :]
        # rfind so the "@" of a scope ("@scope/pkg@1.0") is not mistaken for the
        # name/version separator; index 0 can only be a scope marker.
        at = target.rfind("@")
        if at > 0:
            real_name, real_spec = target[:at], target[at + 1 :]
        else:
            real_name, real_spec = target, ""
        return (real_name or alias, real_spec)

    def _parse_manifest(self, data: dict[str, Any], path: Path) -> list[Dependency]:
        """Collect dependencies from the dependency tables of a ``package.json``."""
        deps: list[Dependency] = []
        manifest_rel = path.name
        for section in (
            "dependencies",
            "devDependencies",
            "optionalDependencies",
            "peerDependencies",
        ):
            block = data.get(section)
            if not isinstance(block, dict):
                continue
            for name, spec in block.items():
                if not isinstance(name, str):
                    continue
                spec_str = spec if isinstance(spec, str) else ""
                if spec_str.startswith("npm:"):
                    # Aliased install: the table key is only a local alias, the
                    # package actually fetched from the registry is in the spec.
                    name, spec_str = self._resolve_alias(name, spec_str)
                source = _classify_npm_spec(spec_str)
                version = spec_str if source is DependencySource.REGISTRY else None
                deps.append(self._dependency(name, version, source, manifest_rel))
        return deps

    def _parse_lockfile(self, data: dict[str, Any], path: Path) -> list[Dependency]:
        """Collect locked packages from a ``package-lock.json`` (v1, v2 or v3)."""
        # lockfile v2/v3 puts everything under "packages" keyed by node_modules path.
        packages = data.get("packages")
        if isinstance(packages, dict):
            return self._parse_lock_packages(packages, path.name)
        # lockfile v1 stored deps under "dependencies".
        legacy = data.get("dependencies")
        if isinstance(legacy, dict):
            return self._parse_lock_legacy(legacy, path.name)
        return []

    def _parse_lock_packages(
        self, packages: dict[str, Any], manifest_rel: str
    ) -> list[Dependency]:
        """Collect entries from the v2/v3 ``packages`` map, de-duplicated by (name, version)."""
        deps: list[Dependency] = []
        seen: set[tuple[str, str | None]] = set()
        for node_path, meta in packages.items():
            # The empty-key entry describes the root project — skip it.
            if not node_path or not isinstance(meta, dict):
                continue
            declared = meta.get("name")
            # An explicit "name" wins over the install path (it differs for aliases).
            if isinstance(declared, str) and declared:
                name = declared
            else:
                name = self._name_from_node_path(node_path)
            if not name:
                continue
            version = meta.get("version")
            if not isinstance(version, str):
                version = None
            resolved = meta.get("resolved")
            if not isinstance(resolved, str):
                # Guard against null / non-string values in hand-edited lockfiles.
                resolved = ""
            source = DependencySource.REGISTRY
            if meta.get("link"):
                source = DependencySource.LINK
            elif resolved.startswith("git"):
                source = DependencySource.GIT
            elif resolved.startswith("file:"):
                source = DependencySource.FILE
            key = (name, version)
            if key in seen:
                continue
            seen.add(key)
            deps.append(self._dependency(name, version, source, manifest_rel))
        return deps

    def _parse_lock_legacy(
        self, legacy: dict[str, Any], manifest_rel: str
    ) -> list[Dependency]:
        """Collect entries from a v1 ``dependencies`` tree, including nested levels."""
        deps: list[Dependency] = []
        seen: set[tuple[str, str | None]] = set()
        # Breadth-first walk: v1 nests conflicting transitive versions under each
        # package's own "dependencies" table.
        queue: list[dict[str, Any]] = [legacy]
        idx = 0
        while idx < len(queue):
            block = queue[idx]
            idx += 1
            for name, meta in block.items():
                if not isinstance(meta, dict):
                    continue
                nested = meta.get("dependencies")
                if isinstance(nested, dict):
                    queue.append(nested)
                raw_version = meta.get("version")
                version = raw_version if isinstance(raw_version, str) else None
                # In v1 the "version" of a git/file/URL dependency is its spec.
                source = (
                    _classify_npm_spec(version)
                    if version is not None
                    else DependencySource.REGISTRY
                )
                if source is not DependencySource.REGISTRY:
                    version = None
                key = (name, version)
                if key in seen:
                    continue
                seen.add(key)
                deps.append(self._dependency(name, version, source, manifest_rel))
        return deps

    @staticmethod
    def _name_from_node_path(node_path: str) -> str | None:
        """Return the package name encoded in a lockfile ``packages`` key, if any."""
        # "node_modules/foo" -> "foo"; "node_modules/@scope/bar" -> "@scope/bar".
        # rfind picks the innermost package of a nested install path.
        marker = "node_modules/"
        idx = node_path.rfind(marker)
        if idx < 0:
            return None
        return node_path[idx + len(marker) :] or None

slopguard/parsers/python.py ADDED Viewed

@@ -0,0 +1,269 @@
+"""Python manifest parsers: ``requirements.txt``, ``pyproject.toml``, ``Pipfile``."""
+from __future__ import annotations
+import re
+import tomllib
+from pathlib import Path
+from typing import Any
+from slopguard.models import Dependency, DependencySource, Ecosystem
+from slopguard.parsers.base import Parser, ParserError
# PEP 508 name + optional version specifier prefix.
# Group 1 captures only the leading distribution name: an alphanumeric first
# character followed by alphanumerics, ".", "_" or "-". Leading whitespace is skipped.
_NAME_RE = re.compile(r"^\s*([A-Za-z0-9][A-Za-z0-9._-]*)")
# Strip extras like "package[extra,extra2]"
# Matches every "[...]" group so callers can remove them with ``sub``.
_EXTRAS_RE = re.compile(r"\[[^\]]*\]")
# Match any version specifier portion (==, >=, <=, ~=, !=, <, >).
# Also spans comma-separated clauses such as ">=1.0,<2.0"; each clause value
# stops at whitespace, "," or ";".
_SPEC_RE = re.compile(
    r"(==|>=|<=|~=|!=|<|>)\s*[^,;\s]+(?:\s*,\s*(?:==|>=|<=|~=|!=|<|>)\s*[^,;\s]+)*"
)
def _requirement_url_source(url: str) -> DependencySource:
    """Map a requirement URL to a source kind by looking at its scheme prefix."""
    if url.startswith(("git+", "git://")):
        return DependencySource.GIT
    if url.startswith("file:"):
        return DependencySource.FILE
    return DependencySource.URL


def _parse_requirement_line(line: str) -> tuple[str, str | None, DependencySource] | None:
    """Parse a single requirements.txt line (or PEP 508 string).

    Args:
        line: One logical requirement line.

    Returns:
        ``(name, version, source)`` for a dependency, or ``None`` when the line
        should be skipped: blank lines, comments, pip options, local paths, and
        URLs whose package name cannot be determined.
    """
    # pip treats "#" as a comment only at line start or after whitespace, which
    # keeps "#egg=name" URL fragments intact. Stripping comments first stops a
    # URL mentioned in a trailing comment from hijacking the URL branch below.
    stripped = re.sub(r"(^|\s+)#.*$", "", line).strip()
    if not stripped:
        return None
    # Skip pip flags like -r other.txt, -c constraints.txt, --index-url ...
    if stripped.startswith("-"):
        # Editable install — usually a local path or VCS URL.
        if stripped.startswith(("-e ", "-e\t")):
            return _classify_editable(stripped[2:].strip())
        if stripped.startswith(("--editable ", "--editable\t", "--editable=")):
            return _classify_editable(stripped[len("--editable") + 1 :].strip())
        return None
    # PEP 508 direct reference: "name[extras] @ <url>". The name is explicit,
    # so no "#egg=" fragment is needed.
    direct = _NAME_RE.match(stripped)
    if direct:
        tail = stripped[direct.end() :].lstrip()
        if tail.startswith("["):
            closing = tail.find("]")
            if closing >= 0:
                tail = tail[closing + 1 :].lstrip()
        if tail.startswith("@"):
            url = tail[1:].strip()
            if "://" in url or url.startswith("file:"):
                return (direct.group(1), None, _requirement_url_source(url))
    # Bare URL / VCS requirement: the name can only come from an egg= fragment.
    if "://" in stripped or stripped.startswith(("git+", "hg+", "svn+", "bzr+")):
        name = _name_from_url(stripped)
        if not name:
            return None
        # Decide by scheme, not by substring: "github.com" inside a plain https
        # archive URL does not make it a git checkout.
        return (name, None, _requirement_url_source(stripped))
    # Local path: starts with "./" or "/" or contains ".whl" / ".tar.gz" at end.
    if stripped.startswith(("./", "/", "../")) or stripped.endswith((".whl", ".tar.gz", ".zip")):
        # No reliable name without inspecting the file — skip.
        return None
    # Strip environment markers (";python_version>='3.10'") and inline comments.
    bare = stripped.split(";", 1)[0].split("#", 1)[0].strip()
    bare = _EXTRAS_RE.sub("", bare)
    name_match = _NAME_RE.match(bare)
    if not name_match:
        return None
    name = name_match.group(1)
    rest = bare[name_match.end() :].strip()
    spec_match = _SPEC_RE.search(rest)
    version = spec_match.group(0).strip() if spec_match else None
    return (name, version, DependencySource.REGISTRY)
def _classify_editable(target: str) -> tuple[str, str | None, DependencySource] | None:
    """Classify the target of an editable (``-e``) requirement.

    Args:
        target: Everything after the ``-e`` flag, already stripped.

    Returns:
        ``(name, None, source)`` when ``target`` is a URL carrying an ``egg=``
        fragment, otherwise ``None``. ``source`` is ``GIT`` only for git URLs,
        ``FILE`` for ``file:`` URLs, and ``URL`` for every other scheme.
    """
    if "://" in target or target.startswith(("git+", "hg+", "svn+", "bzr+")):
        name = _name_from_url(target)
        if not name:
            return None
        # Label by scheme: Mercurial/Subversion/Bazaar and plain archive URLs are
        # not git checkouts, and DependencySource has no dedicated member for them.
        if target.startswith(("git+", "git://")):
            return (name, None, DependencySource.GIT)
        if target.startswith("file:"):
            return (name, None, DependencySource.FILE)
        return (name, None, DependencySource.URL)
    # Local editable install — name is the directory's package name, which we cannot infer
    # without reading setup.cfg / pyproject. Skip.
    return None
+def _name_from_url(url: str) -> str | None:
+    if "#egg=" in url:
+        return url.split("#egg=")[-1].split("&", 1)[0]
+    return None
class PythonParser(Parser):
    """Parse Python manifests: ``requirements.txt``, ``pyproject.toml``, ``Pipfile``.

    ``pyproject.toml`` support covers PEP 621 ``[project]`` tables, PEP 735
    ``[dependency-groups]`` and Poetry's ``[tool.poetry]`` tables.
    """

    @classmethod
    def supported_filenames(cls) -> tuple[str, ...]:
        """Return the manifest basenames this parser handles."""
        return ("requirements.txt", "pyproject.toml", "Pipfile")

    def parse(self, path: Path) -> list[Dependency]:
        """Parse the Python manifest at ``path``.

        Args:
            path: Location of a supported manifest; dispatch is by basename.

        Returns:
            The dependencies found, in file order.

        Raises:
            ParserError: If the file is missing, unreadable, malformed, or has
                an unsupported name.
        """
        try:
            blob = path.read_bytes()
        except FileNotFoundError as exc:
            raise ParserError(f"manifest not found: {path}") from exc
        except OSError as exc:
            # Permission errors, directories, etc. — the base-class contract
            # promises ParserError for anything unreadable.
            raise ParserError(f"cannot read {path}: {exc}") from exc
        if path.name == "requirements.txt":
            # "utf-8-sig" drops a leading BOM, which would otherwise hide the
            # first requirement from the name regex.
            text = blob.decode("utf-8-sig", errors="replace")
            return self._parse_requirements_txt(text, path)
        if path.name == "pyproject.toml":
            return self._parse_pyproject(blob, path)
        if path.name == "Pipfile":
            return self._parse_pipfile(blob, path)
        raise ParserError(f"unsupported Python manifest: {path.name}")

    @staticmethod
    def _load_toml(blob: bytes, path: Path) -> dict[str, Any]:
        """Decode and parse TOML bytes, converting failures to ``ParserError``."""
        try:
            return tomllib.loads(blob.decode("utf-8"))
        except UnicodeDecodeError as exc:
            raise ParserError(f"{path} is not valid UTF-8: {exc}") from exc
        except tomllib.TOMLDecodeError as exc:
            raise ParserError(f"invalid TOML in {path}: {exc}") from exc

    @staticmethod
    def _dependency(
        name: str,
        version: str | None,
        source: DependencySource,
        path: Path,
    ) -> Dependency:
        """Build a PyPI ``Dependency`` record for ``name``."""
        return Dependency(
            name=name,
            version=version,
            ecosystem=Ecosystem.PYPI,
            manifest=path.name,
            source=source,
            scoped=False,
        )

    @staticmethod
    def _logical_lines(text: str) -> list[str]:
        """Join backslash-continued physical lines into logical requirement lines."""
        logical: list[str] = []
        pending = ""
        for raw in text.splitlines():
            line = raw.rstrip()
            # Like pip, a trailing backslash on a comment line does not continue it.
            if line.endswith("\\") and not line.lstrip().startswith("#"):
                pending += line[:-1].rstrip() + " "
                continue
            logical.append(pending + line)
            pending = ""
        if pending:
            # File ended on a dangling continuation; keep what was collected.
            logical.append(pending)
        return logical

    def _parse_requirements_txt(self, text: str, path: Path) -> list[Dependency]:
        """Collect dependencies from requirements.txt content.

        Entries are de-duplicated by (lower-cased name, version).
        """
        deps: list[Dependency] = []
        seen: set[tuple[str, str | None]] = set()
        for line in self._logical_lines(text):
            parsed = _parse_requirement_line(line)
            if parsed is None:
                continue
            name, version, source = parsed
            key = (name.lower(), version)
            if key in seen:
                continue
            seen.add(key)
            deps.append(self._dependency(name, version, source, path))
        return deps

    def _collect_pep508(
        self,
        entries: Any,
        path: Path,
        seen: set[str],
    ) -> list[Dependency]:
        """Collect dependencies from a list of PEP 508 requirement strings.

        Non-list values and non-string items are ignored; the latter covers
        PEP 735 ``{include-group = "..."}`` tables. ``seen`` holds lower-cased
        names already emitted and is updated in place.
        """
        out: list[Dependency] = []
        # A bare string here would otherwise be iterated character by character.
        if not isinstance(entries, list):
            return out
        for entry in entries:
            if not isinstance(entry, str):
                continue
            parsed = _parse_requirement_line(entry)
            if parsed is None:
                continue
            name, version, source = parsed
            lowered = name.lower()
            if lowered in seen:
                continue
            seen.add(lowered)
            out.append(self._dependency(name, version, source, path))
        return out

    def _parse_pyproject(self, blob: bytes, path: Path) -> list[Dependency]:
        """Collect dependencies from a ``pyproject.toml``.

        Entries are de-duplicated by lower-cased name across all tables; the
        first occurrence wins.
        """
        data = self._load_toml(blob, path)
        deps: list[Dependency] = []
        seen: set[str] = set()
        # PEP 621: [project] dependencies, [project.optional-dependencies]
        project = data.get("project")
        if isinstance(project, dict):
            deps.extend(self._collect_pep508(project.get("dependencies"), path, seen))
            optional = project.get("optional-dependencies")
            if isinstance(optional, dict):
                for group in optional.values():
                    deps.extend(self._collect_pep508(group, path, seen))
        # PEP 735: top-level [dependency-groups], each a list of PEP 508 strings.
        dependency_groups = data.get("dependency-groups")
        if isinstance(dependency_groups, dict):
            for group in dependency_groups.values():
                deps.extend(self._collect_pep508(group, path, seen))
        # Poetry: [tool.poetry.dependencies], the legacy
        # [tool.poetry.dev-dependencies] and [tool.poetry.group.<name>.dependencies]
        tool = data.get("tool")
        if isinstance(tool, dict):
            poetry = tool.get("poetry")
            if isinstance(poetry, dict):
                deps.extend(self._collect_poetry(poetry.get("dependencies"), path, seen))
                deps.extend(self._collect_poetry(poetry.get("dev-dependencies"), path, seen))
                groups = poetry.get("group")
                if isinstance(groups, dict):
                    for group_meta in groups.values():
                        if isinstance(group_meta, dict):
                            deps.extend(
                                self._collect_poetry(group_meta.get("dependencies"), path, seen)
                            )
        return deps

    def _collect_poetry(
        self,
        block: Any,
        path: Path,
        seen: set[str],
    ) -> list[Dependency]:
        """Collect dependencies from one Poetry dependency table.

        The ``python`` entry is an interpreter constraint, not a package, and is
        skipped. ``seen`` is updated in place.
        """
        out: list[Dependency] = []
        if not isinstance(block, dict):
            return out
        for name, spec in block.items():
            if not isinstance(name, str):
                continue
            lowered = name.lower()
            if lowered == "python" or lowered in seen:
                continue
            seen.add(lowered)
            version, source = self._classify_poetry_spec(spec)
            out.append(self._dependency(name, version, source, path))
        return out

    @staticmethod
    def _classify_poetry_spec(spec: Any) -> tuple[str | None, DependencySource]:
        """Derive ``(version, source)`` from a Poetry or Pipfile dependency value.

        A string is a registry version constraint. A table is classified by its
        ``path`` / ``url`` / ``git`` / ``file`` key, falling back to its
        ``version`` key. Anything else is a registry entry with no version.
        """
        if isinstance(spec, str):
            return (spec, DependencySource.REGISTRY)
        if isinstance(spec, dict):
            if "path" in spec:
                return (None, DependencySource.FILE)
            if "url" in spec:
                return (None, DependencySource.URL)
            if "git" in spec:
                return (None, DependencySource.GIT)
            file_ref = spec.get("file")
            if isinstance(file_ref, str):
                # Pipfile "file" tables hold either a remote archive URL or a local path.
                remote = "://" in file_ref and not file_ref.startswith("file:")
                return (None, DependencySource.URL if remote else DependencySource.FILE)
            ver = spec.get("version")
            return (ver if isinstance(ver, str) else None, DependencySource.REGISTRY)
        return (None, DependencySource.REGISTRY)

    def _parse_pipfile(self, blob: bytes, path: Path) -> list[Dependency]:
        """Collect dependencies from the ``packages`` and ``dev-packages`` tables of a Pipfile."""
        data = self._load_toml(blob, path)
        deps: list[Dependency] = []
        seen: set[str] = set()
        for section in ("packages", "dev-packages"):
            block = data.get(section)
            if not isinstance(block, dict):
                continue
            for name, spec in block.items():
                if not isinstance(name, str) or name.lower() in seen:
                    continue
                seen.add(name.lower())
                version, source = self._classify_poetry_spec(spec)
                # "*" is Pipfile's "any version" wildcard, i.e. no constraint.
                if version == "*":
                    version = None
                deps.append(self._dependency(name, version, source, path))
        return deps

slopguard/registry/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""Registry clients for npm and PyPI."""
+from __future__ import annotations
+from slopguard.registry.base import RegistryClient, RegistryError
+from slopguard.registry.npm import NpmRegistryClient
+from slopguard.registry.pypi import PypiRegistryClient
+__all__ = [
+    "NpmRegistryClient",
+    "PypiRegistryClient",
+    "RegistryClient",
+    "RegistryError",
+]