slopguard-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
slopguard/models.py ADDED
@@ -0,0 +1,133 @@
1
+ """Pydantic models — wire contract for the JSON report and internal types."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import datetime
6
+ from enum import StrEnum
7
+ from typing import Literal
8
+
9
+ from pydantic import BaseModel, ConfigDict, Field
10
+
11
+
12
class Ecosystem(StrEnum):
    """Supported package ecosystems.

    Members are ``StrEnum`` values, so each one compares equal to (and
    serialises as) its lowercase string value.
    """

    # Packages declared in ``package.json`` / ``package-lock.json``.
    NPM = "npm"
    # Packages declared in ``requirements.txt``, ``pyproject.toml`` or ``Pipfile``.
    PYPI = "pypi"
17
+
18
+
19
+ class RiskTier(StrEnum):
20
+ """Risk tier emitted for each scanned dependency.
21
+
22
+ Ordering (low → high): ``clean`` < ``error`` < ``suspicious`` < ``hallucinated``.
23
+ ``error`` sits below ``suspicious`` because it usually reflects a transient
24
+ network or parse problem rather than evidence of risk.
25
+ """
26
+
27
+ CLEAN = "clean"
28
+ ERROR = "error"
29
+ SUSPICIOUS = "suspicious"
30
+ HALLUCINATED = "hallucinated"
31
+
32
+
33
class DependencySource(StrEnum):
    """Where a dependency reference came from.

    The parsers use this to distinguish dependencies resolved from a package
    registry from those pinned to a local path, symlink, VCS checkout or
    direct URL.
    """

    # Resolved by name/version from the ecosystem's package registry (the default).
    REGISTRY = "registry"
    # Local file or directory reference (e.g. an npm ``file:`` spec).
    FILE = "file"
    # Symlinked local package (e.g. an npm ``link:`` spec).
    LINK = "link"
    # Version-control checkout.
    GIT = "git"
    # Direct HTTP(S) download.
    URL = "url"
41
+
42
+
43
class Dependency(BaseModel):
    """A single dependency entry parsed from a manifest.

    Instances are frozen (immutable after construction).
    """

    model_config = ConfigDict(frozen=True)

    # Package name exactly as written in the manifest.
    name: str
    # Raw version spec string; the parsers pass ``None`` when no spec was found
    # or, for most non-registry sources, when a version is not meaningful.
    version: str | None = None
    ecosystem: Ecosystem
    # Manifest the entry came from; the bundled parsers pass the file's basename.
    manifest: str
    source: DependencySource = DependencySource.REGISTRY
    # The npm parser sets this for names starting with "@"; it stays False otherwise.
    scoped: bool = False
54
+
55
+
56
class Signal(BaseModel):
    """One contributing signal toward a finding's risk score."""

    # Identifier of the signal kind (free-form string; values are defined by the scorer).
    type: str
    # Contribution of this signal; validated to lie in the closed range [0.0, 1.0].
    weight: float = Field(ge=0.0, le=1.0)
    # Explanation of why the signal fired.
    detail: str
62
+
63
+
64
class Finding(BaseModel):
    """The result of scoring a single dependency."""

    # Identity fields; these mirror the same-named fields on ``Dependency``.
    name: str
    version: str | None = None
    ecosystem: Ecosystem
    manifest: str
    # Tier assigned to the dependency.
    risk: RiskTier
    # Aggregate risk score; validated to lie in the closed range [0.0, 1.0].
    score: float = Field(ge=0.0, le=1.0)
    # Signals that contributed to ``score``; defaults to a fresh empty list.
    signals: list[Signal] = Field(default_factory=list)
    # Suggested action for the user (required, may be any string).
    remediation: str
    # Error description; presumably set when ``risk`` is ``error`` — confirm with the scorer.
    error: str | None = None
76
+
77
+
78
class ManifestInfo(BaseModel):
    """Summary of a single manifest scanned during this run."""

    # Location of the manifest file.
    path: str
    ecosystem: Ecosystem
    # Number of dependencies found in the manifest; validated to be non-negative.
    dependency_count: int = Field(ge=0)
84
+
85
+
86
class ScanSummary(BaseModel):
    """Aggregate counts over all findings in a scan.

    Every counter is validated to be non-negative. The model does not itself
    check that the per-tier counters add up to ``total``.
    """

    total: int = Field(ge=0)
    clean: int = Field(ge=0)
    suspicious: int = Field(ge=0)
    hallucinated: int = Field(ge=0)
    # Note the plural name, unlike the ``error`` member of ``RiskTier``.
    errors: int = Field(ge=0)
94
+
95
+
96
class ScanReport(BaseModel):
    """Top-level scan report. This is the JSON wire format — treat as a public API.

    Renaming, removing or re-typing a field is a breaking change for consumers
    of the JSON output.
    """

    # Version string of the tool that produced the report.
    slopguard_version: str
    # Identifier of this scan run.
    scan_id: str
    # Timestamp of the scan. NOTE(review): timezone-awareness is not enforced here.
    scanned_at: datetime
    # Path that was scanned.
    path: str
    manifests: list[ManifestInfo]
    summary: ScanSummary
    findings: list[Finding]
    # Process exit status; restricted to 0, 1 or 2 (meanings are defined by the CLI).
    exit_code: Literal[0, 1, 2]
107
+
108
+
109
class HallucinationEntry(BaseModel):
    """One row in the embedded hallucination database."""

    # Package name recorded in the database.
    name: str
    ecosystem: Ecosystem
    # Kept as a plain string; the model does not parse or validate a date format.
    first_seen: str
    # Validated to lie in the closed range [0.0, 1.0].
    recurrence_rate: float = Field(ge=0.0, le=1.0)
    # Names of the models for which the entry was observed.
    models_observed: list[str]
    # Free-form commentary (required field).
    notes: str
118
+
119
+
120
class HallucinationDB(BaseModel):
    """Loaded hallucination database."""

    # Integer version of the database file layout.
    schema_version: int
    # Kept as a plain string; the model does not parse or validate a date format.
    updated: str
    entries: list[HallucinationEntry]
126
+
127
+
128
class PopularPackages(BaseModel):
    """Top-N popularity lists per ecosystem (used for Levenshtein typosquat checks)."""

    # Integer version of the data file layout.
    schema_version: int
    # Package names per ecosystem. The list length is not validated, despite
    # the "1000" in the field names.
    npm_top_1000: list[str]
    pypi_top_1000: list[str]
@@ -0,0 +1,9 @@
1
+ """Manifest parsers for npm and Python ecosystems."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from slopguard.parsers.base import Parser, ParserError
6
+ from slopguard.parsers.npm import NpmParser
7
+ from slopguard.parsers.python import PythonParser
8
+
9
+ __all__ = ["NpmParser", "Parser", "ParserError", "PythonParser"]
@@ -0,0 +1,28 @@
1
+ """Parser base class."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+ from pathlib import Path
7
+
8
+ from slopguard.models import Dependency
9
+
10
+
11
class ParserError(Exception):
    """Raised when a manifest cannot be parsed.

    Parsers raise this for missing files and malformed content, chaining the
    underlying exception as the cause.
    """
13
+
14
+
15
class Parser(ABC):
    """Abstract parser for a single manifest file.

    Concrete subclasses declare which manifest basenames they understand and
    turn one such file into a list of ``Dependency`` records.
    """

    @classmethod
    @abstractmethod
    def supported_filenames(cls) -> tuple[str, ...]:
        """Filenames (basename match) this parser handles.

        Returns:
            A tuple of exact file basenames, e.g. ``("package.json",)``.
        """

    @abstractmethod
    def parse(self, path: Path) -> list[Dependency]:
        """Parse the manifest at ``path`` and return dependencies.

        Args:
            path: Location of the manifest file to read.

        Returns:
            The dependencies declared in the manifest.

        Raises ``ParserError`` if the file is malformed or unreadable.
        """
@@ -0,0 +1,146 @@
1
+ """npm manifest parsers: ``package.json`` and ``package-lock.json``."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ from slopguard.models import Dependency, DependencySource, Ecosystem
10
+ from slopguard.parsers.base import Parser, ParserError
11
+
12
+
13
def _classify_npm_spec(spec: str) -> DependencySource:
    """Classify the source of an npm dependency spec string.

    Recognised forms, checked in this order:

    * ``file:`` specs and bare local paths (``./x``, ``../x``, ``~/x``, ``/x``)
      → ``FILE``
    * ``link:`` specs → ``LINK``
    * git URLs, hosted-git prefixes (``github:``, ``gitlab:``, ``bitbucket:``,
      ``gist:``) and anything containing ``git@`` → ``GIT``
    * ``http://`` / ``https://`` tarball URLs → ``URL``
    * GitHub shorthand ``owner/repo[#committish]`` → ``GIT``
    * everything else (semver ranges, dist-tags, ``npm:`` aliases) → ``REGISTRY``

    Args:
        spec: The right-hand side of a ``package.json`` dependency entry.

    Returns:
        The ``DependencySource`` member describing where the package comes from.
    """
    spec = spec.strip()
    if spec.startswith("file:"):
        return DependencySource.FILE
    if spec.startswith("link:"):
        return DependencySource.LINK
    # npm accepts bare local paths as an alternative to the "file:" prefix.
    if spec.startswith(("./", "../", "~/", "/")):
        return DependencySource.FILE
    if (
        spec.startswith(("git:", "git+", "github:", "gitlab:", "bitbucket:", "gist:"))
        or "git@" in spec
    ):
        return DependencySource.GIT
    if spec.startswith(("http://", "https://")):
        return DependencySource.URL
    # GitHub shorthand: exactly one "/" and no scheme. Semver ranges and
    # dist-tags never contain "/", and "npm:@scope/pkg" aliases are excluded by
    # the ":" check, so this cannot swallow a registry spec.
    location = spec.split("#", 1)[0]
    owner, slash, repo = location.partition("/")
    if (
        slash
        and owner
        and repo
        and "/" not in repo
        and ":" not in location
        and not owner.startswith("@")
    ):
        return DependencySource.GIT
    return DependencySource.REGISTRY
22
+
23
+
24
class NpmParser(Parser):
    """Parse ``package.json`` and ``package-lock.json``.

    ``package.json`` yields the directly declared dependencies;
    ``package-lock.json`` yields every locked package (v2/v3 ``packages`` map,
    falling back to the v1 ``dependencies`` tree).
    """

    # package.json sections that declare dependencies, in scan order.
    _MANIFEST_SECTIONS = (
        "dependencies",
        "devDependencies",
        "optionalDependencies",
        "peerDependencies",
    )

    @classmethod
    def supported_filenames(cls) -> tuple[str, ...]:
        """Return the manifest basenames this parser handles."""
        return ("package.json", "package-lock.json")

    def parse(self, path: Path) -> list[Dependency]:
        """Parse the npm manifest or lockfile at ``path``.

        Args:
            path: Location of a ``package.json`` or ``package-lock.json`` file.

        Returns:
            The dependencies found in the file.

        Raises:
            ParserError: If the file is missing, unreadable, not valid JSON, or
                its top-level value is not a JSON object.
        """
        try:
            with path.open("rb") as fh:
                data = json.load(fh)
        except FileNotFoundError as exc:
            raise ParserError(f"manifest not found: {path}") from exc
        except json.JSONDecodeError as exc:
            raise ParserError(f"invalid JSON in {path}: {exc.msg}") from exc
        except UnicodeDecodeError as exc:
            # json.load() decodes the raw bytes itself and can fail before parsing.
            raise ParserError(f"invalid text encoding in {path}: {exc.reason}") from exc
        except OSError as exc:
            # Permission problems, directories, I/O errors: "unreadable" per the
            # Parser contract, so surface them as ParserError too.
            raise ParserError(f"cannot read manifest {path}: {exc.strerror or exc}") from exc

        if not isinstance(data, dict):
            raise ParserError(f"expected JSON object at top level of {path}")

        if path.name == "package-lock.json":
            return self._parse_lockfile(data, path)
        return self._parse_manifest(data, path)

    def _parse_manifest(self, data: dict[str, Any], path: Path) -> list[Dependency]:
        """Collect dependencies from every dependency section of a ``package.json``.

        A package listed in several sections is reported once per section.
        """
        deps: list[Dependency] = []
        manifest_rel = path.name
        for section in self._MANIFEST_SECTIONS:
            block = data.get(section)
            if not isinstance(block, dict):
                continue
            for name, spec in block.items():
                if not isinstance(name, str):
                    continue
                spec_str = spec if isinstance(spec, str) else ""
                source = _classify_npm_spec(spec_str)
                # Only registry specs carry a meaningful version; an empty or
                # non-string spec means "no version given", i.e. None.
                version = (spec_str or None) if source is DependencySource.REGISTRY else None
                deps.append(
                    Dependency(
                        name=name,
                        version=version,
                        ecosystem=Ecosystem.NPM,
                        manifest=manifest_rel,
                        source=source,
                        scoped=name.startswith("@"),
                    )
                )
        return deps

    def _parse_lockfile(self, data: dict[str, Any], path: Path) -> list[Dependency]:
        """Collect locked packages from a ``package-lock.json``.

        Entries are de-duplicated on ``(name, version)``. When the v2/v3
        ``packages`` map is present it is authoritative and the legacy
        ``dependencies`` tree is not consulted.
        """
        deps: list[Dependency] = []
        manifest_rel = path.name
        seen: set[tuple[str, str | None]] = set()

        def add(name: str, version: str | None, source: DependencySource) -> None:
            key = (name, version)
            if key in seen:
                return
            seen.add(key)
            deps.append(
                Dependency(
                    name=name,
                    version=version,
                    ecosystem=Ecosystem.NPM,
                    manifest=manifest_rel,
                    source=source,
                    scoped=name.startswith("@"),
                )
            )

        # lockfile v2/v3 puts everything under "packages" keyed by node_modules path.
        packages = data.get("packages")
        if isinstance(packages, dict):
            for node_path, meta in packages.items():
                # The empty-key entry describes the root project — skip it.
                if not node_path or not isinstance(meta, dict):
                    continue
                declared = meta.get("name")
                if isinstance(declared, str) and declared:
                    name = declared
                else:
                    name = self._name_from_node_path(node_path)
                if not name:
                    continue
                version = meta.get("version")
                if not isinstance(version, str):
                    version = None
                resolved = meta.get("resolved")
                if not isinstance(resolved, str):
                    # Guard against null / non-string values in hand-edited files.
                    resolved = ""
                source = DependencySource.REGISTRY
                if meta.get("link"):
                    source = DependencySource.LINK
                elif "node_modules/" not in node_path:
                    # Keys outside node_modules are local folders (workspaces),
                    # not packages fetched from the registry.
                    source = DependencySource.FILE
                elif resolved.startswith("git"):
                    source = DependencySource.GIT
                elif resolved.startswith("file:"):
                    source = DependencySource.FILE
                add(name, version, source)
            return deps

        # lockfile v1 stored deps under "dependencies", with transitive packages
        # nested under each entry's own "dependencies" key.
        legacy = data.get("dependencies")
        if isinstance(legacy, dict):
            pending: list[dict[str, Any]] = [legacy]
            while pending:
                block = pending.pop(0)
                for name, meta in block.items():
                    if not isinstance(name, str) or not isinstance(meta, dict):
                        continue
                    raw_version = meta.get("version")
                    version = raw_version if isinstance(raw_version, str) else None
                    # In v1 the "version" of a non-registry package is its
                    # git / file / URL specifier rather than a version number.
                    source = (
                        _classify_npm_spec(version)
                        if version is not None
                        else DependencySource.REGISTRY
                    )
                    if source is not DependencySource.REGISTRY:
                        version = None
                    add(name, version, source)
                    nested = meta.get("dependencies")
                    if isinstance(nested, dict):
                        pending.append(nested)
        return deps

    @staticmethod
    def _name_from_node_path(node_path: str) -> str | None:
        """Derive the package name from a lockfile ``packages`` key.

        Returns ``None`` when the key has no ``node_modules/`` segment or
        nothing follows the last one.
        """
        # "node_modules/foo" -> "foo"; "node_modules/@scope/bar" -> "@scope/bar".
        # rfind picks the innermost package of a nested install path.
        marker = "node_modules/"
        idx = node_path.rfind(marker)
        if idx < 0:
            return None
        return node_path[idx + len(marker) :] or None
@@ -0,0 +1,269 @@
1
+ """Python manifest parsers: ``requirements.txt``, ``pyproject.toml``, ``Pipfile``."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ import tomllib
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from slopguard.models import Dependency, DependencySource, Ecosystem
11
+ from slopguard.parsers.base import Parser, ParserError
12
+
13
# Leading distribution name of a requirement: optional whitespace, then an
# ASCII letter or digit followed by letters, digits, ".", "_" or "-". Only the
# name is captured (group 1); the version specifier is matched by _SPEC_RE.
_NAME_RE = re.compile(r"^\s*([A-Za-z0-9][A-Za-z0-9._-]*)")
# Strip extras like "package[extra,extra2]"
_EXTRAS_RE = re.compile(r"\[[^\]]*\]")
# Match any version specifier portion (==, >=, <=, ~=, !=, <, >), including
# further comma-separated clauses such as ">=1.0,<2.0". Each clause's version
# runs up to the next comma, semicolon or whitespace.
_SPEC_RE = re.compile(
    r"(==|>=|<=|~=|!=|<|>)\s*[^,;\s]+(?:\s*,\s*(?:==|>=|<=|~=|!=|<|>)\s*[^,;\s]+)*"
)
21
+
22
+
23
def _source_for_url(url: str) -> DependencySource:
    """Map a requirement URL to the closest ``DependencySource`` member."""
    # There is no generic "VCS" member, so every VCS scheme maps to GIT — the
    # same choice _classify_editable makes for editable installs.
    if url.startswith(("git+", "hg+", "svn+", "bzr+", "git://")):
        return DependencySource.GIT
    if url.startswith("file:"):
        return DependencySource.FILE
    return DependencySource.URL


def _parse_requirement_line(line: str) -> tuple[str, str | None, DependencySource] | None:
    """Parse a single requirements.txt line. Returns (name, version, source) or None to skip.

    Handles, in order: blank/comment lines, editable installs (``-e`` /
    ``--editable``), other pip options (skipped), PEP 508 direct references
    (``name @ url``), bare URL / VCS requirements carrying an ``egg=`` fragment,
    local paths and archives (skipped, no reliable name), and ordinary
    ``name[extras] specifier ; marker`` requirements.

    Args:
        line: One logical line of a requirements file, or one PEP 508 string.

    Returns:
        ``(name, version, source)`` where ``version`` is the raw specifier text
        (``None`` when absent or not applicable), or ``None`` when the line
        does not name a dependency.
    """
    stripped = line.strip()
    if not stripped or stripped.startswith("#"):
        return None
    # pip only starts a comment at "#" preceded by whitespace, which keeps URL
    # fragments such as "#egg=name" intact while dropping trailing remarks.
    stripped = re.sub(r"\s+#.*$", "", stripped)
    # Skip pip flags like -r other.txt, -c constraints.txt, --index-url ...
    if stripped.startswith("-"):
        for flag in ("--editable=", "--editable ", "--editable\t", "-e ", "-e\t"):
            if stripped.startswith(flag):
                # Editable install — usually a local path or VCS URL.
                return _classify_editable(stripped[len(flag) :].strip())
        return None
    # Markers after a URL must be separated by whitespace; cut them off so they
    # cannot end up inside the URL or the egg name.
    requirement = re.split(r"\s+;", stripped, maxsplit=1)[0]

    # PEP 508 direct reference: "name[extras] @ <url>".
    head = _NAME_RE.match(requirement)
    if head:
        tail = requirement[head.end() :].lstrip()
        if tail.startswith("["):
            tail = tail.partition("]")[2].lstrip()
        if tail.startswith("@"):
            target = tail[1:].strip()
            if target:
                return (head.group(1), None, _source_for_url(target))

    # Bare URL / VCS requirement: the name can only come from an egg= fragment.
    if "://" in requirement or requirement.startswith(("git+", "hg+", "svn+", "bzr+")):
        name = _name_from_url(requirement)
        if not name:
            return None
        return (name, None, _source_for_url(requirement))
    # Local path: starts with "./" or "/" or contains ".whl" / ".tar.gz" at end.
    if requirement.startswith(("./", "/", "../")) or requirement.endswith(
        (".whl", ".tar.gz", ".zip")
    ):
        # No reliable name without inspecting the file — skip.
        return None
    # Strip environment markers (";python_version>='3.10'") and inline comments.
    bare = requirement.split(";", 1)[0].split("#", 1)[0].strip()
    bare = _EXTRAS_RE.sub("", bare)
    name_match = _NAME_RE.match(bare)
    if not name_match:
        return None
    name = name_match.group(1)
    rest = bare[name_match.end() :].strip()
    spec_match = _SPEC_RE.search(rest)
    version = spec_match.group(0).strip() if spec_match else None
    return (name, version, DependencySource.REGISTRY)
57
+
58
+
59
def _classify_editable(target: str) -> tuple[str, str | None, DependencySource] | None:
    """Classify the target of an editable (``-e``) requirement.

    Args:
        target: Everything after the ``-e`` flag, already stripped.

    Returns:
        ``(name, None, source)`` for URL / VCS targets that carry an ``egg=``
        name, or ``None`` when no package name can be determined (local
        directories, or URLs without an egg fragment).
    """
    vcs_prefixes = ("git+", "hg+", "svn+", "bzr+")
    if "://" in target or target.startswith(vcs_prefixes):
        name = _name_from_url(target)
        if not name:
            return None
        if target.startswith("file:"):
            source = DependencySource.FILE
        elif target.startswith(vcs_prefixes) or target.startswith("git://"):
            # No generic "VCS" member exists, so every VCS scheme maps to GIT.
            source = DependencySource.GIT
        else:
            source = DependencySource.URL
        return (name, None, source)
    # Local editable install — name is the directory's package name, which we cannot infer
    # without reading setup.cfg / pyproject. Skip.
    return None
68
+
69
+
70
+ def _name_from_url(url: str) -> str | None:
71
+ if "#egg=" in url:
72
+ return url.split("#egg=")[-1].split("&", 1)[0]
73
+ return None
74
+
75
+
76
class PythonParser(Parser):
    """Parse Python manifests: ``requirements.txt``, ``pyproject.toml``, ``Pipfile``."""

    @classmethod
    def supported_filenames(cls) -> tuple[str, ...]:
        """Return the manifest basenames this parser handles."""
        return ("requirements.txt", "pyproject.toml", "Pipfile")

    def parse(self, path: Path) -> list[Dependency]:
        """Parse the Python manifest at ``path``.

        Args:
            path: Location of a ``requirements.txt``, ``pyproject.toml`` or
                ``Pipfile``; the format is chosen from the basename.

        Returns:
            The dependencies declared in the manifest.

        Raises:
            ParserError: If the file is missing, unreadable, malformed, or has
                an unsupported basename.
        """
        try:
            blob = path.read_bytes()
        except FileNotFoundError as exc:
            raise ParserError(f"manifest not found: {path}") from exc
        except OSError as exc:
            # Permission problems, directories, I/O errors: "unreadable" per the
            # Parser contract, so surface them as ParserError too.
            raise ParserError(f"cannot read manifest {path}: {exc.strerror or exc}") from exc

        if path.name == "requirements.txt":
            # utf-8-sig drops a leading BOM, which would otherwise glue itself
            # to the first package name and make that line unparseable.
            return self._parse_requirements_txt(
                blob.decode("utf-8-sig", errors="replace"), path
            )
        if path.name == "pyproject.toml":
            return self._parse_pyproject(blob, path)
        if path.name == "Pipfile":
            return self._parse_pipfile(blob, path)
        raise ParserError(f"unsupported Python manifest: {path.name}")

    @staticmethod
    def _join_continuations(text: str) -> list[str]:
        """Split ``text`` into logical lines, merging backslash continuations."""
        logical: list[str] = []
        pending = ""
        for raw in text.splitlines():
            line = raw.rstrip()
            is_comment = line.lstrip().startswith("#")
            if line.endswith("\\") and not is_comment:
                pending += line[:-1]
                continue
            # A comment closing a continued line is kept behind whitespace so
            # it is still recognised as a comment rather than fused to the spec.
            logical.append(pending + " " + line if pending and is_comment else pending + line)
            pending = ""
        if pending:
            # File ended on a dangling continuation; keep what was collected.
            logical.append(pending)
        return logical

    @staticmethod
    def _load_toml(blob: bytes, path: Path) -> dict[str, Any]:
        """Decode and parse ``blob`` as TOML, mapping failures to ``ParserError``."""
        try:
            return tomllib.loads(blob.decode("utf-8"))
        except UnicodeDecodeError as exc:
            raise ParserError(f"invalid UTF-8 in {path}: {exc.reason}") from exc
        except tomllib.TOMLDecodeError as exc:
            raise ParserError(f"invalid TOML in {path}: {exc}") from exc

    def _parse_requirements_txt(self, text: str, path: Path) -> list[Dependency]:
        """Collect dependencies from requirements-file text.

        Entries are de-duplicated on ``(lower-cased name, version)``.
        """
        deps: list[Dependency] = []
        seen: set[tuple[str, str | None]] = set()
        # Handle line continuations: a trailing backslash joins the next line.
        for line in self._join_continuations(text):
            parsed = _parse_requirement_line(line)
            if parsed is None:
                continue
            name, version, source = parsed
            key = (name.lower(), version)
            if key in seen:
                continue
            seen.add(key)
            deps.append(
                Dependency(
                    name=name,
                    version=version,
                    ecosystem=Ecosystem.PYPI,
                    manifest=path.name,
                    source=source,
                    scoped=False,
                )
            )
        return deps

    def _collect_pep508(
        self,
        entries: Any,
        path: Path,
        seen: set[str],
    ) -> list[Dependency]:
        """Turn a list of PEP 508 strings into dependencies.

        Non-list input and non-string items are ignored. ``seen`` holds
        lower-cased names already emitted and is updated in place.
        """
        out: list[Dependency] = []
        if not isinstance(entries, list):
            return out
        for entry in entries:
            if not isinstance(entry, str):
                continue
            parsed = _parse_requirement_line(entry)
            if parsed is None:
                continue
            name, version, source = parsed
            if name.lower() in seen:
                continue
            seen.add(name.lower())
            out.append(
                Dependency(
                    name=name,
                    version=version,
                    ecosystem=Ecosystem.PYPI,
                    manifest=path.name,
                    source=source,
                )
            )
        return out

    def _parse_pyproject(self, blob: bytes, path: Path) -> list[Dependency]:
        """Collect dependencies from a ``pyproject.toml``.

        Reads PEP 621 ``[project]`` tables, Poetry tables and PEP 735
        ``[dependency-groups]``, in that order. Each package name is reported
        once; the first occurrence wins.
        """
        data = self._load_toml(blob, path)

        deps: list[Dependency] = []
        seen: set[str] = set()

        # PEP 621: [project] dependencies, [project.optional-dependencies]
        project = data.get("project")
        if isinstance(project, dict):
            deps.extend(self._collect_pep508(project.get("dependencies"), path, seen))
            opt = project.get("optional-dependencies")
            if isinstance(opt, dict):
                for group in opt.values():
                    deps.extend(self._collect_pep508(group, path, seen))

        # Poetry: [tool.poetry.dependencies] and [tool.poetry.group.<name>.dependencies]
        tool = data.get("tool")
        if isinstance(tool, dict):
            poetry = tool.get("poetry")
            if isinstance(poetry, dict):
                deps.extend(self._collect_poetry(poetry.get("dependencies"), path, seen))
                groups = poetry.get("group")
                if isinstance(groups, dict):
                    for group_meta in groups.values():
                        if isinstance(group_meta, dict):
                            deps.extend(
                                self._collect_poetry(group_meta.get("dependencies"), path, seen)
                            )

        # PEP 735: [dependency-groups]. Items are PEP 508 strings or
        # {include-group = "..."} tables; the latter only reference other
        # groups, which are visited by this loop anyway.
        dependency_groups = data.get("dependency-groups")
        if isinstance(dependency_groups, dict):
            for group in dependency_groups.values():
                deps.extend(self._collect_pep508(group, path, seen))

        return deps

    def _collect_poetry(
        self,
        block: Any,
        path: Path,
        seen: set[str],
    ) -> list[Dependency]:
        """Turn a Poetry ``dependencies`` table into dependencies.

        The ``python`` interpreter constraint is skipped. ``seen`` holds
        lower-cased names already emitted and is updated in place.
        """
        out: list[Dependency] = []
        if not isinstance(block, dict):
            return out
        for name, spec in block.items():
            if not isinstance(name, str):
                continue
            if name.lower() == "python":
                continue
            if name.lower() in seen:
                continue
            seen.add(name.lower())
            version, source = self._classify_poetry_spec(spec)
            out.append(
                Dependency(
                    name=name,
                    version=version,
                    ecosystem=Ecosystem.PYPI,
                    manifest=path.name,
                    source=source,
                )
            )
        return out

    @staticmethod
    def _classify_poetry_spec(spec: Any) -> tuple[str | None, DependencySource]:
        """Classify a Poetry / Pipfile dependency value.

        Args:
            spec: Either a version string or an inline table.

        Returns:
            ``(version, source)``; ``version`` is ``None`` for non-registry
            sources and for values that carry no version string.
        """
        if isinstance(spec, str):
            return (spec, DependencySource.REGISTRY)
        if isinstance(spec, dict):
            if "path" in spec:
                return (None, DependencySource.FILE)
            if "url" in spec:
                return (None, DependencySource.URL)
            if "git" in spec:
                return (None, DependencySource.GIT)
            # Pipfile spells archive references as {file = "<url or path>"}.
            file_ref = spec.get("file")
            if isinstance(file_ref, str):
                is_remote = "://" in file_ref and not file_ref.startswith("file:")
                return (None, DependencySource.URL if is_remote else DependencySource.FILE)
            ver = spec.get("version")
            return (ver if isinstance(ver, str) else None, DependencySource.REGISTRY)
        return (None, DependencySource.REGISTRY)

    def _parse_pipfile(self, blob: bytes, path: Path) -> list[Dependency]:
        """Collect dependencies from a ``Pipfile``.

        Reads ``[packages]`` then ``[dev-packages]``. Each package name is
        reported once, and the wildcard version ``"*"`` becomes ``None``.
        """
        data = self._load_toml(blob, path)

        deps: list[Dependency] = []
        seen: set[str] = set()
        for section in ("packages", "dev-packages"):
            block = data.get(section)
            if not isinstance(block, dict):
                continue
            for name, spec in block.items():
                if not isinstance(name, str) or name.lower() in seen:
                    continue
                seen.add(name.lower())
                # Pipfile values have the same string-or-table shape as Poetry's.
                version, source = self._classify_poetry_spec(spec)
                if version == "*":
                    version = None
                deps.append(
                    Dependency(
                        name=name,
                        version=version,
                        ecosystem=Ecosystem.PYPI,
                        manifest=path.name,
                        source=source,
                    )
                )
        return deps
@@ -0,0 +1,14 @@
1
+ """Registry clients for npm and PyPI."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from slopguard.registry.base import RegistryClient, RegistryError
6
+ from slopguard.registry.npm import NpmRegistryClient
7
+ from slopguard.registry.pypi import PypiRegistryClient
8
+
9
+ __all__ = [
10
+ "NpmRegistryClient",
11
+ "PypiRegistryClient",
12
+ "RegistryClient",
13
+ "RegistryError",
14
+ ]