proofctl 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,180 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from dataclasses import dataclass, field
5
+
6
# A block header has the shape:  kind "label1" "label2" {
# Both labels are optional and anything may follow the opening brace.
_OPTIONAL_LABEL = r'(?:\s+"([^"]*)")?'  # one optional double-quoted label
_BLOCK_HEADER_RE = re.compile(
    r'^\s*(\w+)'         # group 1: kind
    + _OPTIONAL_LABEL    # group 2: label1
    + _OPTIONAL_LABEL    # group 3: label2
    + r'\s*\{'           # opening brace (rest of line may follow)
)

# A heredoc opener (<<WORD, <<-WORD or <<~WORD) at the end of a line.
# Group 1 is the terminator word.
_HEREDOC_START_RE = re.compile(r'<<[-~]?(\w+)\s*$')
15
+
16
+
17
@dataclass
class HclBlock:
    """One HCL block plus the raw source lines it spans.

    ``raw_lines[0]`` is the header line and the last entry is the line that
    closes the block, so ``start_line + i`` is the absolute line number of
    ``raw_lines[i]``.
    """

    kind: str
    label1: str
    label2: str
    start_line: int  # 1-based, line of the block header
    end_line: int  # 1-based, line of the closing brace
    raw_lines: list[str] = field(repr=False)

    # ── Attribute helpers ────────────────────────────────────────────────────

    def attr(self, name: str) -> tuple[str, int] | None:
        """Return (raw_value_str, abs_lineno) for the first `name = …` in this block.

        Every raw line is searched, including lines that belong to nested
        blocks. A trailing ``#`` comment is removed from the value, but a
        ``#`` inside a double-quoted string is kept as part of the value.
        Returns None when no line assigns *name*.
        """
        pat = re.compile(
            rf'^\s*{re.escape(name)}\s*=\s*'
            # Value: complete quoted strings are consumed whole so that a '#'
            # inside them is not mistaken for a comment; any other character
            # except '#' is consumed one at a time.
            r'((?:"(?:[^"\\]|\\.)*"|[^#])*?)'
            r'(?:\s*#.*)?$'
        )
        for i, line in enumerate(self.raw_lines):
            m = pat.match(line)
            if m:
                return m.group(1).strip(), self.start_line + i
        return None

    def has_attr(self, name: str) -> bool:
        """Return True when some line in the block assigns *name*."""
        return self.attr(name) is not None

    def attr_value(self, name: str) -> str | None:
        """Return the raw value text of *name*, or None when it is absent."""
        r = self.attr(name)
        return r[0] if r else None

    def attr_line(self, name: str) -> int | None:
        """Return the absolute 1-based line of *name*'s assignment, or None."""
        r = self.attr(name)
        return r[1] if r else None

    # ── Nested block helpers ─────────────────────────────────────────────────

    def nested(self, kind: str) -> list[HclBlock]:
        """Directly nested blocks of the given kind."""
        # skip_outer drops this block's own header / closing line so that the
        # parser sees the children at depth 0.
        inner = _parse(self.raw_lines, base=self.start_line, skip_outer=True)
        return [b for b in inner if b.kind == kind]

    def has_nested(self, kind: str) -> bool:
        """Return True when at least one directly nested block has *kind*."""
        return bool(self.nested(kind))

    # ── Raw text helpers ─────────────────────────────────────────────────────

    def any_line_matches(self, pattern: str, flags: int = 0) -> int | None:
        """Return abs 1-based lineno of first matching line, or None."""
        pat = re.compile(pattern, flags)
        for i, line in enumerate(self.raw_lines):
            if pat.search(line):
                return self.start_line + i
        return None

    def contains(self, pattern: str, flags: int = 0) -> bool:
        """Return True when any raw line matches the regex *pattern*."""
        return self.any_line_matches(pattern, flags) is not None
70
+
71
+
72
+ # ── Parser ───────────────────────────────────────────────────────────────────
73
+
74
+
75
def parse_blocks(source: str) -> list[HclBlock]:
    """Return the top-level HCL blocks of *source* as a flat list.

    Line numbers on the returned blocks are 1-based positions in *source*.
    """
    source_lines = source.splitlines()
    return _parse(source_lines, base=1, skip_outer=False)
78
+
79
+
80
def _strip_comment(line: str) -> str:
    """Return *line* with a trailing ``#`` or ``//`` comment removed.

    Comment markers inside double-quoted strings are left alone. Within a
    string a backslash escapes the next character, so ``\\"`` does not end
    the string while ``\\\\`` followed by ``"`` does. When the line holds no
    comment it is returned unchanged.
    """
    in_str = False
    i = 0
    n = len(line)
    while i < n:
        c = line[i]
        if in_str and c == "\\" and i + 1 < n:
            # Skip the escaped character as a pair. Looking only at the
            # previous character would misread an escaped backslash that
            # sits right before the closing quote.
            i += 2
            continue
        if c == '"':
            in_str = not in_str
        elif not in_str and (c == "#" or line.startswith("//", i)):
            return line[:i]
        i += 1
    return line
94
+
95
+
96
def _brace_delta(line: str) -> int:
    """Return the net brace count of *line*: ``{`` count minus ``}`` count.

    Braces inside comments (removed first via ``_strip_comment``) and inside
    double-quoted strings are not counted.
    """
    code = _strip_comment(line)
    length = len(code)
    balance = 0
    quoted = False
    pos = 0
    while pos < length:
        ch = code[pos]
        if quoted and ch == "\\" and pos + 1 < length:
            pos += 2  # step over the escaped character (e.g. \" in a string)
            continue
        if ch == '"':
            quoted = not quoted
        elif not quoted:
            if ch == '{':
                balance += 1
            elif ch == '}':
                balance -= 1
        pos += 1
    return balance
115
+
116
+
117
def _parse(lines: list[str], base: int, skip_outer: bool) -> list[HclBlock]:
    """
    Parse the HCL blocks that start at brace depth 0 of *lines*.

    base:       1-based absolute line number of lines[0].
    skip_outer: True when lines is the raw_lines of a parent block — the
                first and last lines (header / closing brace) are skipped so
                only the parent's direct children are returned.

    Lines inside a heredoc body are ignored. A header whose braces balance on
    the same line (``lifecycle { ... }``) is returned as a single-line block.
    A block that is still open when the input ends is dropped.
    """
    blocks: list[HclBlock] = []
    depth = 0
    block_start_idx: int | None = None
    block_kind = block_label1 = block_label2 = ""
    heredoc_end: str | None = None

    start = 1 if skip_outer else 0
    end = len(lines) - (1 if skip_outer else 0)

    for i in range(start, end):
        line = lines[i]
        lineno = base + i

        # ── heredoc passthrough ──
        if heredoc_end:
            stripped = line.strip()
            if stripped == heredoc_end or stripped.rstrip("-~") == heredoc_end:
                heredoc_end = None
            continue
        hm = _HEREDOC_START_RE.search(line)
        if hm:
            heredoc_end = hm.group(1)

        delta = _brace_delta(line)

        if depth == 0:
            # Outside any block: only a header line can start one. A negative
            # delta cannot be a header, and a non-header line that opens a
            # brace (e.g. `tags = {`) is not tracked.
            m = _BLOCK_HEADER_RE.match(line) if delta >= 0 else None
            if m is None:
                continue
            kind = m.group(1)
            label1 = m.group(2) or ""
            label2 = m.group(3) or ""
            if delta == 0:
                # The header regex guarantees an opening brace, so a zero
                # delta means the block was opened and closed on this line.
                blocks.append(HclBlock(
                    kind=kind, label1=label1, label2=label2,
                    start_line=lineno, end_line=lineno,
                    raw_lines=lines[i:i + 1],
                ))
            else:
                block_kind, block_label1, block_label2 = kind, label1, label2
                block_start_idx = i
                depth = delta
            continue

        # Inside a block: track nesting until the depth returns to zero.
        depth += delta
        if depth <= 0:
            depth = 0
            if block_start_idx is not None:
                blocks.append(HclBlock(
                    kind=block_kind, label1=block_label1, label2=block_label2,
                    start_line=base + block_start_idx,
                    end_line=lineno,
                    raw_lines=lines[block_start_idx:i + 1],
                ))
                block_start_idx = None

    return blocks
@@ -0,0 +1,486 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ import json
5
+ import re
6
+ import shutil
7
+ import subprocess
8
+ import sys
9
+ import time
10
+ import urllib.error
11
+ import urllib.request
12
+ from pathlib import Path
13
+
14
+ from ..models import Finding, Severity
15
+ from .base import FileChecker, DirectoryChecker
16
+
17
# On-disk cache for PyPI existence lookups (one JSON file per package).
_CACHE_DIR = Path.home().joinpath(".cache", "proofctl", "pypi")
_CACHE_TTL = 24 * 60 * 60  # seconds a cached answer stays valid (24 hours)
_PYPI_TIMEOUT = 3  # seconds

# Packages known to be frequent AI hallucination targets
_HIGH_RISK_NAMES: frozenset[str] = frozenset((
    "boto4",
    "aiohttp-extras",
    "langchain-enhanced",
    "openai-utils",
    "anthropic-client",
    "fastapi-helpers",
    "requests-async",
    "sqlalchemy-utils-extended",
    "pydantic-extras",
    "flask-utils",
    "django-extras",
    "numpy-utils",
    "pandas-extras",
    "torch-utils",
    "tensorflow-extras",
    "scikit-learn-utils",
    "celery-extras",
))
29
+
30
+ # requirements.txt line: optional extras, then version specifier, then env markers / comments
31
# One requirements.txt entry: name, optional extras, optional version
# specifier. Matching stops before environment markers (`;`) and comments.
_REQ_LINE_RE = re.compile(
    r"^\s*([A-Za-z0-9]([A-Za-z0-9._-]*[A-Za-z0-9])?)"  # group 1: PEP 508 name
    r"(\[.*?\])?"                                      # group 3: extras
    r"\s*([~<>=!][^\s;#]*)?"                           # group 4: version spec
)


def _parse_requirements(text: str) -> list[tuple[str, str | None, int]]:
    """Extract ``(name, version_spec_or_None, lineno)`` from requirements text.

    Blank lines, comment lines and option lines (anything starting with
    ``-``, such as ``-r other.txt``) are skipped. Line numbers are 1-based.
    """
    entries = []
    for lineno, raw in enumerate(text.splitlines(), start=1):
        candidate = raw.strip()
        if candidate == "" or candidate[0] in "#-":
            continue
        hit = _REQ_LINE_RE.match(candidate)
        if hit is None:
            continue
        spec = hit.group(4)
        entries.append((hit.group(1), spec if spec else None, lineno))
    return entries
49
+
50
+
51
# Start of a `dependencies = [` array (matched against a stripped line).
_PYPROJECT_DEPS_OPEN_RE = re.compile(r"^dependencies\s*=\s*\[")

# Contents of one quoted dependency string: name, optional extras, then the
# version specifier up to (not including) any `;` environment marker.
_PYPROJECT_DEP_RE = re.compile(
    r"([A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?)"  # group 1: PEP 508 name
    r"(?:\[.*?\])?"                                  # optional extras
    r"([^;]*)"                                       # group 2: version spec
)


def _parse_pyproject_deps(text: str) -> list[tuple[str, str | None, int]]:
    """Lightweight parser for ``dependencies = [...]`` arrays in pyproject.toml.

    Returns ``(name, version_spec_or_None, lineno)`` for each quoted entry,
    with 1-based line numbers. Both layouts are handled: one entry per line
    and the whole array on a single line. The array ends at the first ``]``
    outside a quoted string, so text after the array is never read as a
    dependency. A ``#`` outside quotes starts a comment. Environment markers
    (after ``;``) are not part of the returned spec.
    """
    results = []
    in_deps = False
    for lineno, line in enumerate(text.splitlines(), start=1):
        segment = line.strip()
        if not in_deps:
            opener = _PYPROJECT_DEPS_OPEN_RE.match(segment)
            if opener is None:
                continue
            in_deps = True
            # Entries may follow the opening bracket on the same line.
            segment = segment[opener.end():]

        pos = 0
        while pos < len(segment):
            ch = segment[pos]
            if ch == "]":
                in_deps = False
                break
            if ch == "#":
                break  # rest of the line is a comment
            if ch in "\"'":
                close = segment.find(ch, pos + 1)
                if close == -1:
                    break  # unterminated string: ignore the remainder
                m = _PYPROJECT_DEP_RE.match(segment, pos + 1, close)
                if m:
                    spec = m.group(2).strip() or None
                    results.append((m.group(1), spec, lineno))
                pos = close + 1
                continue
            pos += 1
    return results
71
+
72
+ # mypy error patterns indicating phantom method calls
73
# mypy error patterns indicating phantom method calls.
# Both alternatives begin with the same `file:line: error:` location prefix.
_MYPY_ERROR_LOC = r'^(.+):(\d+):\s+error:\s+'
_MYPY_ATTR_RE = re.compile(
    # groups 1-4: file, line, object type, attribute
    _MYPY_ERROR_LOC
    + r'(?:Item\s+"[^"]+"\s+of\s+)?"([^"]+)"\s+has\s+no\s+attribute\s+"([^"]+)"'
    + r'|'
    # groups 5-7: file, line, attribute (module-level lookup)
    + _MYPY_ERROR_LOC
    + r'Module\s+"[^"]+"\s+has\s+no\s+attribute\s+"([^"]+)"'
)
77
+
78
+
79
def _normalize_pkg_name(name: str) -> str:
    """Return *name* lower-cased with each run of ``-``, ``_`` or ``.`` collapsed to one ``-``."""
    collapsed = re.sub(r"[-_.]+", "-", name)
    return collapsed.lower()
81
+
82
+
83
+ # ── I-002 pattern matching ────────────────────────────────────────────────────
84
+
85
+ # Popular packages whose canonical PyPI name is the short form (no python- prefix).
86
+ # Only include packages where the base name alone is the correct PyPI identifier
87
+ # so that legitimate python-X packages (python-dateutil, python-dotenv, etc.)
88
+ # are NOT flagged — their base names (dateutil, dotenv) are absent from this set.
89
_POPULAR_PACKAGES: frozenset[str] = frozenset((
    # HTTP / web clients
    "requests", "httpx", "aiohttp", "urllib3",
    # Web frameworks
    "flask", "django", "fastapi", "starlette", "tornado", "sanic",
    # ASGI / WSGI servers
    "uvicorn", "gunicorn",
    # Data science
    "numpy", "pandas", "scipy", "matplotlib", "seaborn", "plotly",
    # ML / AI — highest hallucination density
    "torch", "tensorflow", "keras", "transformers", "openai", "anthropic",
    "langchain", "cohere", "datasets",
    # Databases / ORMs
    "sqlalchemy", "sqlalchemy-utils", "alembic",
    "pymongo", "redis", "psycopg2", "asyncpg", "motor", "pymysql",
    "elasticsearch",
    # Cloud
    "boto3",
    # Task queues
    "celery", "rq",
    # Validation / config
    "pydantic", "attrs", "marshmallow",
    # CLI / UI
    "click", "typer", "rich",
    # Auth / crypto
    "cryptography", "passlib", "bcrypt", "pyjwt", "authlib",
    # Testing
    "pytest", "hypothesis", "faker", "factory-boy",
    # Tooling
    "mypy", "ruff", "black", "isort", "bandit",
    # Serialization
    "orjson", "ujson",
    # Misc popular
    "pillow", "loguru", "tenacity", "jinja2", "pyyaml", "paramiko",
))

# Generic suffixes that AI tends to append to real package names.
# The set is deliberately small: words such as "login", "cors", "mock" and
# "toolbelt" occur in legitimate packages (flask-login, flask-cors,
# pytest-mock, requests-toolbelt) and must NOT be added here.
_SUSPICIOUS_SUFFIXES: frozenset[str] = frozenset((
    "utils", "extras", "helpers", "enhanced", "extended",
    "new", "plus", "pro",
))
133
+
134
+
135
def _suspicious_variant_of(name: str) -> str | None:
    """Return the popular package that *name* appears to imitate, else None.

    *name* is normalised first; a name that is itself in _POPULAR_PACKAGES is
    never reported. The checks run in this order and the first hit wins:
      1. python-{X}            where X is a popular package
      2. {X}-python / {X}-py   where X is a popular package
      3. {X}-{suffix}          where suffix is in _SUSPICIOUS_SUFFIXES
      4. {X}{N}                where {X}{N-1} is a popular package
                               (e.g. boto4 → boto3)
    """
    n = _normalize_pkg_name(name)
    if n in _POPULAR_PACKAGES:
        return None

    # 1: "python-" prefix. python-dateutil / python-dotenv are not flagged
    # because 'dateutil' and 'dotenv' are absent from _POPULAR_PACKAGES.
    prefix = "python-"
    if n.startswith(prefix):
        remainder = n[len(prefix):]
        if remainder in _POPULAR_PACKAGES:
            return remainder

    # 2: "-python" / "-py" tail.
    for tail in ("-python", "-py"):
        if n.endswith(tail):
            stem = n[:-len(tail)]
            if stem in _POPULAR_PACKAGES:
                return stem

    # 3: meaningless suffix after the LAST hyphen, so a multi-hyphen name
    # like langchain-community-extras splits into
    # ("langchain-community", "extras").
    stem, hyphen, last = n.rpartition("-")
    if hyphen and last in _SUSPICIOUS_SUFFIXES and stem in _POPULAR_PACKAGES:
        return stem

    # 4: digit-incremented variant. Only reported when the predecessor is
    # explicitly listed, which avoids false positives on arbitrary numeric
    # suffixes. NOTE: the first group is greedy, so only the final digit of
    # a multi-digit suffix is captured as the number.
    numbered = re.match(r"^([a-z][a-z0-9-]*)(\d+)$", n)
    if numbered is None:
        return None
    predecessor = f"{numbered.group(1)}{int(numbered.group(2)) - 1}"
    return predecessor if predecessor in _POPULAR_PACKAGES else None
183
+
184
+
185
+ # Static fallback list: packages that don't fit the structural patterns above
186
+ # but are confirmed hallucination targets (keeps pattern rules tight).
187
# Normalised form of _HIGH_RISK_NAMES, ready for direct membership tests.
_HIGH_RISK_DEPS: frozenset[str] = frozenset(
    map(_normalize_pkg_name, _HIGH_RISK_NAMES)
)
190
+
191
+
192
def _pypi_exists(name: str) -> bool | None:
    """Report whether *name* is published on PyPI.

    Returns True when the package's Simple API page can be fetched, False
    when PyPI answers 404, and None when the lookup is inconclusive (network
    failure, timeout, or any HTTP error other than 404). Only definitive
    answers are cached, in ``_CACHE_DIR``, for ``_CACHE_TTL`` seconds.
    Caching is best-effort: an unreadable, corrupt or unwritable cache never
    makes the lookup fail.
    """
    normalized = _normalize_pkg_name(name)
    cache_file = _CACHE_DIR / f"{normalized}.json"

    try:
        data = json.loads(cache_file.read_text())
        cached = data["exists"]
        if isinstance(cached, bool) and time.time() - data["ts"] < _CACHE_TTL:
            return cached
    except (OSError, ValueError, KeyError, TypeError):
        # Missing, unreadable or malformed cache entry: fall through to a
        # live lookup (JSONDecodeError is a ValueError).
        pass

    req = urllib.request.Request(
        f"https://pypi.org/simple/{normalized}/",
        headers={"User-Agent": "proofctl/0.1 (AI slop linter; security research)"},
    )
    try:
        # The context manager closes the response/socket promptly.
        with urllib.request.urlopen(req, timeout=_PYPI_TIMEOUT):
            exists = True
    except urllib.error.HTTPError as e:
        status = e.code
        e.close()
        if status != 404:
            # 5xx / 429 etc. say nothing about the package. Treating them as
            # "exists" and caching that would hide a missing package for the
            # whole TTL.
            return None
        exists = False
    except Exception:
        return None  # network unavailable — skip, don't false-positive

    try:
        _CACHE_DIR.mkdir(parents=True, exist_ok=True)
        cache_file.write_text(json.dumps({"exists": exists, "ts": time.time()}))
    except OSError:
        pass  # the cache is an optimisation; the answer is still valid
    return exists
220
+
221
+
222
def _stdlib_names() -> frozenset[str]:
    """Return the names of standard-library modules.

    Uses ``sys.stdlib_module_names`` when the interpreter provides it and a
    hand-picked list of common modules otherwise.
    """
    try:
        return frozenset(sys.stdlib_module_names)
    except AttributeError:
        pass
    # Fallback for Python < 3.10: common stdlib modules
    common = (
        "abc", "ast", "asyncio", "builtins", "collections", "contextlib",
        "copy", "dataclasses", "datetime", "enum", "fnmatch", "functools",
        "hashlib", "http", "importlib", "inspect", "io", "itertools", "json",
        "logging", "math", "operator", "os", "pathlib", "pickle", "platform",
        "queue", "random", "re", "shutil", "signal", "socket", "sqlite3",
        "string", "struct", "subprocess", "sys", "tempfile", "textwrap",
        "threading", "time", "traceback", "types", "typing", "unittest",
        "urllib", "uuid", "warnings", "weakref", "xml", "zipfile",
        "_thread", "__future__",
    )
    return frozenset(common)
237
+
238
+
239
def _local_packages(root: Path) -> frozenset[str]:
    """Top-level importable names within the scanned project.

    Looks directly under *root* and, for src-layout projects, under
    ``root / "src"``. A directory containing ``__init__.py`` contributes its
    name; a ``*.py`` entry other than ``__init__.py`` contributes its stem.
    Returns an empty set when *root* cannot be listed; an unreadable or
    missing ``src`` directory is skipped.
    """
    names: set[str] = set()
    for base in (root, root / "src"):
        try:
            children = list(base.iterdir())
        except OSError:  # missing, not a directory, or permission denied
            if base is root:
                return frozenset()
            continue
        for child in children:
            if child.is_dir() and (child / "__init__.py").exists():
                names.add(child.name)
            elif child.suffix == ".py" and child.name != "__init__.py":
                # __init__.py marks its parent as a package; it is not an
                # importable module of its own.
                names.add(child.stem)
    return frozenset(names)
260
+
261
+
262
def _extract_imports(tree: ast.Module) -> list[tuple[str, int, int]]:
    """Collect ``(top-level package, lineno, col_offset)`` for each import in *tree*.

    ``import a.b, c`` yields one entry per imported name. ``from x.y import z``
    yields one entry for ``x``. Relative imports are left out. Entries follow
    ``ast.walk`` order.
    """
    found = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            found.extend(
                (alias.name.partition(".")[0], node.lineno, node.col_offset)
                for alias in node.names
            )
        elif isinstance(node, ast.ImportFrom) and node.level == 0 and node.module:
            # level > 0 is a relative import; module is None for `from . import x`
            top_level = node.module.partition(".")[0]
            found.append((top_level, node.lineno, node.col_offset))
    return found
275
+
276
+
277
class ImportChecker(FileChecker):
    """PROOFCTL-I-001: imported package that cannot be found on PyPI."""

    def __init__(
        self,
        local_namespaces: list[str] | None = None,
        extra_indexes: list[str] | None = None,
        check_pypi: bool = True,
    ) -> None:
        # NOTE: extra_indexes is accepted but not used by this class.
        # Stored as a tuple so it can be handed straight to str.startswith().
        self._local_namespaces = tuple(local_namespaces or [])
        self._check_pypi = check_pypi
        self._stdlib = _stdlib_names()

    def check(self, path: Path, source: str, tree: ast.Module | None) -> list[Finding]:
        """Return the I-001 findings for one file; nothing when *tree* is None."""
        if tree is None:
            return []
        return list(self._i001(path, tree))

    @staticmethod
    def _project_root(path: Path) -> Path:
        """Return the nearest ancestor directory holding pyproject.toml or setup.py.

        At most 8 directories are inspected, starting at the file's own
        directory and stopping at the filesystem root. The file's directory
        is returned when no marker is found.
        """
        fallback = path.parent
        here = fallback
        for _ in range(8):  # max 8 levels up
            if any((here / marker).exists() for marker in ("pyproject.toml", "setup.py")):
                return here
            parent = here.parent
            if parent == here:  # reached the filesystem root
                break
            here = parent
        return fallback

    def _i001(self, path: Path, tree: ast.Module) -> list[Finding]:
        """Flag each distinct imported package that PyPI reports as missing.

        Standard-library modules, packages found in the project itself and
        names starting with a configured local namespace are skipped. An
        inconclusive PyPI lookup (None) produces no finding.
        """
        local = _local_packages(self._project_root(path))

        findings = []
        seen: set[str] = set()

        for pkg, lineno, col in _extract_imports(tree):
            # Only the first import of each package is reported.
            if pkg in seen:
                continue
            seen.add(pkg)

            is_known = (
                pkg in self._stdlib
                or pkg in local
                or (self._local_namespaces and pkg.startswith(self._local_namespaces))
            )
            if is_known or not self._check_pypi:
                continue

            if _pypi_exists(pkg) is not False:
                continue

            findings.append(Finding(
                file=str(path),
                line=lineno,
                col=col,
                rule_id="PROOFCTL-I-001",
                rule_name="Hallucinated import",
                severity=Severity.ERROR,
                message=f"Package '{pkg}' not found on PyPI — possible AI hallucination",
                hint=(
                    f"Verify '{pkg}' exists and is spelled correctly. "
                    "If it's a private package, add it to local_namespaces in .proofctl.yaml."
                ),
                authority="Slopsquatting research — AI-hallucinated package names",
            ))

        return findings
344
+
345
+
346
class DependencyChecker(DirectoryChecker):
    """PROOFCTL-I-002: Hallucination-prone package name in project dependency files."""

    def __init__(self, extra_high_risk: list[str] | None = None) -> None:
        # User-supplied names are normalised like the built-in list.
        extra = frozenset(map(_normalize_pkg_name, extra_high_risk or []))
        self._high_risk = _HIGH_RISK_DEPS | extra

    def check(self, root: Path, py_files: list[Path]) -> list[Finding]:
        """Return the I-002 findings for every dependency file under *root*."""
        findings = []
        for dep_file in self._find_dep_files(root):
            findings += self._check_dep_file(dep_file)
        return findings

    def _find_dep_files(self, root: Path) -> list[Path]:
        """List requirements*.txt, requirements/*.txt and pyproject.toml in *root*."""
        files: list[Path] = [
            *root.glob("requirements*.txt"),
            *root.glob("requirements/*.txt"),
        ]
        pyproject = root / "pyproject.toml"
        if pyproject.exists():
            files.append(pyproject)
        return files

    def _check_dep_file(self, path: Path) -> list[Finding]:
        """Report the suspicious dependency names declared in one file.

        An unreadable file yields no findings. A name is reported when it is
        on the high-risk list or looks like a variant of a popular package.
        Entries pinned with ``==`` are warnings; all others are errors.
        """
        try:
            text = path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            return []

        parse = _parse_pyproject_deps if path.suffix == ".toml" else _parse_requirements

        findings = []
        for name, spec, lineno in parse(text):
            if _normalize_pkg_name(name) in self._high_risk:
                # Static list: confirmed hallucination targets that don't fit patterns.
                reason = "matches known AI-hallucination-prone package list"
            else:
                # Structural pattern matching: broader coverage, no network needed.
                base_pkg = _suspicious_variant_of(name)
                if base_pkg is None:
                    continue
                reason = f"looks like a hallucinated variant of '{base_pkg}'"

            is_pinned = spec is not None and "==" in spec
            if is_pinned:
                sev = Severity.WARNING
                pin_note = " (pinned)"
            else:
                sev = Severity.ERROR
                pin_note = " — unpinned, squatting risk"

            findings.append(Finding(
                file=str(path),
                line=lineno,
                col=0,
                rule_id="PROOFCTL-I-002",
                rule_name="High-risk dependency name",
                severity=sev,
                message=f"'{name}' {reason}" + pin_note,
                hint=(
                    f"Verify '{name}' is the intended package. "
                    "AI models commonly hallucinate plausible-sounding package names."
                ),
                authority="Slopsquatting research — AI-hallucinated package names",
            ))
        return findings
413
+
414
+
415
class MethodChecker(DirectoryChecker):
    """PROOFCTL-M-001: Phantom method calls detected by a single mypy invocation.

    mypy is run once over all files rather than once per file, so its startup
    and import-resolution cost is paid a single time.
    """

    def check(self, root: Path, py_files: list[Path]) -> list[Finding]:
        """Return M-001 findings; nothing when mypy is missing or no files are given."""
        if not py_files or not shutil.which("mypy"):
            return []
        return self._run_mypy(py_files)

    def _run_mypy(self, py_files: list[Path]) -> list[Finding]:
        """Run mypy over *py_files* and turn its "has no attribute" errors into findings.

        A timeout (300 s) or a failure to launch mypy yields no findings.
        Errors reported for files outside *py_files* are ignored.
        """
        command = [
            "mypy",
            "--ignore-missing-imports",
            "--no-error-summary",
            "--no-pretty",
        ]
        command.extend(str(p) for p in py_files)
        try:
            result = subprocess.run(
                command,
                capture_output=True,
                text=True,
                timeout=300,
            )
        except (subprocess.TimeoutExpired, OSError):
            return []

        # Resolved paths of the scanned files, for membership checks below.
        scoped = {str(p.resolve()) for p in py_files}

        findings = []
        for output_line in result.stdout.splitlines():
            hit = _MYPY_ATTR_RE.match(output_line)
            if hit is None:
                continue

            if hit.group(1):
                # First alternative: `"Type" has no attribute "name"`.
                file_path, lineno_str, obj_type, attr = hit.group(1, 2, 3, 4)
                msg = f"'{obj_type}' has no attribute '{attr}'"
            else:
                # Second alternative: `Module "x" has no attribute "name"`.
                file_path, lineno_str, attr = hit.group(5, 6, 7)
                msg = f"Module has no attribute '{attr}'"

            # Skip findings for files outside the scan scope (e.g. installed libs).
            try:
                in_scope = str(Path(file_path).resolve()) in scoped
            except OSError:
                continue
            if not in_scope:
                continue

            try:
                lineno = int(lineno_str)
            except ValueError:
                lineno = None

            findings.append(Finding(
                file=file_path,
                line=lineno,
                col=None,
                rule_id="PROOFCTL-M-001",
                rule_name="Phantom method call",
                severity=Severity.ERROR,
                message=msg,
                hint="Check the library's documentation for the correct method name.",
                authority="mypy attr-defined — AI calls methods that don't exist",
            ))

        return findings