deadpush 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deadpush/imports.py ADDED
@@ -0,0 +1,239 @@
1
+ """
2
+ Import Hallucination Guard — validates external imports against package registries.
3
+
4
+ AI coding agents frequently hallucinate package names that don't exist or are
5
+ typographical variants of real packages. This module cross-references every
6
+ external import found during analysis against PyPI, npm, and crates.io,
7
+ flagging unknown packages before they cause runtime failures.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ import os
14
+ import time
15
+ import urllib.request
16
+ import urllib.error
17
+ from pathlib import Path
18
+ from typing import Any
19
+
20
+
21
# Seconds to wait for a single registry request before giving up.
REGISTRY_TIMEOUT = 5
# On-disk cache of registry lookups, stored in the user's home directory.
CACHE_FILE = Path.home() / ".deadpush" / "import_cache.json"
# Cached lookups older than this many seconds (24 hours) are discarded.
CACHE_MAX_AGE = 86400

# Well-known stdlib module roots to skip checking (prevents spam).
# Only modules that ship with CPython belong here; third-party packages go in
# WELL_KNOWN_PACKAGES instead.
PYTHON_STDLIB = {
    # Core runtime and data types
    "os", "sys", "re", "json", "math", "time", "datetime", "pathlib",
    "collections", "itertools", "functools", "typing", "enum", "abc",
    "io", "base64", "hashlib", "hmac", "random", "statistics", "uuid",
    "argparse", "logging", "warnings", "traceback", "inspect",
    "fractions", "decimal", "string", "struct", "textwrap", "pprint",
    "array", "weakref", "types", "bisect", "heapq", "operator", "copy",
    "dataclasses", "contextlib", "contextvars", "numbers", "cmath",
    "secrets", "unicodedata", "binascii", "copyreg", "reprlib", "graphlib",
    # Files, persistence and compression
    "shutil", "tempfile", "glob", "fnmatch", "linecache", "fileinput",
    "pickle", "shelve", "marshal", "dbm", "sqlite3", "pickletools",
    "stat", "zlib", "gzip", "bz2", "lzma", "zipfile", "tarfile",
    "plistlib", "tomllib",
    # Processes and concurrency
    "subprocess", "threading", "multiprocessing", "concurrent",
    "asyncio", "select", "selectors", "queue", "sched",
    # Networking and data formats
    "socket", "socketserver", "ssl", "email", "xml", "html", "http",
    "urllib", "cgi", "webbrowser", "csv", "configparser", "netrc",
    "smtplib", "ftplib", "imaplib", "xmlrpc", "ipaddress",
    # OS services
    "getpass", "crypt", "platform", "errno", "ctypes", "atexit",
    "signal", "mmap", "sysconfig", "syslog", "locale", "calendar",
    "gettext", "codecs", "encodings", "shlex", "zoneinfo",
    "pwd", "grp", "fcntl", "resource", "termios", "tty", "pty",
    "readline", "curses", "getopt", "optparse", "cmd",
    # Development, debugging and testing
    "pdb", "profile", "cProfile", "timeit", "trace", "tracemalloc",
    "faulthandler", "unittest", "test", "doctest", "difflib",
    # Import machinery and language tooling
    "importlib", "pkgutil", "zipimport", "gc", "ast", "site", "venv",
    "compileall", "dis", "py_compile", "pyclbr", "token", "tokenize",
    "keyword", "symbol", "symtable", "tabnanny",
    "codeop", "code", "rlcompleter", "runpy",
    # Multimedia (several of these were removed in recent Python versions)
    "wave", "audioop", "chunk", "colorsys", "imghdr", "sndhdr",
    "ossaudiodev", "sunaudiodev",
    # GUI
    "tkinter", "turtle",
    # Special module names
    "__future__", "__main__", "builtins", "__builtins__",
}

KNOWN_TEST_PACKAGES = {
    "pytest", "unittest", "mock", "coverage", "tox", "nox",
    "hypothesis", "factory_boy", "faker", "responses", "vcrpy",
    "freezegun", "time_machine", "pytest_mock", "pytest_cov",
    "pytest_asyncio", "pytest_xdist", "pytest_fixtures",
    "moto", "localstack", "testcontainers",
    # Import names that differ from the distribution names listed above.
    "factory", "vcr",
}

# Hardcoded well-known public packages to avoid hitting the network for common ones.
WELL_KNOWN_PACKAGES = {
    "django", "flask", "fastapi", "requests", "numpy", "pandas",
    "scipy", "matplotlib", "torch", "tensorflow", "transformers",
    "click", "sqlalchemy", "alembic", "pydantic", "jinja2",
    "werkzeug", "gunicorn", "uvicorn", "celery", "redis",
    "psycopg2", "pymongo", "boto3", "botocore", "aiohttp",
    "httpx", "starlette", "pillow", "opencv_python", "beautifulsoup4",
    "lxml", "sphinx", "black", "ruff", "mypy", "isort", "flake8",
    "pylint", "pre_commit", "poetry", "pip", "setuptools",
    "wheel", "cffi", "cryptography", "bcrypt", "passlib",
    "jwt", "python_jose", "oauthlib", "authlib",
    "pytest", "coverage", "hypothesis", "tox",
    "loguru", "structlog", "sentry_sdk", "prometheus_client",
    "pydantic_settings", "python_dotenv", "python_multipart",
    "typer", "rich", "colorama", "tqdm", "pyyaml", "toml",
    "orjson", "ujson", "msgpack", "protobuf",
    "grpcio", "grpcio_tools", "kafka_python", "confluent_kafka",
    "elasticsearch", "elasticsearch_dsl", "motor", "beanie",
    "uvloop", "httptools", "websockets", "sse_starlette",
    # Import names that differ from the distribution names listed above.
    # Source code imports these, so the distribution name alone never matches.
    "PIL", "cv2", "bs4", "yaml", "dotenv", "jose", "multipart",
    "grpc", "grpc_tools", "kafka",
}
81
+
82
+
83
class ImportValidator:
    """Validates external imports by checking against package registries.

    Lookups are answered from an on-disk JSON cache when possible and fall
    back to a HEAD request against PyPI, npm or crates.io. Every lookup has
    three possible outcomes: ``True`` (the registry knows the package),
    ``False`` (the registry answered 404) and ``None`` (the registry could not
    be reached or answered something inconclusive). Only ``False`` ever
    produces a flag, so being offline does not report every import as
    hallucinated.
    """

    # Source-file suffix -> registry hosting packages for that language.
    # Suffixes that are absent (e.g. ".go", whose modules are URLs) are never
    # validated.
    _REGISTRY_BY_SUFFIX = {
        ".py": "pypi",
        ".js": "npm",
        ".jsx": "npm",
        ".mjs": "npm",
        ".cjs": "npm",
        ".ts": "npm",
        ".tsx": "npm",
        ".mts": "npm",
        ".cts": "npm",
        ".rs": "crates",
    }

    def __init__(self, cache_file: Path = CACHE_FILE):
        """Create a validator backed by the JSON cache at *cache_file*."""
        self.cache_file = cache_file
        self._cache: dict[str, dict[str, Any]] = {}
        self._dirty = False
        self._load_cache()

    # ------------------------------------------------------------------
    # Cache management
    # ------------------------------------------------------------------
    def _load_cache(self) -> None:
        """Load non-expired cache entries from disk.

        A missing, unreadable or corrupt cache file yields an empty cache;
        individual malformed entries are skipped instead of discarding the
        whole file.
        """
        self._cache = {}
        try:
            raw = json.loads(self.cache_file.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both invalid JSON and undecodable bytes.
            return
        if not isinstance(raw, dict):
            return

        now = time.time()
        for key, entry in raw.items():
            if not isinstance(entry, dict):
                continue
            checked_at = entry.get("checked_at", 0)
            if not isinstance(checked_at, (int, float)):
                continue
            if now - checked_at < CACHE_MAX_AGE:
                self._cache[key] = entry

    def _save_cache(self) -> None:
        """Write the cache to disk if it changed since the last save.

        Persisting the cache is best-effort: a filesystem error is ignored so
        that validation results are still returned to the caller.
        """
        if not self._dirty:
            return
        try:
            self.cache_file.parent.mkdir(parents=True, exist_ok=True)
            self.cache_file.write_text(
                json.dumps(self._cache, indent=2),
                encoding="utf-8",
            )
        except OSError:
            return
        # Only mark the cache clean once it really reached the disk.
        self._dirty = False

    # ------------------------------------------------------------------
    # Registry checks
    # ------------------------------------------------------------------
    def _probe(self, url: str) -> bool | None:
        """Issue a HEAD request and classify the answer.

        Returns:
            ``True`` on HTTP 200, ``False`` on HTTP 404, and ``None`` for
            anything inconclusive (other status codes, timeouts, DNS or
            connection failures, malformed URLs).
        """
        try:
            request = urllib.request.Request(url, method="HEAD")
            request.add_header("User-Agent", "deadpush/0.2.0")
            # The context manager closes the connection on every path.
            with urllib.request.urlopen(request, timeout=REGISTRY_TIMEOUT) as resp:
                return True if resp.status == 200 else None
        except urllib.error.HTTPError as err:
            err.close()
            # Only an explicit "not found" proves the package is missing;
            # rate limiting or server errors say nothing about existence.
            return False if err.code == 404 else None
        except Exception:
            # Boundary with the network: never let a lookup failure abort
            # the analysis, report "unknown" instead.
            return None

    def _check_pypi(self, package: str) -> bool | None:
        """Return whether *package* exists on PyPI (``None`` if unknown)."""
        from urllib.parse import quote

        return self._probe(f"https://pypi.org/pypi/{quote(package, safe='')}/json")

    def _check_npm(self, package: str) -> bool | None:
        """Return whether *package* exists on npm (``None`` if unknown)."""
        from urllib.parse import quote

        # "@" and "/" stay unescaped so scoped names keep their shape.
        # NOTE(review): assumes the registry accepts an unencoded "/" for
        # "@scope/name" lookups; confirm against the npm registry API.
        return self._probe(
            f"https://registry.npmjs.org/{quote(package, safe='@/')}/latest"
        )

    def _check_crates(self, package: str) -> bool | None:
        """Return whether *package* exists on crates.io (``None`` if unknown)."""
        from urllib.parse import quote

        return self._probe(
            f"https://crates.io/api/v1/crates/{quote(package, safe='')}"
        )

    @staticmethod
    def _normalize(registry: str, package: str) -> str:
        """Return the name under which *package* is looked up on *registry*."""
        name = package.lower()
        if registry in ("pypi", "crates"):
            return name.replace("_", "-").replace(".", "-")
        # npm names are used as written: "_" and "." can be part of the name.
        return name

    def _lookup(self, registry: str, package: str) -> bool | None:
        """Normalise *package* and query the checker for *registry*."""
        checkers = {
            "pypi": self._check_pypi,
            "npm": self._check_npm,
            "crates": self._check_crates,
        }
        return checkers[registry](self._normalize(registry, package))

    def _check_registry(self, package: str, suffix: str) -> bool | None:
        """Determine registry by convention and check.

        Args:
            package: Package name as imported.
            suffix: Suffix of the importing source file, e.g. ``".py"``.

        Returns:
            ``True`` for suffixes without a supported registry (such as
            ``".go"``, whose modules are URLs), otherwise the lookup result:
            ``True``, ``False`` or ``None`` when the registry gave no
            conclusive answer.
        """
        registry = self._REGISTRY_BY_SUFFIX.get(suffix)
        if registry is None:
            return True
        return self._lookup(registry, package)

    # ------------------------------------------------------------------
    # Name handling
    # ------------------------------------------------------------------
    @staticmethod
    def _root_package(pkg_name: str, registry: str) -> str:
        """Reduce an import target to the installable package name.

        Returns an empty string for targets that cannot name a registry
        package.
        """
        name = pkg_name.strip()
        if registry == "npm":
            # Relative/absolute paths and "scheme:" specifiers are not
            # registry packages. NOTE(review): assumes callers pass the raw
            # module specifier for JS/TS imports; confirm against the caller.
            if name.startswith((".", "/")) or ":" in name:
                return ""
            segments = name.split("/")
            if name.startswith("@"):
                # Scoped packages are identified by "@scope/name".
                if len(segments) < 2 or not segments[1]:
                    return ""
                return "/".join(segments[:2])
            return segments[0]
        # Dotted submodules and path-like suffixes belong to the root package.
        # Hyphens are kept because they are a legal part of a package name.
        return name.split(".")[0].split("/")[0]

    @staticmethod
    def _is_allowlisted(root: str) -> bool:
        """Return True when *root* is a stdlib module or a known package."""
        # The allowlists spell names with underscores, so the underscore
        # variant is tried too ("pytest-mock" -> "pytest_mock").
        for candidate in (root, root.replace("-", "_")):
            if (
                candidate in PYTHON_STDLIB
                or candidate in KNOWN_TEST_PACKAGES
                or candidate in WELL_KNOWN_PACKAGES
            ):
                return True
        return False

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def validate_batch(self, imports: list[tuple[str, str]]) -> list[dict[str, Any]]:
        """Validate a batch of (package_name, file_suffix) tuples.

        Allowlisted names, names shorter than two characters and imports from
        files without a supported registry are skipped. The remaining names
        are de-duplicated per registry, answered from the cache where
        possible and otherwise looked up over the network.

        Returns:
            A list of flag dicts for packages that appear to be hallucinated.
            Each dict has: package, registry, category, confidence, reason,
            block, suggestion. Packages whose lookup was inconclusive are not
            flagged.
        """
        # (registry, normalised name) -> lookup result, in first-seen order.
        results: dict[tuple[str, str], bool | None] = {}
        for pkg_name, suffix in imports:
            registry = self._REGISTRY_BY_SUFFIX.get(suffix)
            if registry is None:
                continue
            root = self._root_package(pkg_name, registry)
            if len(root) < 2 or self._is_allowlisted(root):
                continue
            results.setdefault((registry, self._normalize(registry, root)), None)

        if not results:
            return []

        for key in list(results):
            registry, name = key
            # The same name can exist on one registry and not on another, so
            # the registry is part of the cache key.
            cache_key = f"{registry}:{name}"
            cached = self._cache.get(cache_key)
            if cached is not None:
                results[key] = cached.get("exists", False)
                continue

            exists = self._lookup(registry, name)
            results[key] = exists
            if exists is not None:
                # Inconclusive answers are not cached so that the next run
                # retries them.
                self._cache[cache_key] = {"exists": exists, "checked_at": time.time()}
                self._dirty = True

        self._save_cache()

        # Build flags for non-existent packages
        flags = []
        for (registry, name), exists in results.items():
            if exists is not False:
                # Either found, or timed out / unknown.
                continue
            flags.append({
                "package": name,
                "registry": registry,
                "category": "hallucinated_import",
                "confidence": 0.92,
                "reason": f"Package '{name}' not found on package registry — may be hallucinated by AI",
                "block": False,
                "suggestion": f"Verify the package '{name}' exists on the appropriate registry (PyPI/npm/crates.io) before importing. AI models often hallucinate package names.",
            })

        return flags