deadpush 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deadpush/__init__.py +1 -0
- deadpush/churn.py +189 -0
- deadpush/cli.py +1584 -0
- deadpush/comments.py +265 -0
- deadpush/complexity.py +254 -0
- deadpush/config.py +284 -0
- deadpush/crawler.py +133 -0
- deadpush/deadness.py +477 -0
- deadpush/debris.py +729 -0
- deadpush/deps.py +323 -0
- deadpush/deps_guard.py +382 -0
- deadpush/entrypoints.py +193 -0
- deadpush/graph.py +401 -0
- deadpush/guard.py +1386 -0
- deadpush/hooks.py +369 -0
- deadpush/importgraph.py +122 -0
- deadpush/imports.py +239 -0
- deadpush/intercept.py +995 -0
- deadpush/languages/__init__.py +143 -0
- deadpush/languages/base.py +70 -0
- deadpush/languages/cpp.py +150 -0
- deadpush/languages/go_.py +177 -0
- deadpush/languages/java.py +185 -0
- deadpush/languages/javascript.py +202 -0
- deadpush/languages/python_.py +278 -0
- deadpush/languages/rust.py +147 -0
- deadpush/languages/typescript.py +192 -0
- deadpush/layers.py +197 -0
- deadpush/mcp_server.py +1061 -0
- deadpush/reachability.py +183 -0
- deadpush/registration.py +280 -0
- deadpush/report.py +113 -0
- deadpush/rules.py +190 -0
- deadpush/sarif.py +123 -0
- deadpush/scorer.py +151 -0
- deadpush/security.py +187 -0
- deadpush/session.py +224 -0
- deadpush/tests.py +333 -0
- deadpush/ui.py +156 -0
- deadpush/verifier.py +168 -0
- deadpush/watch.py +103 -0
- deadpush-0.2.0.dist-info/METADATA +230 -0
- deadpush-0.2.0.dist-info/RECORD +46 -0
- deadpush-0.2.0.dist-info/WHEEL +4 -0
- deadpush-0.2.0.dist-info/entry_points.txt +2 -0
- deadpush-0.2.0.dist-info/licenses/LICENSE +21 -0
deadpush/deps_guard.py
ADDED
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Dependency Integrity Guard — detects typosquats and suspicious package additions.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Allow-list of well-known package names per ecosystem.  A dependency whose
# lower-cased name appears here is treated as legitimate; every other name is
# compared against these entries to spot near-miss spellings.
KNOWN_PACKAGES: dict[str, set[str]] = {
    "python": {
        "requests", "flask", "django", "numpy", "pandas", "fastapi", "scipy",
        "matplotlib", "scikit-learn", "torch", "tensorflow", "pytest", "uvicorn",
        "sqlalchemy", "redis", "celery", "boto3", "click", "jinja2", "werkzeug",
        "pydantic", "alembic", "httpx", "aiohttp", "black", "ruff",
        "mypy", "isort", "flake8", "sphinx", "pillow", "beautifulsoup4",
        "lxml", "pyyaml", "tomli", "jsonschema", "orjson",
        "python-dotenv", "typing-extensions", "attrs",
        "psycopg2-binary", "pymongo", "motor", "grpcio", "protobuf",
        "cryptography", "bcrypt", "jwt", "oauthlib", "passlib",
        "gunicorn", "daphne", "channels",
        "elasticsearch", "loguru", "structlog", "sentry-sdk",
        "prometheus-client", "opentelemetry-api",
        "polars", "dask", "networkx", "nltk",
        "spacy", "transformers", "datasets", "tiktoken", "openai",
        "rich", "typer", "colorama",
        "ray", "joblib", "cloudpickle",
        "websockets", "msgpack", "zstandard",
        "twine", "build", "hatchling", "setuptools", "wheel",
        "coverage", "tox", "pre-commit", "virtualenv",
        "pip", "poetry", "watchdog",
        "anyio", "sniffio", "h11", "httpcore",
        "asyncpg", "aioredis",
        "pyarrow", "shapely", "geopandas", "xarray",
        "sympy", "scrapy", "selenium", "playwright",
        "pygments", "markdown",
        "opencv-python", "scikit-image",
        "torchvision", "torchaudio",
        "wandb", "mlflow", "dvc",
        "kubernetes", "docker",
        "ansible", "paramiko",
    },
    "npm": {
        "react", "react-dom", "lodash", "express", "axios", "next", "vue",
        "typescript", "eslint", "prettier", "webpack", "vite",
        "tailwindcss", "postcss", "autoprefixer", "jest", "mocha",
        "chai", "cypress", "playwright", "storybook",
        "redux", "react-router", "zustand", "zod",
        "mongoose", "prisma", "typeorm",
        "passport", "jsonwebtoken", "bcryptjs",
        "socket.io", "graphql", "apollo-client", "apollo-server",
        "uuid", "date-fns", "dayjs", "moment", "dotenv",
        "ts-node", "esbuild", "rollup",
        "pnpm", "yarn", "bun",
        "cheerio", "puppeteer",
        "sharp", "commander", "yargs",
        "chalk", "winston", "pino",
        "redis", "ioredis",
        "helmet", "cors", "compression", "cookie-parser", "body-parser",
        "aws-sdk", "firebase", "firebase-admin",
        "stripe", "nodemailer",
        "framer-motion", "three", "d3", "chart.js",
        "emotion", "styled-components",
        "react-native", "expo",
    },
    "rust": {
        "serde", "tokio", "reqwest", "clap", "anyhow", "thiserror",
        "rand", "chrono", "log", "tracing", "rayon",
        "futures", "hyper", "actix-web", "axum", "rocket",
        "tonic", "prost", "rustls", "openssl",
        "uuid", "regex", "once_cell", "parking_lot",
        "itertools", "num-traits", "indexmap",
        "serde_json", "serde_yaml", "toml", "csv",
        "sqlx", "diesel", "mongodb",
        "tokio-stream", "tokio-util", "pin-project",
        "bytes", "nom",
        "wasm-bindgen", "wasm-pack",
        "criterion", "tempfile",
        "indicatif", "console",
        "walkdir", "glob", "notify",
        "rust-embed", "mime_guess",
    },
    "go": {
        "gorilla/mux", "gin-gonic/gin", "echo", "fiber", "chi",
        "gorm", "ent",
        "cobra", "viper",
        "zap", "logrus", "zerolog",
        "stretchr/testify",
        "aws/aws-sdk-go", "docker/docker", "kubernetes/client-go",
        "google/uuid",
        "minio/minio-go",
        "go-git/go-git",
        "spf13/afero", "fsnotify/fsnotify",
    },
}
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# ---------------------------------------------------------------------------
|
|
101
|
+
# Levenshtein distance
|
|
102
|
+
# ---------------------------------------------------------------------------
|
|
103
|
+
|
|
104
|
+
def _levenshtein(a: str, b: str) -> int:
|
|
105
|
+
if len(a) < len(b):
|
|
106
|
+
a, b = b, a
|
|
107
|
+
if not b:
|
|
108
|
+
return len(a)
|
|
109
|
+
prev = list(range(len(b) + 1))
|
|
110
|
+
for i, ca in enumerate(a):
|
|
111
|
+
curr = [i + 1]
|
|
112
|
+
for j, cb in enumerate(b):
|
|
113
|
+
cost = 0 if ca == cb else 1
|
|
114
|
+
curr.append(min(
|
|
115
|
+
curr[j] + 1,
|
|
116
|
+
prev[j + 1] + 1,
|
|
117
|
+
prev[j] + cost,
|
|
118
|
+
))
|
|
119
|
+
prev = curr
|
|
120
|
+
return prev[-1]
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# ---------------------------------------------------------------------------
|
|
124
|
+
# Typosquat and suspicious name checks
|
|
125
|
+
# ---------------------------------------------------------------------------
|
|
126
|
+
|
|
127
|
+
def _check_typosquat(name: str, ecosystem: str) -> list[str]:
    """Return the known packages that *name* looks like a misspelling of.

    Args:
        name: Dependency name as written in the manifest.
        ecosystem: Key into ``KNOWN_PACKAGES`` ("python", "npm", "rust", "go").

    Returns:
        Alphabetically sorted known names within the edit-distance threshold,
        or an empty list when *name* is empty, is itself a known package, or
        nothing is close.
    """
    known = KNOWN_PACKAGES.get(ecosystem, set())
    if not name or name.lower() in known:
        return []

    def _canonical(value: str) -> str:
        # Spellings the registry itself treats as the same package:
        # PyPI collapses runs of "-", "_", "." (PEP 503); crates.io treats
        # "-" and "_" as interchangeable.  Other registries: case only.
        value = value.lower()
        if ecosystem == "python":
            return re.sub(r"[-_.]+", "-", value)
        if ecosystem == "rust":
            return value.replace("_", "-")
        return value

    if _canonical(name) in {_canonical(k) for k in known}:
        return []

    def _squash(value: str) -> str:
        # One normalisation for BOTH sides of the comparison; previously the
        # candidate kept "/" and the known name kept "@", which inflated the
        # distance for names such as "gorila/mux".
        return re.sub(r"[-_@/]", "", value.lower())

    n_clean = _squash(name)
    if not n_clean:
        return []
    short_name = len(n_clean) <= 4

    suspects: list[str] = []
    # sorted() keeps the result stable across runs (set order is not).
    for known_name in sorted(known):
        dist = _levenshtein(n_clean, _squash(known_name))
        # dist == 0 here means the name differs from a known package only by
        # separators the registry does NOT treat as equivalent (e.g.
        # "scikitlearn", "react_dom") - a classic squat, so it is reported.
        # NOTE(review): short names get the looser threshold (2); that is the
        # existing policy and is noisy for 3-4 letter packages - confirm.
        if dist <= 1 or (dist <= 2 and short_name):
            suspects.append(known_name)
    # NOTE(review): go.mod yields full module paths ("github.com/x/y") while
    # the Go allow-list holds short names, so Go matches are unlikely here.
    return suspects
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _check_suspicious_name(name: str) -> list[str]:
|
|
144
|
+
issues: list[str] = []
|
|
145
|
+
non_ascii = sum(1 for c in name if ord(c) > 127)
|
|
146
|
+
if non_ascii > 0:
|
|
147
|
+
issues.append(f"Package name contains {non_ascii} non-ASCII character(s)")
|
|
148
|
+
name_clean = name.lower().replace("-", "").replace("_", "").replace(".", "")
|
|
149
|
+
if not name_clean.isalnum():
|
|
150
|
+
special = sum(1 for c in name_clean if not c.isalnum())
|
|
151
|
+
if special > 0:
|
|
152
|
+
issues.append(f"Package name contains {special} special character(s)")
|
|
153
|
+
return issues
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
# ---------------------------------------------------------------------------
|
|
157
|
+
# Dependency file parsing
|
|
158
|
+
# ---------------------------------------------------------------------------
|
|
159
|
+
|
|
160
|
+
_REQUIREMENTS_LINE_RE = re.compile(
|
|
161
|
+
r'^([a-zA-Z_][a-zA-Z0-9_.-]*?)(?:\[[^\]]*\])?\s*(?:>=|<=|!=|==|~=|>|<|@)\s*'
|
|
162
|
+
)
|
|
163
|
+
_REQUIREMENTS_NAME_RE = re.compile(r'^([a-zA-Z_][a-zA-Z0-9_.-]*)')
|
|
164
|
+
_PACKAGE_JSON_KEY = re.compile(r'"(@?[a-zA-Z_][a-zA-Z0-9_./-]*?)"\s*:')
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _extract_toml_deps(source: str) -> list[tuple[str, int]]:
    """Extract dependency names from pyproject.toml / Pipfile text.

    Uses ``tomllib`` (stdlib, Python 3.11+).  The following locations are read:

    * PEP 621 ``[project] dependencies`` and ``[project.optional-dependencies]``
    * Poetry ``[tool.poetry.dependencies]``, ``dev-dependencies`` and
      ``[tool.poetry.group.<name>.dependencies]``
    * Pipfile ``[packages]`` / ``[dev-packages]``

    Args:
        source: Raw TOML text.

    Returns:
        ``(package_name, line_number)`` pairs.  Unparseable TOML yields an
        empty list; values of an unexpected type are ignored rather than
        raising.
    """
    import tomllib

    deps: list[tuple[str, int]] = []
    try:
        data = tomllib.loads(source)
    except (tomllib.TOMLDecodeError, RecursionError):
        # Manifest text may be mid-edit or hostile; treat it as "no deps".
        return deps

    def _table(value: Any) -> dict:
        """Return *value* if it is a TOML table, else an empty dict."""
        return value if isinstance(value, dict) else {}

    def _strings(value: Any) -> list[str]:
        """Return the string items of *value* if it is a TOML array."""
        if not isinstance(value, list):
            return []
        return [item for item in value if isinstance(item, str)]

    def _add(spec: str) -> None:
        """Record the package named by a requirement string or table key."""
        # Cut at the first character that cannot be part of the name: extras,
        # version operators, "@" direct references, ";" environment markers,
        # whitespace.  Splitting (rather than whitelisting characters) keeps
        # non-ASCII look-alike names intact for the later suspicious-name check.
        pkg = re.split(r'[\s\[(;<>=!~@,]', spec.strip(), maxsplit=1)[0]
        if not pkg or not (pkg[0].isalnum() or pkg[0] == "_"):
            return
        deps.append((pkg, _find_toml_line(source, pkg)))

    # PEP 621: [project] dependencies = ["pkg>=1.0"]
    project = _table(data.get("project"))
    for dep_str in _strings(project.get("dependencies")):
        _add(dep_str)

    # PEP 621: [project.optional-dependencies] name = ["pkg>=1.0"]
    for dep_list in _table(project.get("optional-dependencies")).values():
        for dep_str in _strings(dep_list):
            _add(dep_str)

    # Poetry: [tool.poetry.dependencies] pkg = "^1.0"
    poetry = _table(_table(data.get("tool")).get("poetry"))
    poetry_tables = [_table(poetry.get(key)) for key in ("dependencies", "dev-dependencies")]

    # Poetry modern: [tool.poetry.group.{any}.dependencies]
    for group_data in _table(poetry.get("group")).values():
        poetry_tables.append(_table(_table(group_data).get("dependencies")))

    for table in poetry_tables:
        for dep_name in table:
            # In Poetry, "python" is the interpreter constraint, not a package.
            if dep_name.lower() != "python":
                _add(dep_name)

    # Pipfile: [packages] / [dev-packages]
    for section_name in ("packages", "dev-packages"):
        for dep_name in _table(data.get(section_name)):
            _add(dep_name)

    return deps
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _find_toml_line(source: str, name: str) -> int:
|
|
217
|
+
"""Find approximate line number for a dependency name in TOML source."""
|
|
218
|
+
lines = source.splitlines()
|
|
219
|
+
clean = name.split("[")[0].strip().lower()
|
|
220
|
+
for i, line in enumerate(lines, 1):
|
|
221
|
+
stripped = line.strip().lower()
|
|
222
|
+
# Match key = ... or "key" = ... or 'key' = ...
|
|
223
|
+
if re.match(rf'^["\']?{re.escape(clean)}["\']?\s*=', stripped):
|
|
224
|
+
return i
|
|
225
|
+
return 1
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def _extract_requirements_txt(source: str) -> list[tuple[str, int]]:
|
|
229
|
+
deps: list[tuple[str, int]] = []
|
|
230
|
+
for i, line in enumerate(source.splitlines(), 1):
|
|
231
|
+
stripped = line.strip()
|
|
232
|
+
if not stripped or stripped.startswith("#") or stripped.startswith("-"):
|
|
233
|
+
continue
|
|
234
|
+
m = _REQUIREMENTS_LINE_RE.match(stripped)
|
|
235
|
+
if m:
|
|
236
|
+
deps.append((m.group(1), i))
|
|
237
|
+
elif "==" in stripped or ">=" in stripped:
|
|
238
|
+
name = stripped.split("==")[0].split(">=")[0].strip()
|
|
239
|
+
if re.match(r'^[a-zA-Z_]', name):
|
|
240
|
+
deps.append((name, i))
|
|
241
|
+
else:
|
|
242
|
+
m2 = _REQUIREMENTS_NAME_RE.match(stripped)
|
|
243
|
+
if m2:
|
|
244
|
+
deps.append((m2.group(1), i))
|
|
245
|
+
return deps
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _extract_package_json(source: str) -> list[tuple[str, int]]:
    """Extract ``(package_name, line_number)`` pairs from package.json text.

    The text is scanned, not JSON-parsed: each ``"dependencies"`` /
    ``"devDependencies"`` object is delimited by counting braces, and every
    key matching ``_PACKAGE_JSON_KEY`` inside it is reported with the line on
    which the key starts.
    """
    found: list[tuple[str, int]] = []
    section_re = re.compile(r'"(?:devDependencies|dependencies)"\s*:\s*\{')
    total = len(source)

    for header in section_re.finditer(source):
        start = header.end()
        cursor = start
        nesting = 1
        # Walk forward until the brace opened by the header is closed.
        while nesting > 0 and cursor < total:
            char = source[cursor]
            cursor += 1
            if char == '{':
                nesting += 1
            elif char == '}':
                nesting -= 1
        body = source[start:cursor - 1]

        for key in _PACKAGE_JSON_KEY.finditer(body):
            lineno = source.count("\n", 0, start + key.start()) + 1
            found.append((key.group(1), lineno))
    return found
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _extract_cargo_toml(source: str) -> list[tuple[str, int]]:
|
|
270
|
+
deps: list[tuple[str, int]] = []
|
|
271
|
+
in_deps = False
|
|
272
|
+
for i, line in enumerate(source.splitlines(), 1):
|
|
273
|
+
stripped = line.strip()
|
|
274
|
+
if stripped.startswith("[") and stripped.endswith("]"):
|
|
275
|
+
in_deps = stripped[1:-1].strip().lower() == "dependencies"
|
|
276
|
+
continue
|
|
277
|
+
if in_deps and not stripped.startswith("#"):
|
|
278
|
+
m = re.match(r'^([a-zA-Z_][a-zA-Z0-9_-]*?)\s*=', stripped)
|
|
279
|
+
if m:
|
|
280
|
+
deps.append((m.group(1), i))
|
|
281
|
+
return deps
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def _extract_go_mod(source: str) -> list[tuple[str, int]]:
|
|
285
|
+
deps: list[tuple[str, int]] = []
|
|
286
|
+
in_require = False
|
|
287
|
+
for i, line in enumerate(source.splitlines(), 1):
|
|
288
|
+
stripped = line.strip()
|
|
289
|
+
if stripped.startswith("require ("):
|
|
290
|
+
in_require = True
|
|
291
|
+
continue
|
|
292
|
+
if in_require and stripped == ")":
|
|
293
|
+
in_require = False
|
|
294
|
+
continue
|
|
295
|
+
if in_require:
|
|
296
|
+
parts = stripped.split()
|
|
297
|
+
if parts and not parts[0].startswith("//"):
|
|
298
|
+
deps.append((parts[0], i))
|
|
299
|
+
if stripped.startswith("require ") and not stripped.endswith("("):
|
|
300
|
+
parts = stripped.split()
|
|
301
|
+
if len(parts) >= 2:
|
|
302
|
+
deps.append((parts[1], i))
|
|
303
|
+
return deps
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
# Manifest file name -> ecosystem key used to index KNOWN_PACKAGES.
# Keys are bare file names; lookups are case-sensitive.
ECOSYSTEM_FOR_FILE: dict[str, str] = {
    # Python
    "pyproject.toml": "python",
    "requirements.txt": "python",
    "Pipfile": "python",
    "setup.py": "python",
    "setup.cfg": "python",
    # JavaScript / TypeScript
    "package.json": "npm",
    # Rust
    "Cargo.toml": "rust",
    # Go
    "go.mod": "go",
}
|
|
316
|
+
|
|
317
|
+
# Manifest file name -> parser returning (package_name, line_number) pairs.
# setup.py and setup.cfg have an ecosystem mapping but no parser here, so
# parse_deps() returns an empty list for them.
_PARSE_FOR_FILE: dict[str, Any] = {
    "pyproject.toml": _extract_toml_deps,
    "requirements.txt": _extract_requirements_txt,
    "Pipfile": _extract_toml_deps,
    "package.json": _extract_package_json,
    "Cargo.toml": _extract_cargo_toml,
    "go.mod": _extract_go_mod,
}
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def get_ecosystem(rel_path: str) -> str | None:
    """Return the ecosystem key for a manifest path, or ``None`` if unknown.

    Only the final path component is considered.  Both ``/`` and ``\\`` are
    treated as separators regardless of the host OS, so Windows-style
    relative paths resolve correctly on POSIX as well.
    """
    name = Path(rel_path.replace("\\", "/")).name
    return ECOSYSTEM_FOR_FILE.get(name)
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def parse_deps(source: str, rel_path: str) -> list[tuple[str, int]]:
    """Parse *source* with the parser registered for the file named by *rel_path*.

    Only the final path component selects the parser.  Both ``/`` and ``\\``
    are treated as separators regardless of the host OS.

    Returns:
        ``(package_name, line_number)`` pairs, or an empty list when no
        parser is registered for the file name.
    """
    name = Path(rel_path.replace("\\", "/")).name
    parser = _PARSE_FOR_FILE.get(name)
    if parser:
        return parser(source)
    return []
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
# ---------------------------------------------------------------------------
|
|
341
|
+
# High-level check
|
|
342
|
+
# ---------------------------------------------------------------------------
|
|
343
|
+
|
|
344
|
+
def check_deps(source: str, rel_path: str, old_source: str = "") -> list[dict[str, Any]]:
    """Check a dependency file for typosquats and suspicious additions.

    Only dependencies that are absent (case-insensitively) from *old_source*
    are examined; with an empty *old_source* every dependency counts as new.

    Returns a list of violation dicts with keys:
      - category: "dependency"
      - description: human-readable explanation
      - line: line number
      - severity: "high" | "medium" | "low"
    """
    findings: list[dict[str, Any]] = []

    ecosystem = get_ecosystem(rel_path)
    if not ecosystem:
        return findings

    current = parse_deps(source, rel_path)
    baseline = parse_deps(old_source, rel_path) if old_source else []
    already_present = {dep_name.lower() for dep_name, _ in baseline}

    for name, line in current:
        if name.lower() in already_present:
            continue

        lookalikes = _check_typosquat(name, ecosystem)
        if lookalikes:
            shown = ', '.join(lookalikes[:3])
            findings.append({
                "category": "dependency",
                "description": f"Package '{name}' is a possible typosquat of: {shown}",
                "line": line,
                "severity": "high",
            })

        findings.extend(
            {
                "category": "dependency",
                "description": f"Package '{name}': {issue}",
                "line": line,
                "severity": "medium",
            }
            for issue in _check_suspicious_name(name)
        )

    return findings
|
deadpush/entrypoints.py
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Entry point resolution using plugins + config + heuristics + framework-aware route detection.
|
|
3
|
+
|
|
4
|
+
This integrates language plugins deeply: each plugin can contribute
|
|
5
|
+
detect_entry_points + we also honor explicit config + common conventions.
|
|
6
|
+
Framework route registrations (Flask, FastAPI, Express, etc.) are detected
|
|
7
|
+
via pattern scanning across source files.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import re
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from .config import Config
|
|
17
|
+
from .graph import CallGraph, Symbol
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
# Framework-aware route pattern detection
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
_FRAMEWORK_PATTERNS: list[tuple[str, str, list[str]]] = [
|
|
25
|
+
("flask", r'@\w+\.route\([\'"]([^\'"]+)[\'"]', [".py"]),
|
|
26
|
+
("flask_blueprint", r'@\w+\.(?:route|get|post|put|delete|patch)\([\'"]([^\'"]+)[\'"]', [".py"]),
|
|
27
|
+
("fastapi", r'@\w+\.(?:get|post|put|delete|patch|options|head|trace)\([\'"]([^\'"]+)[\'"]', [".py"]),
|
|
28
|
+
("django_url", r"path\([\'\"]([^\'\"]+)[\'\"],\s*(\w+)", [".py"]),
|
|
29
|
+
("django_re_path", r"re_path\([\'\"]([^\'\"]+)[\'\"],\s*(\w+)", [".py"]),
|
|
30
|
+
("django_include", r"include\([\'\"]([^\'\"]+)[\'\"]", [".py"]),
|
|
31
|
+
("express_get", r"\.(?:get|post|put|delete|patch|use)\s*\(\s*[\'\"]([^\'\"]*)[\'\"],\s*(\w+)", [".js", ".jsx", ".ts", ".tsx"]),
|
|
32
|
+
("express_route", r"(?:app|router)\.route\([\'\"]([^\'\"]+)[\'\"][^)]*\)\s*\.(?:get|post|put|delete|patch)\s*\((\w+)", [".js", ".jsx", ".ts", ".tsx"]),
|
|
33
|
+
("nextjs_page", r"export\s+default\s+(?:function|const|async\s+function)\s+(\w+)", [".js", ".jsx", ".ts", ".tsx"]),
|
|
34
|
+
("go_http", r"http\.HandleFunc\([\'\"]([^\'\"]+)[\'\"],\s*(\w+)", [".go"]),
|
|
35
|
+
("go_gin", r"(?:router|r|gin\.Default\(\))\.(?:GET|POST|PUT|DELETE|PATCH|Handle)\([\'\"]([^\'\"]+)[\'\"],\s*(\w+)", [".go"]),
|
|
36
|
+
("rust_axum", r"\.route\([\'\"]([^\'\"]+)[\'\"],\s*(\w+)", [".rs"]),
|
|
37
|
+
("rust_actix", r"\.route\([\'\"]([^\'\"]+)[\'\"],\s*\w+\.\w+\(\)\.to\((\w+)", [".rs"]),
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _scan_file_for_routes(path: Path) -> list[str]:
    """Scan a single source file for framework route handler references.

    Every pattern in ``_FRAMEWORK_PATTERNS`` registered for the file's
    extension is applied.  A match contributes its last capture group as a
    handler name, but only when it has at least two groups; matches that
    captured a single group are ignored.  An unreadable file yields an empty
    list.
    """
    try:
        text = path.read_text(encoding="utf-8", errors="ignore")
    except Exception:
        return []

    suffix = path.suffix.lower()
    found: list[str] = []
    for _label, regex, suffixes in _FRAMEWORK_PATTERNS:
        if suffix not in suffixes:
            continue
        found.extend(
            hit.group(hit.lastindex)
            for hit in re.finditer(regex, text, re.MULTILINE)
            if hit.lastindex and hit.lastindex >= 2
        )
    return found
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def detect_framework_entry_points(
    files: list[Any],
    graph: CallGraph,
) -> list[str]:
    """Detect entry points from framework route registrations.

    Each text file is passed to ``_scan_file_for_routes``; the handler names
    it reports are then matched, by exact symbol name, against every symbol
    in *graph*.  Matching is by name only, so same-named symbols in different
    files are all returned.

    Args:
        files: Objects with a ``path`` attribute; an optional ``is_text``
            attribute set to a false value excludes the file.
        graph: Call graph whose ``symbols`` mapping is searched.

    Returns:
        Sorted list of matching symbol IDs (empty when no handler was found).
    """
    handler_names: set[str] = set()
    for f in files:
        if not getattr(f, "is_text", True):
            continue
        handler_names.update(_scan_file_for_routes(f.path))

    if not handler_names:
        return []

    # A single pass over the symbols is sufficient; sorting keeps the result
    # stable from run to run.
    return sorted(
        sid for sid, sym in graph.symbols.items() if sym.name in handler_names
    )
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# ---------------------------------------------------------------------------
|
|
106
|
+
# Main resolver
|
|
107
|
+
# ---------------------------------------------------------------------------
|
|
108
|
+
|
|
109
|
+
def resolve_entry_points(
    graph: CallGraph,
    files: list[Any],  # list of FileInfo or similar
    plugins: dict[str, Any],
    config: Config,
) -> list[str]:
    """Return the sorted symbol IDs that are considered roots / entry points.

    Sources, applied in order and unioned:

    1. ``config.entrypoints.include`` entries, matched against a symbol's
       name, as a substring of its ID, or as a suffix of its path.
    2. Names reported by the language plugin for each file, matched against
       that file's symbols (exact name first, then substring).
    3. Symbols already flagged ``is_entry_point``.
    4. Framework route handlers (best-effort).
    5. Conventional names (``main``, ``app`` ...) - only if nothing was found.

    File symbols whose path contains ``main``/``app``/``index``/``cli``/``cmd``
    are always added as soft roots.
    """
    roots: set[str] = set()

    # 1. Explicit --entry / config include (names or paths)
    for inc in config.entrypoints.include:
        inc = inc.strip()
        if not inc:
            continue
        for sym_id, sym in graph.symbols.items():
            if sym.name == inc or inc in sym_id or str(sym.path).endswith(inc):
                roots.add(sym_id)

    # Index symbols by path once instead of rescanning the whole graph for
    # every detected name.  Paths are compared as strings on both sides, as
    # everywhere else in this function, so the match also works when
    # sym.path is a Path object.
    symbols_by_path: dict[str, list[tuple[str, Any]]] = {}
    for sym_id, sym in graph.symbols.items():
        symbols_by_path.setdefault(str(sym.path), []).append((sym_id, sym))

    # 2. Plugin-provided detection
    dynamic_pats = config.entrypoints.dynamic_patterns
    for f in files:
        if not getattr(f, "is_text", True):
            continue
        suffix = f.path.suffix.lower()
        lang_plug = next(
            (p for p in plugins.values() if suffix in getattr(p, "extensions", [])),
            None,
        )
        if not lang_plug or not hasattr(lang_plug, "detect_entry_points"):
            continue
        try:
            tree = lang_plug.parse(f.path.read_bytes(), str(f.path))
            detected = lang_plug.detect_entry_points(tree, str(f.path), dynamic_pats)
            file_symbols = symbols_by_path.get(str(f.path), [])
            for det in detected:
                # Exact name wins; otherwise the first symbol containing it.
                hit = next((sid for sid, s in file_symbols if s.name == det), None)
                if hit is None:
                    hit = next((sid for sid, s in file_symbols if det in s.name), None)
                if hit is not None:
                    roots.add(hit)
                # fallback synthetic if not parsed as symbol
                if det in ("main", "__main__", "default", "app"):
                    candidate = f"{Path(f.path).as_posix()}::{det}"
                    if candidate in graph.symbols:
                        roots.add(candidate)
        except Exception:
            # Deliberately best-effort: an unreadable file or a failing
            # plugin must not abort resolution for the remaining files.
            continue

    # 3. Heuristics: anything marked is_entry_point=True by a plugin
    for sym_id, sym in graph.symbols.items():
        if sym.is_entry_point:
            roots.add(sym_id)

    # 4. Framework-aware route detection (best-effort, failures ignored)
    try:
        roots.update(detect_framework_entry_points(files, graph))
    except Exception:
        pass

    # 5. Common fallbacks if nothing found
    if not roots:
        for sym_id, sym in graph.symbols.items():
            if sym.name in ("main", "Main", "__main__", "index", "app", "server"):
                roots.add(sym_id)
            if "main" in str(sym.path).lower() and sym.kind == "file":
                roots.add(sym_id)

    # Always treat file symbols of "entry-ish" files as soft roots.
    # NOTE(review): this is a substring test on the whole path, so names such
    # as "client" or a parent directory called "apps" also match - confirm
    # that this breadth is intended.
    for sym_id, sym in graph.symbols.items():
        if sym.kind == "file" and any(
            k in str(sym.path).lower() for k in ("main", "app", "index", "cli", "cmd")
        ):
            roots.add(sym_id)

    return sorted(roots)
|