deadpush 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deadpush/__init__.py +1 -0
- deadpush/churn.py +189 -0
- deadpush/cli.py +1584 -0
- deadpush/comments.py +265 -0
- deadpush/complexity.py +254 -0
- deadpush/config.py +284 -0
- deadpush/crawler.py +133 -0
- deadpush/deadness.py +477 -0
- deadpush/debris.py +729 -0
- deadpush/deps.py +323 -0
- deadpush/deps_guard.py +382 -0
- deadpush/entrypoints.py +193 -0
- deadpush/graph.py +401 -0
- deadpush/guard.py +1386 -0
- deadpush/hooks.py +369 -0
- deadpush/importgraph.py +122 -0
- deadpush/imports.py +239 -0
- deadpush/intercept.py +995 -0
- deadpush/languages/__init__.py +143 -0
- deadpush/languages/base.py +70 -0
- deadpush/languages/cpp.py +150 -0
- deadpush/languages/go_.py +177 -0
- deadpush/languages/java.py +185 -0
- deadpush/languages/javascript.py +202 -0
- deadpush/languages/python_.py +278 -0
- deadpush/languages/rust.py +147 -0
- deadpush/languages/typescript.py +192 -0
- deadpush/layers.py +197 -0
- deadpush/mcp_server.py +1061 -0
- deadpush/reachability.py +183 -0
- deadpush/registration.py +280 -0
- deadpush/report.py +113 -0
- deadpush/rules.py +190 -0
- deadpush/sarif.py +123 -0
- deadpush/scorer.py +151 -0
- deadpush/security.py +187 -0
- deadpush/session.py +224 -0
- deadpush/tests.py +333 -0
- deadpush/ui.py +156 -0
- deadpush/verifier.py +168 -0
- deadpush/watch.py +103 -0
- deadpush-0.2.0.dist-info/METADATA +230 -0
- deadpush-0.2.0.dist-info/RECORD +46 -0
- deadpush-0.2.0.dist-info/WHEEL +4 -0
- deadpush-0.2.0.dist-info/entry_points.txt +2 -0
- deadpush-0.2.0.dist-info/licenses/LICENSE +21 -0
deadpush/graph.py
ADDED
|
@@ -0,0 +1,401 @@
|
|
|
1
|
+
# Full advanced implementation of Symbol, Edge, CallGraph, DeadSymbol, DebrisFile etc.
|
|
2
|
+
# Inspired by BlastRadius's callgraph_model.py for proper function-scoped call graphs
|
|
3
|
+
# with qualified names, cross-file resolution, snippets, bindings, and entry points.
|
|
4
|
+
# This makes dead code reachability and impact analysis much more accurate
|
|
5
|
+
# down to individual function/symbol calls.
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import hashlib
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, Literal, Optional
|
|
13
|
+
|
|
14
|
+
# Stamped into the "schema_version" field of the repo-wide call graph payload.
SCHEMA_VERSION = 3
# Pseudo scope name used when building the id of a file's module-level scope.
MODULE_SCOPE = "<module>"

# Closed sets of string tags accepted for Symbol.kind and Edge.kind.
SymbolKind = Literal[
    "function",
    "class",
    "method",
    "variable",
    "export",
    "file",
    "module",
]
EdgeKind = Literal[
    "calls",
    "imports",
    "inherits",
    "re-exports",
    "decorates",
    "contains",
]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(frozen=True, slots=True)
|
|
22
|
+
class FunctionDef:
|
|
23
|
+
"""Rich function/method definition, similar to BlastRadius."""
|
|
24
|
+
id: str
|
|
25
|
+
name: str
|
|
26
|
+
qualified_name: str
|
|
27
|
+
line_start: int
|
|
28
|
+
line_end: int
|
|
29
|
+
is_entry_point: bool = False
|
|
30
|
+
|
|
31
|
+
def to_dict(self) -> dict[str, Any]:
|
|
32
|
+
return {
|
|
33
|
+
"id": self.id,
|
|
34
|
+
"name": self.name,
|
|
35
|
+
"qualified_name": self.qualified_name,
|
|
36
|
+
"line_start": self.line_start,
|
|
37
|
+
"line_end": self.line_end,
|
|
38
|
+
"is_entry_point": self.is_entry_point,
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass(frozen=True, slots=True)
|
|
43
|
+
class CallEdge:
|
|
44
|
+
"""Rich call edge with resolution info, snippet, usage, binding, package.
|
|
45
|
+
Directly modeled on BlastRadius CallEdge for proper symbol-level calls."""
|
|
46
|
+
caller_id: str
|
|
47
|
+
callee_name: str
|
|
48
|
+
line: int
|
|
49
|
+
snippet: str = ""
|
|
50
|
+
usage: str = "call"
|
|
51
|
+
callee_id: str | None = None
|
|
52
|
+
package: str | None = None
|
|
53
|
+
binding: str | None = None
|
|
54
|
+
|
|
55
|
+
def to_dict(self) -> dict[str, Any]:
|
|
56
|
+
return {
|
|
57
|
+
"caller_id": self.caller_id,
|
|
58
|
+
"callee_name": self.callee_name,
|
|
59
|
+
"callee_id": self.callee_id,
|
|
60
|
+
"line": self.line,
|
|
61
|
+
"snippet": self.snippet,
|
|
62
|
+
"usage": self.usage,
|
|
63
|
+
"package": self.package,
|
|
64
|
+
"binding": self.binding,
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# Legacy flat structures kept for backward compat in existing plugins / reachability
|
|
69
|
+
@dataclass(frozen=True, slots=True)
|
|
70
|
+
class Symbol:
|
|
71
|
+
id: str
|
|
72
|
+
name: str
|
|
73
|
+
kind: SymbolKind
|
|
74
|
+
path: str
|
|
75
|
+
line: int
|
|
76
|
+
is_entry_point: bool = False
|
|
77
|
+
dynamic_risk: float = 0.0
|
|
78
|
+
qualified_name: str | None = None
|
|
79
|
+
line_end: int | None = None
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@dataclass(frozen=True, slots=True)
|
|
83
|
+
class Edge:
|
|
84
|
+
src: str
|
|
85
|
+
dst: str
|
|
86
|
+
kind: EdgeKind
|
|
87
|
+
confidence: float = 1.0
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@dataclass
class CallGraph:
    """Mutable container holding flat symbols/edges plus richer call-graph data."""

    # Legacy flat model: symbols keyed by id, edges in insertion order.
    symbols: dict[str, Symbol] = field(default_factory=dict)
    edges: list[Edge] = field(default_factory=list)
    # Richer function-level data: per-file graphs keyed by path, a function
    # index, dict-shaped call edges, and entry point ids.
    files_graph: dict[str, dict[str, Any]] = field(default_factory=dict)
    function_index: dict[str, dict[str, Any]] = field(default_factory=dict)
    call_edges: list[dict[str, Any]] = field(default_factory=list)
    entry_points: list[str] = field(default_factory=list)

    def add_symbol(self, symbol: Symbol) -> None:
        """Store *symbol* under its id, replacing any symbol already stored there."""
        self.symbols[symbol.id] = symbol

    def add_edge(self, edge: Edge) -> None:
        """Append *edge* to the flat edge list (no de-duplication)."""
        self.edges.append(edge)

    def outgoing(self, symbol_id: str) -> list[Edge]:
        """Return the edges whose ``src`` equals *symbol_id*, in insertion order."""
        return list(filter(lambda candidate: candidate.src == symbol_id, self.edges))

    def incoming(self, symbol_id: str) -> list[Edge]:
        """Return the edges whose ``dst`` equals *symbol_id*, in insertion order."""
        return list(filter(lambda candidate: candidate.dst == symbol_id, self.edges))

    def get_symbol(self, symbol_id: str) -> Symbol | None:
        """Return the symbol stored under *symbol_id*, or ``None`` if absent."""
        return self.symbols.get(symbol_id)

    def add_rich_call_edge(self, edge: dict[str, Any]) -> None:
        """Append a dict-shaped call edge to ``call_edges``."""
        self.call_edges.append(edge)

    def add_file_graph(self, path: str, file_graph: dict[str, Any]) -> None:
        """Store *file_graph* under *path*, replacing any previous entry."""
        self.files_graph[path] = file_graph
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _normalize_symbol_path(path: str) -> str:
    """Return *path* in POSIX form with leading ``/``, ``./`` and ``../`` segments removed.

    Only whole leading segments are dropped. ``str.lstrip("./")`` strips a
    *character set*, which also ate the dot of hidden entries and made
    ``.github/x.py`` collide with ``github/x.py``; this keeps such names intact.
    """
    segments = Path(path).as_posix().split("/")
    first_real = 0
    # "" comes from a leading "/", "." and ".." from relative prefixes.
    while first_real < len(segments) and segments[first_real] in ("", ".", ".."):
        first_real += 1
    return "/".join(segments[first_real:])


def make_symbol_id(
    path: str, name: str, qualified_name: str | None = None, line: int | None = None
) -> str:
    """Create a deterministic identifier for a symbol.

    The id has the form ``<normalized path>::<name>`` with an optional
    ``@<line>`` suffix. Supports qualified_name and line (inspired by
    BlastRadius make_function_id) to reduce collisions between symbols
    that share a simple name.

    Args:
        path: File path of the symbol; leading ``/``, ``./`` and ``../``
            segments are dropped, hidden names such as ``.github`` are kept.
        name: Simple symbol name, used when *qualified_name* is empty or None.
        qualified_name: Preferred name component when provided.
        line: Optional line number appended as ``@<line>``.

    Returns:
        The identifier string.
    """
    normalized = _normalize_symbol_path(path)
    qname = qualified_name or name
    # Surrounding whitespace is dropped and inner spaces become underscores.
    safe = qname.strip().replace(" ", "_")
    if line is not None:
        return f"{normalized}::{safe}@{line}"
    return f"{normalized}::{safe}"


def module_caller_id(file_path: str) -> str:
    """Return the id of the module-level scope of *file_path* (like BlastRadius).

    Uses the same path normalization as ``make_symbol_id`` so both id
    families share an identical file prefix.
    """
    normalized = _normalize_symbol_path(file_path)
    return f"{normalized}::{MODULE_SCOPE}"
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def dedupe_calls(items: list[CallEdge]) -> list[CallEdge]:
    """Drop repeated call edges and return the survivors in sorted order.

    Two edges are duplicates when caller id, callee name, line and package
    all match; the first occurrence is kept. The result is ordered by
    (caller id, line, callee name).
    """
    first_by_key: dict[tuple[str, str, int, str | None], CallEdge] = {}
    for edge in items:
        identity = (edge.caller_id, edge.callee_name, edge.line, edge.package)
        # setdefault keeps the earliest edge seen for each identity.
        first_by_key.setdefault(identity, edge)
    return sorted(
        first_by_key.values(),
        key=lambda edge: (edge.caller_id, edge.line, edge.callee_name),
    )
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def dedupe_imports(items: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Dedupe import records (from BlastRadius) and return them sorted.

    Records are duplicates when their ``(name, line)`` pair matches; the
    first occurrence is kept. A missing or ``None`` name is treated as
    ``""`` and a missing or ``None`` line as ``1``. The same key drives
    both de-duplication and sorting, so records without a name sort first.

    Args:
        items: Import records; only the ``"name"`` and ``"line"`` keys are read.

    Returns:
        The unique records, ordered by ``(name, line)``.
    """

    def _record_key(item: dict[str, Any]) -> tuple[str, int]:
        raw_name = item.get("name")
        raw_line = item.get("line")
        # An explicit None must not reach int(), which would raise TypeError.
        return (
            "" if raw_name is None else str(raw_name),
            1 if raw_line is None else int(raw_line),
        )

    unique: dict[tuple[str, int], dict[str, Any]] = {}
    for item in items:
        unique.setdefault(_record_key(item), item)
    # Keys are unique, so sorting the pairs never has to compare the dicts.
    return [record for _, record in sorted(unique.items(), key=lambda pair: pair[0])]
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def legacy_calls_from_edges(calls: list[CallEdge]) -> list[dict[str, Any]]:
    """Convert call edges into the flat dict shape used by older deadpush consumers.

    The callee name is exposed under the ``"name"`` key; the other keys
    mirror the attribute names of the edge.
    """

    def _flatten(call: CallEdge) -> dict[str, Any]:
        return {
            "name": call.callee_name,
            "line": call.line,
            "snippet": call.snippet,
            "binding": call.binding,
            "package": call.package,
            "usage": call.usage,
            "caller_id": call.caller_id,
            "callee_id": call.callee_id,
        }

    return list(map(_flatten, calls))
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def content_hash(path: Path | str) -> str | None:
    """Compute the SHA256 hex digest of a file for duplicate/debris detection.

    The file is hashed in fixed-size chunks so large files are never held
    in memory in full.

    Args:
        path: Location of the file to hash.

    Returns:
        The hex digest, or ``None`` when *path* is not a regular file or an
        I/O error occurs while reading it.
    """
    chunk_size = 1 << 20  # 1 MiB per read
    try:
        target = Path(path)
        # is_file() is already False for missing paths; no separate exists() call.
        if not target.is_file():
            return None
        digest = hashlib.sha256()
        with target.open("rb") as handle:
            while chunk := handle.read(chunk_size):
                digest.update(chunk)
        return digest.hexdigest()
    except (OSError, ValueError):
        # Best effort: unreadable files and invalid paths (e.g. embedded NUL)
        # yield "no hash" instead of aborting the scan.
        return None
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _index_functions(files_graph: dict[str, dict[str, Any]]) -> dict[str, dict[str, Any]]:
    """Map each function id to its metadata plus the path of the defining file.

    Files whose ``"functions"`` value is not a list, entries that are not
    dicts, and entries with an empty or missing ``"id"`` are ignored. When
    an id occurs more than once, the last entry wins.
    """
    by_id: dict[str, dict[str, Any]] = {}
    for path, file_meta in files_graph.items():
        entries = file_meta.get("functions", [])
        if isinstance(entries, list):
            for entry in entries:
                if isinstance(entry, dict):
                    key = str(entry.get("id") or "")
                    if key:
                        # Copy the entry so the caller's dict is not mutated.
                        record = dict(entry)
                        record["file_path"] = path
                        by_id[key] = record
    return by_id
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _is_qualified_suffix(qualified_name: str, callee_name: str) -> bool:
    """Return True when *callee_name* ends *qualified_name* on an identifier boundary.

    A bare ``endswith`` would let ``a.run`` match ``data.run``; here the
    character just before the matched suffix must not be part of an
    identifier (letter, digit or underscore).
    """
    if not qualified_name.endswith(callee_name):
        return False
    cut = len(qualified_name) - len(callee_name)
    if cut == 0:
        return True
    preceding = qualified_name[cut - 1]
    return not (preceding.isalnum() or preceding == "_")


def _resolve_cross_file_callees(files_graph: dict[str, dict[str, Any]]) -> list[dict[str, Any]]:
    """Best-effort cross-file callee resolution (inspired by BlastRadius).

    For every call record lacking a ``callee_id``:

    1. Look up the first dotted segment of the callee name among simple
       function names; a unique hit resolves the call.
    2. Otherwise, for dotted names only, look for a unique function whose
       qualified name equals the callee name or ends with it on an
       identifier boundary.

    Ambiguous or unmatched calls keep their original (falsy) ``callee_id``.
    Records that are not dicts, or lack a caller id or callee name, are
    skipped.

    Args:
        files_graph: Per-file graph dicts keyed by file path; the
            ``"functions"`` and ``"calls"`` lists are read.

    Returns:
        One edge dict per usable call record, in input order.
    """
    func_index = _index_functions(files_graph)

    by_simple_name: dict[str, list[str]] = {}
    # Stringified once here instead of once per unresolved call.
    qualified_names: dict[str, str] = {}
    for fn_id, fn in func_index.items():
        name = str(fn.get("name") or "")
        if name:
            by_simple_name.setdefault(name, []).append(fn_id)
        qualified = str(fn.get("qualified_name") or "")
        if qualified:
            qualified_names[fn_id] = qualified

    resolved_edges: list[dict[str, Any]] = []

    for file_path, meta in files_graph.items():
        raw_calls = meta.get("calls", [])
        if not isinstance(raw_calls, list):
            continue
        for call in raw_calls:
            if not isinstance(call, dict):
                continue
            caller_id = str(call.get("caller_id") or "")
            callee_name = str(call.get("callee_name") or "")
            if not caller_id or not callee_name:
                continue

            callee_id = call.get("callee_id")
            if not callee_id:
                root = callee_name.split(".", 1)[0]
                candidates = by_simple_name.get(root, [])
                if len(candidates) == 1:
                    callee_id = candidates[0]
                elif root != callee_name:
                    # Dotted name: fall back to a unique qualified-name match.
                    qualified_candidates = [
                        fid
                        for fid, qualified in qualified_names.items()
                        if _is_qualified_suffix(qualified, callee_name)
                    ]
                    if len(qualified_candidates) == 1:
                        callee_id = qualified_candidates[0]

            resolved_edges.append(
                {
                    "file_path": file_path,
                    "caller_id": caller_id,
                    "callee_id": callee_id,
                    "callee_name": callee_name,
                    "line": int(call.get("line") or 1),
                    "package": call.get("package"),
                    "binding": call.get("binding"),
                    "usage": call.get("usage") or "call",
                }
            )

    return resolved_edges
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def build_repo_call_graph(files_graph: dict[str, dict[str, Any]]) -> dict[str, Any]:
    """Assemble the repo-wide call graph payload from per-file graph dicts.

    Adapted from BlastRadius build_repo_call_graph. The payload carries the
    input files, the function index, the resolved call edges, the ids of
    functions flagged as entry points, summary counts and the schema version.
    """

    def _count_entries(key: str, container_type: type) -> int:
        # A file contributes only when its value under `key` has the expected
        # container type; anything else counts as zero.
        total = 0
        for file_meta in files_graph.values():
            if isinstance(file_meta.get(key), container_type):
                total += len(file_meta.get(key, container_type()))
        return total

    function_index = _index_functions(files_graph)
    resolved_edges = _resolve_cross_file_callees(files_graph)
    entry_ids = [
        function_id
        for function_id, function_meta in function_index.items()
        if bool(function_meta.get("is_entry_point"))
    ]

    summary = {
        "file_count": len(files_graph),
        "function_count": len(function_index),
        "call_count": _count_entries("calls", list),
        "import_count": _count_entries("imports", list),
        "binding_count": _count_entries("bindings", dict),
        "entry_point_count": len(entry_ids),
    }
    return {
        "files": files_graph,
        "function_index": function_index,
        "call_edges": resolved_edges,
        "entry_points": entry_ids,
        "summary": summary,
        "schema_version": SCHEMA_VERSION,
    }
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def build_forward_adjacency(call_edges: list[dict[str, Any]]) -> dict[str, list[str]]:
    """Build caller -> [callee_ids] adjacency for reachability (from BlastRadius).

    Edges without a caller id, or whose ``callee_id`` is not a non-empty
    string (i.e. unresolved calls), are ignored. Each callee is listed once
    per caller, in first-seen order.

    Args:
        call_edges: Edge dicts; only ``"caller_id"`` and ``"callee_id"`` are read.

    Returns:
        Mapping from caller id to the ordered list of distinct callee ids.
    """
    adj: dict[str, list[str]] = {}
    # A set of seen pairs replaces the linear `callee in list` scan, which
    # was quadratic for callers with many outgoing calls.
    seen_pairs: set[tuple[str, str]] = set()
    for edge in call_edges:
        caller = str(edge.get("caller_id") or "")
        callee = edge.get("callee_id")
        if not caller or not isinstance(callee, str) or not callee:
            continue
        targets = adj.setdefault(caller, [])
        pair = (caller, callee)
        if pair not in seen_pairs:
            seen_pairs.add(pair)
            targets.append(callee)
    return adj
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
@dataclass
class DeadSymbol:
    """Mutable finding that pairs a symbol with its deadness assessment."""

    symbol: Symbol
    tier: Literal["definite", "probable", "suspicious", "uncertain"]
    confidence: float
    reasons: list[str]
    # The defaults below apply when the producer does not set them.
    safe_to_delete: bool = True
    delete_order: int = 0
    alive_score: float = 0.0
    # NOTE(review): how tier_new relates to tier is not visible in this module.
    tier_new: str = "uncertain"
    factor_breakdown: dict[str, float] = field(default_factory=dict)
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
@dataclass(frozen=True, slots=True)
|
|
350
|
+
class DebrisFile:
|
|
351
|
+
path: str
|
|
352
|
+
category: str
|
|
353
|
+
confidence: float
|
|
354
|
+
reasons: list[str] = field(default_factory=list)
|
|
355
|
+
block_push: bool = False
|
|
356
|
+
suggestion: str = ""
|
|
357
|
+
@dataclass
class FileGraph:
    """Call graph data for a single file: imports, bindings, functions, calls.

    Matches the shape emitted by deadpush language plugins (aligned with
    the BlastRadius callgraph_model for cross-file resolution).
    """

    language: str
    imports: list[dict[str, Any]] = field(default_factory=list)
    bindings: dict[str, str] = field(default_factory=dict)
    functions: list[FunctionDef] = field(default_factory=list)
    calls: list[CallEdge] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the file graph.

        ``imports`` and ``bindings`` are passed through by reference;
        functions and calls are converted with their own ``to_dict()``.
        """
        payload: dict[str, Any] = {
            "language": self.language,
            "imports": self.imports,
            "bindings": self.bindings,
        }
        payload["functions"] = [definition.to_dict() for definition in self.functions]
        payload["calls"] = [call.to_dict() for call in self.calls]
        return payload
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
# Names exported by `from deadpush.graph import *`.
__all__ = [
    # Type aliases
    "SymbolKind",
    "EdgeKind",
    # Graph data model
    "Symbol",
    "Edge",
    "CallGraph",
    "FunctionDef",
    "CallEdge",
    "FileGraph",
    # Id builders and record helpers
    "make_symbol_id",
    "module_caller_id",
    "dedupe_calls",
    "dedupe_imports",
    "legacy_calls_from_edges",
    # Repo-wide graph assembly
    "build_repo_call_graph",
    "build_forward_adjacency",
    # Underscore-prefixed helpers are listed explicitly so star-imports
    # still expose them.
    "_index_functions",
    "_resolve_cross_file_callees",
    # Findings and hashing
    "DeadSymbol",
    "DebrisFile",
    "content_hash",
]
|