deadpush 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,183 @@
1
+ """
2
+ Reachability analysis for dead code detection.
3
+
4
+ Given the (partial) call graph built from language plugins, compute
5
+ what is reachable from the entry point roots.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass, field
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+ from .graph import CallGraph
15
+
16
+
17
@dataclass
class ReachabilityResult:
    """Outcome of a reachability pass over the call graph.

    Every field defaults to its own empty set.
    """

    # Ids marked reachable from the roots (the roots themselves included).
    reachable: set[str] = field(default_factory=set)
    # Symbol ids that were never reached and were not moved to ``uncertain``.
    unreachable: set[str] = field(default_factory=set)
    # dynamic / risk / unresolved calls
    uncertain: set[str] = field(default_factory=set)


def compute_reachability(
    graph: CallGraph,
    roots: list[str],
    config: Any,
) -> ReachabilityResult:
    """Compute which symbols are reachable from the entry-point roots.

    The traversal is a breadth-first worklist run to a fixpoint.  Two kinds of
    outgoing edge are followed from every visited id:

    * explicit edges -- ``graph.edges`` items (``.src`` / ``.dst``) and
      ``graph.call_edges`` dicts carrying ``callee_id`` or ``dst``; when the
      source is a known symbol the target is marked reachable verbatim;
    * fuzzy edges -- the same targets, plus ``callee_name`` as a fallback, are
      treated as raw callee text and matched against symbol names and file
      stems.

    Every newly reached id is expanded through both edge kinds, so chains that
    only exist as raw callee text are followed regardless of the order in which
    the edges were recorded.

    Args:
        graph: Object exposing ``symbols`` (id -> symbol with ``name``,
            ``path``, ``kind`` and ``dynamic_risk``), ``get_symbol(id)`` and,
            optionally, ``edges`` and ``call_edges``.
        roots: Ids to start from.  They are always reported as reachable, even
            when they are not present in ``graph.symbols``.
        config: Accepted for interface compatibility; not read here.

    Returns:
        A ``ReachabilityResult``.  Unreached symbols whose ``dynamic_risk``
        exceeds 0.3 are reported as ``uncertain``; unreached symbols of kind
        ``"file"`` are dropped from ``unreachable`` without being reported.
    """
    from collections import deque

    symbols = graph.symbols

    # Fuzzy index: symbol name and the stem of its file both map to the id.
    name_index: dict[str, list[str]] = {}
    for sid, sym in symbols.items():
        name_index.setdefault(sym.name, []).append(sid)
        name_index.setdefault(Path(sym.path).stem, []).append(sid)

    def _resolve(text: str) -> list[str]:
        """Heuristically map raw callee text to candidate symbol ids."""
        cleaned = text.strip().strip("()[]{};, ")
        if not cleaned:
            return []
        if cleaned in name_index:
            return name_index[cleaned]
        # Fall back to the last segment of ``a.b`` / ``a::b`` / ``b(...)``.
        last = cleaned.split(".")[-1].split("::")[-1].split("(")[0]
        return name_index.get(last, [])

    # Outgoing edges grouped by source so the traversal never rescans the
    # complete edge list for each visited node.
    direct: dict[str, list[str]] = {}
    fuzzy: dict[str, list[str]] = {}

    for edge in getattr(graph, "edges", None) or []:
        if edge.src in symbols:
            # Kept for backward compatibility: the target is recorded as-is,
            # even if it is raw text rather than a symbol id.
            direct.setdefault(edge.src, []).append(edge.dst)
        fuzzy.setdefault(edge.src, []).append(str(edge.dst))

    for call in getattr(graph, "call_edges", None) or []:
        src = str(call.get("caller_id") or call.get("src") or "")
        explicit = call.get("callee_id") or call.get("dst")
        if src and explicit and src in symbols:
            direct.setdefault(src, []).append(str(explicit))
        text = explicit or call.get("callee_name")
        if text:
            fuzzy.setdefault(src, []).append(str(text))

    reachable: set[str] = set(roots)
    queue = deque(dict.fromkeys(roots))

    def _mark(target: str) -> None:
        if target not in reachable:
            reachable.add(target)
            queue.append(target)

    while queue:
        current = queue.popleft()
        for target in direct.get(current, ()):
            _mark(target)
        for text in fuzzy.get(current, ()):
            for target in _resolve(text):
                _mark(target)

    unreachable = set(symbols) - reachable
    uncertain: set[str] = set()

    for sid in list(unreachable):
        sym = graph.get_symbol(sid)
        if not sym:
            continue
        if sym.dynamic_risk > 0.3:
            # Too dynamic to call dead with confidence.
            uncertain.add(sid)
            unreachable.discard(sid)
        elif sym.kind == "file":
            # Files themselves are rarely "dead" unless the whole module is.
            unreachable.discard(sid)

    return ReachabilityResult(
        reachable=reachable,
        unreachable=unreachable,
        uncertain=uncertain,
    )
@@ -0,0 +1,280 @@
1
+ """Registration pattern detection for dead code analysis.
2
+
3
+ Scans source files for common framework registration patterns that would
4
+ make a symbol "alive" even if not directly called in the call graph:
5
+ - Decorator registrations (@register, @route, @app.get, etc.)
6
+ - Dict/list registrations (plugin registries, handler maps)
7
+ - String references that match symbol names in registry contexts
8
+ - Framework-specific registries (Django urlpatterns, Flask routes, Click groups)
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import ast
14
+ import re
15
+ from pathlib import Path
16
+ from typing import Any
17
+
18
# Receiver names (lower-cased) that are treated as framework objects; an
# attribute decorator hanging off any other name has its weight reduced.
_KNOWN_FRAMEWORK_OBJECTS: set[str] = {
    "app", "application", "router", "bp", "blueprint", "api",
    "route", "routes", "site", "server", "web",
}

# Lower-cased decorator name -> weight recorded for the decorated symbol.
_DECORATOR_PATTERNS: dict[str, float] = {
    "register": 0.9,
    "route": 0.8,
    "command": 0.8,
    "group": 0.8,
    "click_command": 0.8,
    "click_group": 0.8,
    "app_route": 0.9,
    "router_route": 0.8,
    "bp_route": 0.8,
    "get": 0.7,
    "post": 0.7,
    "put": 0.7,
    "delete": 0.7,
    "patch": 0.7,
    "hookimpl": 0.8,
    "hookspec": 0.7,
    "signal": 0.7,
    "receiver": 0.7,
    "listen": 0.7,
    "on_event": 0.7,
    "task": 0.6,
    "periodic_task": 0.7,
    "background_task": 0.7,
    "entrypoint": 0.8,
    "console_script": 0.8,
    "setuptools_entry": 0.8,
    "expose": 0.7,
    "action": 0.7,
    "filter": 0.6,
    "template_filter": 0.7,
    "context_processor": 0.7,
    "extension": 0.6,
    "middleware": 0.6,
    "errorhandler": 0.7,
    "before_request": 0.6,
    "after_request": 0.6,
    "teardown_request": 0.6,
    "on_message": 0.7,
    "subscribe": 0.7,
    "publish": 0.6,
    "event_handler": 0.7,
    "model": 0.5,
    "table": 0.5,
    "collection": 0.5,
    "resource": 0.5,
    "service": 0.5,
    "provider": 0.5,
    "factory": 0.5,
    "component": 0.5,
    "inject": 0.6,
    "implement": 0.6,
    "override": 0.5,
    "implements": 0.6,
    "dataclass": 0.3,
}

# Variable names that look like registries.  Patterns are applied with
# ``search`` to the lower-cased variable name.
_REGISTRY_VARIABLE_PATTERNS: list[re.Pattern] = [
    re.compile(r"(?:handlers|routes|urls|urlpatterns|views|controllers|actions|commands|tasks|workers|plugins|extensions|providers|services|maps|mappings|registry|registries|middlewares|filters|signals|events|listeners|consumers|producers|sinks|sources|adapters|ports|drivers|brokers|queues|schedules|jobs|crons|blueprints|modules|resources|endpoints|patterns)"),
    re.compile(r".*_registry$"),
    re.compile(r".*_handlers$"),
    re.compile(r".*_routes$"),
    re.compile(r".*_commands$"),
    re.compile(r".*_tasks$"),
    re.compile(r".*_listeners$"),
    re.compile(r".*_events$"),
    re.compile(r".*_plugins$"),
    re.compile(r".*_services$"),
]

# File names treated as entry points.  The match is deliberately loose
# (suffix match): wrongly calling a file an entry point only hides findings,
# whereas missing one would flag live code as dead.
_ENTRY_POINT_FILE_PATTERNS: list[re.Pattern] = [
    re.compile(r"(?:main|app|server|cli|cmd|entry|index|wsgi|asgi|manage|setup|run|launch|start|boot|bootstrap|kernel|router|routes|urls|views|api|graphql|rest|grpc|consumer|worker|scheduler)\.py$", re.IGNORECASE),
    re.compile(r"(?:manage|setup|wsgi|asgi|cli)\.py$", re.IGNORECASE),
    re.compile(r"__main__\.py$"),
    re.compile(r"conftest\.py$"),
    re.compile(r"__init__\.py$"),
]

# ``"key": "value"`` / ``"key" = "value"`` pairs of quoted identifiers.
_STRING_PAIR_RE = re.compile(r'["\'](\w+)["\']\s*[:=]\s*["\'](\w+)["\']')


class RegistrationDetector:
    """Detect symbols registered via decorators, dict entries, or string refs.

    All files are scanned on construction and the results are cached.  Symbol
    ids have the form ``"<path relative to repo_root>::<name>"``.  Custom
    regular expressions added later through ``add_custom_pattern`` are applied
    to the same file list immediately.
    """

    def __init__(self, file_paths: list[Path], repo_root: Path):
        self.repo_root = repo_root
        self._registered: set[str] = set()
        self._scores: dict[str, float] = {}
        self._entry_point_files: set[str] = set()
        self._string_refs: dict[str, list[str]] = {}
        self._custom_patterns: list[str] = []
        # Remembered so patterns added after construction can still be applied.
        self._file_paths: list[Path] = list(file_paths)
        self._scan(self._file_paths)

    def add_custom_pattern(self, pattern: str) -> None:
        """Record *pattern* and register every match found in the known files.

        The first capture group, when it took part in the match, names the
        symbol; otherwise the whole match does.  Matches score at least 0.5.
        A pattern that is not a valid regular expression is recorded but
        otherwise ignored.
        """
        self._custom_patterns.append(pattern)
        self._apply_custom_pattern(pattern, self._file_paths)

    def _scan(self, file_paths: list[Path]) -> None:
        """Classify entry-point files and collect registrations from each file."""
        for fp in file_paths:
            rel = self._rel_path(fp)
            if rel in self._entry_point_files:
                # Already processed as an entry-point file.
                continue
            if any(pat.search(fp.name) for pat in _ENTRY_POINT_FILE_PATTERNS):
                self._entry_point_files.add(rel)

            text = self._read_source(fp)
            if text is None:
                continue

            self._scan_decorators(text, rel)
            self._scan_dict_registrations(text, rel)
            self._scan_string_refs(text, rel)

        for custom in self._custom_patterns:
            self._apply_custom_pattern(custom, file_paths)

    def _apply_custom_pattern(self, pattern: str, file_paths: list[Path]) -> None:
        """Register every match of the regex *pattern* in *file_paths*."""
        try:
            compiled = re.compile(pattern)
        except re.error:
            # Best effort: an invalid user pattern must not abort the scan.
            return
        for fp in file_paths:
            text = self._read_source(fp)
            if text is None:
                continue
            rel = self._rel_path(fp)
            for match in compiled.finditer(text):
                name = match.group(1) if match.lastindex else None
                if name is None:
                    # No groups, or group 1 did not take part in this match.
                    name = match.group(0)
                if name:
                    self._register(f"{rel}::{name}", 0.5)

    @staticmethod
    def _read_source(path: Path) -> str | None:
        """Return the file's text (undecodable bytes dropped), or None if unreadable."""
        try:
            return path.read_text(encoding="utf-8", errors="ignore")
        except (OSError, ValueError):
            return None

    @staticmethod
    def _parse(text: str) -> ast.Module | None:
        """Parse *text* as Python, returning None when it cannot be parsed."""
        try:
            return ast.parse(text)
        except (SyntaxError, ValueError, RecursionError):
            # ValueError: null bytes on older interpreters; RecursionError:
            # pathologically nested source.  Neither should stop the scan.
            return None

    def _register(self, sym_id: str, weight: float) -> None:
        """Mark *sym_id* as registered, keeping the highest weight seen."""
        self._registered.add(sym_id)
        self._scores[sym_id] = max(self._scores.get(sym_id, 0), weight)

    def _rel_path(self, path: Path) -> str:
        """Return *path* relative to the repo root, or unchanged if outside it."""
        try:
            return str(path.relative_to(self.repo_root))
        except ValueError:
            return str(path)

    def _scan_decorators(self, text: str, rel: str) -> None:
        """Register functions and classes carrying a recognised decorator."""
        tree = self._parse(text)
        if tree is None:
            return

        definitions = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
        for node in ast.walk(tree):
            if not isinstance(node, definitions):
                continue
            for dec in node.decorator_list:
                weight = self._decorator_weight(dec)
                if weight > 0:
                    self._register(f"{rel}::{node.name}", weight)

    @staticmethod
    def _namespace_for_attr(dec: ast.expr) -> str | None:
        """Extract the leftmost name from an attribute chain (e.g. app.get → 'app')."""
        if isinstance(dec, ast.Call) and isinstance(dec.func, ast.Attribute):
            return RegistrationDetector._namespace_for_attr(dec.func)
        if isinstance(dec, ast.Attribute):
            if isinstance(dec.value, ast.Name):
                return dec.value.id.lower()
            if isinstance(dec.value, ast.Attribute):
                return RegistrationDetector._namespace_for_attr(dec.value)
        return None

    def _decorator_weight(self, dec: ast.expr) -> float:
        """Return the weight for a decorator expression (0.0 if unrecognised).

        ``@name``, ``@name(...)``, ``@obj.name`` and ``@obj.name(...)`` are
        looked up by lower-cased ``name``.  For attribute forms whose leftmost
        name is not a known framework object the weight is multiplied by 0.3.
        """
        func = dec.func if isinstance(dec, ast.Call) else dec
        if isinstance(func, ast.Attribute):
            weight = _DECORATOR_PATTERNS.get(func.attr.lower(), 0.0)
            namespace = self._namespace_for_attr(dec)
            if weight > 0 and namespace and namespace not in _KNOWN_FRAMEWORK_OBJECTS:
                weight *= 0.3
            return weight
        if isinstance(func, ast.Name):
            return _DECORATOR_PATTERNS.get(func.id.lower(), 0.0)
        return 0.0

    def _scan_dict_registrations(self, text: str, rel: str) -> None:
        """Register values stored in variables whose names look like registries.

        Plain, annotated and augmented assignments are inspected.  Variable
        names are matched case-insensitively so constant-style names such as
        ``HANDLERS`` are recognised as well.
        """
        tree = self._parse(text)
        if tree is None:
            return

        def _var_matches(name: str) -> bool:
            lowered = name.lower()
            return any(p.search(lowered) for p in _REGISTRY_VARIABLE_PATTERNS)

        def _target_name(target: ast.expr) -> str | None:
            # ``name = ...`` or ``name[key] = ...``
            if isinstance(target, ast.Name):
                return target.id
            if isinstance(target, ast.Subscript) and isinstance(target.value, ast.Name):
                return target.value.id
            return None

        def _scan_value(val: ast.expr) -> None:
            if isinstance(val, ast.Dict):
                for v in val.values:
                    self._check_registry_value(v, rel)
            elif isinstance(val, (ast.List, ast.Tuple, ast.Set)):
                for elt in val.elts:
                    self._check_registry_value(elt, rel)
            elif isinstance(val, ast.Call):
                for kw in val.keywords:
                    self._check_registry_value(kw.value, rel)

        for node in ast.walk(tree):
            if isinstance(node, ast.Assign):
                for target in node.targets:
                    var_name = _target_name(target)
                    if var_name and _var_matches(var_name):
                        _scan_value(node.value)
                        break
            elif isinstance(node, ast.AnnAssign):
                var_name = _target_name(node.target)
                if node.value is not None and var_name and _var_matches(var_name):
                    _scan_value(node.value)
            elif isinstance(node, ast.AugAssign):
                if isinstance(node.target, ast.Name) and _var_matches(node.target.id):
                    _scan_value(node.value)

    def _check_registry_value(self, val: ast.expr, rel: str) -> None:
        """Register a single registry entry: a name, an attribute or a string."""
        if isinstance(val, ast.Name):
            self._register(f"{rel}::{val.id}", 0.3)
        elif isinstance(val, ast.Attribute):
            self._register(f"{rel}::{val.attr}", 0.25)
        elif isinstance(val, ast.Constant) and isinstance(val.value, str):
            self._register(f"{rel}::{val.value}", 0.2)

    def _scan_string_refs(self, text: str, rel: str) -> None:
        """Register both sides of every quoted ``"key": "value"`` style pair."""
        for m in _STRING_PAIR_RE.finditer(text):
            for name in (m.group(1), m.group(2)):
                self._register(f"{rel}::{name}", 0.15)

    def score(self, sym_id: str) -> float:
        """Return the highest weight recorded for *sym_id*, or 0.0."""
        return self._scores.get(sym_id, 0.0)

    def is_registered(self, sym_id: str) -> bool:
        """Return True if any scan registered *sym_id*."""
        return sym_id in self._registered

    def is_entry_point_file(self, path: str) -> bool:
        """Return True if the relative *path* was classified as an entry point."""
        return path in self._entry_point_files

    def get_all_registered(self) -> set[str]:
        """Return a copy of the set of registered symbol ids."""
        return self._registered.copy()
deadpush/report.py ADDED
@@ -0,0 +1,113 @@
1
+ """
2
+ Report generators (markdown + json) for deadpush scan results.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import json
8
+ from datetime import datetime, timezone
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from .graph import DeadSymbol, DebrisFile
13
+
14
+
15
def generate_markdown_report(
    dead_symbols: list[DeadSymbol],
    debris: list[DebrisFile],
    repo_root: Path,
    roots: list[str] | None = None,
) -> str:
    """Render scan results as a Markdown document.

    Sections, in order: header (with a UTC timestamp), summary counts, dead
    code grouped by file path and ordered by line, debris (blocking items
    first, then by category and path) and, when *roots* is non-empty, the
    first 20 entry points.  At most three reasons are listed per dead symbol.

    Returns:
        The report as one newline-joined string.
    """
    blocking_count = sum(1 for item in debris if getattr(item, "block_push", False))

    out: list[str] = [
        "# deadpush Report",
        f"Generated: {datetime.now(timezone.utc).isoformat()}",
        f"Repo: {repo_root}",
        "",
        "## Summary",
        f"- Dead symbols: {len(dead_symbols)}",
        f"- Debris items: {len(debris)}",
        f"- Blocking debris: {blocking_count}",
        "",
    ]

    if not dead_symbols:
        out.append("## Dead Code\n\nNo dead code found. Great!")
    else:
        out.append("## Dead Code")
        grouped: dict[str, list[DeadSymbol]] = {}
        for entry in dead_symbols:
            grouped.setdefault(entry.symbol.path, []).append(entry)
        for path_key in sorted(grouped):
            out.append(f"\n### {path_key}")
            ordered_entries = sorted(grouped[path_key], key=lambda e: e.symbol.line)
            for entry in ordered_entries:
                tier = entry.tier.upper()
                out.append(
                    f"- **{entry.symbol.name}** (line {entry.symbol.line}) — {tier} "
                    f"({entry.confidence*100:.0f}%) — safe={entry.safe_to_delete}"
                )
                out.extend(f" - {reason}" for reason in entry.reasons[:3])

    out.append("\n## Debris")
    if not debris:
        out.append("No semantic debris detected.")
    else:
        # ``not block_push`` sorts blocking items (False) ahead of warnings.
        ordered_debris = sorted(
            debris,
            key=lambda item: (not item.block_push, item.category, item.path),
        )
        for item in ordered_debris:
            marker = "🚫 BLOCK" if item.block_push else "warn"
            out.append(
                f"- {item.path} [{item.category}] {marker} conf={item.confidence:.0%}"
            )
            if item.suggestion:
                out.append(f" → {item.suggestion}")

    if roots:
        out.append("\n## Entry Points Used")
        out.extend(f"- {root}" for root in roots[:20])

    out.append("\n---\n*Report by deadpush — keep your vibe coding safe.*")
    return "\n".join(out)
69
+
70
+
71
def generate_json_report(
    dead_symbols: list[DeadSymbol],
    debris: list[DebrisFile],
    repo_root: Path,
    roots: list[str] | None = None,
) -> dict[str, Any]:
    """Build the JSON-serialisable form of a scan result.

    The returned mapping holds the report version, a UTC ISO timestamp, the
    repo root as a string, summary counts, one record per dead symbol, one
    record per debris item and the list of roots (empty when none are given).
    """
    root_list = roots or []
    blocking_total = sum(1 for item in debris if getattr(item, "block_push", False))

    dead_records = []
    for entry in dead_symbols:
        symbol = entry.symbol
        dead_records.append(
            {
                "id": symbol.id,
                "name": symbol.name,
                "kind": symbol.kind,
                "path": symbol.path,
                "line": symbol.line,
                "tier": entry.tier,
                "confidence": entry.confidence,
                "reasons": entry.reasons,
                "safe_to_delete": entry.safe_to_delete,
            }
        )

    debris_records = []
    for item in debris:
        debris_records.append(
            {
                "path": item.path,
                "category": item.category,
                "confidence": item.confidence,
                "block_push": item.block_push,
                "suggestion": item.suggestion,
                "reasons": item.reasons,
            }
        )

    summary = {
        "dead_count": len(dead_symbols),
        "debris_count": len(debris),
        "blocking_debris": blocking_total,
        "entry_points": len(root_list),
    }

    return {
        "version": "0.2",
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "repo_root": str(repo_root),
        "summary": summary,
        "dead_symbols": dead_records,
        "debris": debris_records,
        "roots": root_list,
    }