deadpush 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deadpush/graph.py ADDED
@@ -0,0 +1,401 @@
1
+ # Full advanced implementation of Symbol, Edge, CallGraph, DeadSymbol, DebrisFile etc.
2
+ # Inspired by BlastRadius's callgraph_model.py for proper function-scoped call graphs
3
+ # with qualified names, cross-file resolution, snippets, bindings, and entry points.
4
+ # This makes dead code reachability and impact analysis much more accurate
5
+ # down to individual function/symbol calls.
6
+
7
+ from __future__ import annotations
8
+
9
+ import hashlib
10
+ from dataclasses import dataclass, field
11
+ from pathlib import Path
12
+ from typing import Any, Literal, Optional
13
+
14
# Version stamp that build_repo_call_graph reports under "schema_version".
SCHEMA_VERSION = 3
# Pseudo function name that module_caller_id appends for module-level code.
MODULE_SCOPE = "<module>"

# Values accepted by Symbol.kind.
SymbolKind = Literal[
    "function",
    "class",
    "method",
    "variable",
    "export",
    "file",
    "module",
]
# Values accepted by Edge.kind.
EdgeKind = Literal[
    "calls",
    "imports",
    "inherits",
    "re-exports",
    "decorates",
    "contains",
]
19
+
20
+
21
+ @dataclass(frozen=True, slots=True)
22
+ class FunctionDef:
23
+ """Rich function/method definition, similar to BlastRadius."""
24
+ id: str
25
+ name: str
26
+ qualified_name: str
27
+ line_start: int
28
+ line_end: int
29
+ is_entry_point: bool = False
30
+
31
+ def to_dict(self) -> dict[str, Any]:
32
+ return {
33
+ "id": self.id,
34
+ "name": self.name,
35
+ "qualified_name": self.qualified_name,
36
+ "line_start": self.line_start,
37
+ "line_end": self.line_end,
38
+ "is_entry_point": self.is_entry_point,
39
+ }
40
+
41
+
42
+ @dataclass(frozen=True, slots=True)
43
+ class CallEdge:
44
+ """Rich call edge with resolution info, snippet, usage, binding, package.
45
+ Directly modeled on BlastRadius CallEdge for proper symbol-level calls."""
46
+ caller_id: str
47
+ callee_name: str
48
+ line: int
49
+ snippet: str = ""
50
+ usage: str = "call"
51
+ callee_id: str | None = None
52
+ package: str | None = None
53
+ binding: str | None = None
54
+
55
+ def to_dict(self) -> dict[str, Any]:
56
+ return {
57
+ "caller_id": self.caller_id,
58
+ "callee_name": self.callee_name,
59
+ "callee_id": self.callee_id,
60
+ "line": self.line,
61
+ "snippet": self.snippet,
62
+ "usage": self.usage,
63
+ "package": self.package,
64
+ "binding": self.binding,
65
+ }
66
+
67
+
68
+ # Legacy flat structures kept for backward compat in existing plugins / reachability
69
+ @dataclass(frozen=True, slots=True)
70
+ class Symbol:
71
+ id: str
72
+ name: str
73
+ kind: SymbolKind
74
+ path: str
75
+ line: int
76
+ is_entry_point: bool = False
77
+ dynamic_risk: float = 0.0
78
+ qualified_name: str | None = None
79
+ line_end: int | None = None
80
+
81
+
82
+ @dataclass(frozen=True, slots=True)
83
+ class Edge:
84
+ src: str
85
+ dst: str
86
+ kind: EdgeKind
87
+ confidence: float = 1.0
88
+
89
+
90
@dataclass
class CallGraph:
    """Mutable container for the flat symbol graph plus richer call data.

    ``symbols`` and ``edges`` form the flat view; ``files_graph``,
    ``function_index``, ``call_edges`` and ``entry_points`` hold the
    function-level data. Every collection starts out empty.
    """

    symbols: dict[str, Symbol] = field(default_factory=dict)
    edges: list[Edge] = field(default_factory=list)
    # Function-level data stored alongside the flat view.
    files_graph: dict[str, dict[str, Any]] = field(default_factory=dict)
    function_index: dict[str, dict[str, Any]] = field(default_factory=dict)
    call_edges: list[dict[str, Any]] = field(default_factory=list)
    entry_points: list[str] = field(default_factory=list)

    def add_symbol(self, symbol: Symbol) -> None:
        """Store *symbol* under its ``id``, replacing any previous entry."""
        key = symbol.id
        self.symbols[key] = symbol

    def add_edge(self, edge: Edge) -> None:
        """Append *edge* to the flat edge list."""
        self.edges.append(edge)

    def outgoing(self, symbol_id: str) -> list[Edge]:
        """Return the edges whose ``src`` equals *symbol_id*, in insertion order."""
        matches: list[Edge] = []
        for candidate in self.edges:
            if candidate.src == symbol_id:
                matches.append(candidate)
        return matches

    def incoming(self, symbol_id: str) -> list[Edge]:
        """Return the edges whose ``dst`` equals *symbol_id*, in insertion order."""
        matches: list[Edge] = []
        for candidate in self.edges:
            if candidate.dst == symbol_id:
                matches.append(candidate)
        return matches

    def get_symbol(self, symbol_id: str) -> Symbol | None:
        """Return the symbol stored under *symbol_id*, or ``None`` if absent."""
        return self.symbols.get(symbol_id)

    def add_rich_call_edge(self, edge: dict[str, Any]) -> None:
        """Append a dict-shaped call edge to ``call_edges``."""
        self.call_edges.append(edge)

    def add_file_graph(self, path: str, file_graph: dict[str, Any]) -> None:
        """Store *file_graph* under *path*, replacing any previous entry."""
        self.files_graph[path] = file_graph
120
+
121
+
122
def _normalize_path(path: str) -> str:
    """Return *path* in POSIX form without a leading ``./`` or ``/``.

    ``str.lstrip("./")`` removes every leading ``.`` and ``/`` character, so
    it also eats the dot of hidden directories (``.github`` -> ``github``)
    and parent references (``../a`` -> ``a``), which can make two different
    files share an identifier. Only whole ``./`` and ``/`` prefixes are
    removed here; dots that belong to a path component are kept.
    """
    posix = Path(path).as_posix()
    if posix == ".":
        # The bare current directory normalizes to an empty prefix.
        return ""
    while posix.startswith(("./", "/")):
        posix = posix[2:] if posix.startswith("./") else posix[1:]
    return posix


def make_symbol_id(
    path: str, name: str, qualified_name: str | None = None, line: int | None = None
) -> str:
    """Create a deterministic identifier for a symbol.

    The identifier has the form ``<path>::<name>`` with an optional
    ``@<line>`` suffix.

    Args:
        path: File path of the symbol; normalized to POSIX form with any
            leading ``./`` or ``/`` removed.
        name: Simple symbol name, used when *qualified_name* is empty.
        qualified_name: Preferred name when given (and non-empty).
        line: When not ``None``, appended as ``@<line>``.

    Returns:
        The identifier string. Surrounding whitespace in the chosen name is
        stripped and inner spaces become underscores.
    """
    normalized = _normalize_path(path)
    qname = qualified_name or name
    safe = qname.strip().replace(" ", "_")
    if line is not None:
        return f"{normalized}::{safe}@{line}"
    return f"{normalized}::{safe}"


def module_caller_id(file_path: str) -> str:
    """Return the identifier for the module-level scope of *file_path*.

    Uses the same path normalization as ``make_symbol_id`` so that both
    identifiers for one file share the same path prefix.
    """
    return f"{_normalize_path(file_path)}::{MODULE_SCOPE}"
143
+
144
+
145
def dedupe_calls(items: list[CallEdge]) -> list[CallEdge]:
    """Drop repeated call edges and return the survivors in sorted order.

    Two edges count as the same when caller id, callee name, line and package
    all match; the first one seen is kept. The result is ordered by caller
    id, then line, then callee name.
    """
    first_seen: dict[tuple[str, str, int, str | None], CallEdge] = {}
    for edge in items:
        identity = (edge.caller_id, edge.callee_name, edge.line, edge.package)
        # setdefault keeps the earliest edge for each identity.
        first_seen.setdefault(identity, edge)

    ordered = list(first_seen.values())
    ordered.sort(key=lambda edge: (edge.caller_id, edge.line, edge.callee_name))
    return ordered
156
+
157
+
158
def _import_key(item: dict[str, Any]) -> tuple[str, int]:
    """Return the (name, line) key used to both dedupe and sort an import record.

    A missing or ``None`` name counts as ``""`` and a missing or ``None`` line
    counts as 1.
    """
    name = item.get("name")
    line = item.get("line")
    return ("" if name is None else str(name), 1 if line is None else int(line))


def dedupe_imports(items: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Drop repeated import records and return the survivors sorted.

    Records are identified by their ``name`` and ``line`` values; the first
    record seen for each pair is kept. One key is used for both deduping and
    ordering, so a record without a name sorts as ``""`` rather than as the
    text ``"None"``, and an explicit ``None`` line is tolerated.

    Args:
        items: Import records (dicts); other keys are carried through as-is.

    Returns:
        The unique records ordered by name, then line.
    """
    unique: dict[tuple[str, int], dict[str, Any]] = {}
    for item in items:
        unique.setdefault(_import_key(item), item)
    # Keys are unique, so sorting the keys fully determines the output order.
    return [unique[key] for key in sorted(unique)]
169
+
170
+
171
def legacy_calls_from_edges(calls: list[CallEdge]) -> list[dict[str, Any]]:
    """Convert call edges into the flat dict shape used by older consumers.

    Each output dict exposes the callee name under ``"name"`` and copies the
    line, snippet, binding, package, usage, caller id and callee id as-is.
    """
    flattened: list[dict[str, Any]] = []
    for call in calls:
        flattened.append(
            dict(
                name=call.callee_name,
                line=call.line,
                snippet=call.snippet,
                binding=call.binding,
                package=call.package,
                usage=call.usage,
                caller_id=call.caller_id,
                callee_id=call.callee_id,
            )
        )
    return flattened
186
+
187
+
188
def content_hash(path: Path | str) -> str | None:
    """Compute the SHA-256 hex digest of a file's contents.

    The file is hashed in fixed-size chunks so large files are never loaded
    into memory in one piece.

    Args:
        path: Location of the file to hash.

    Returns:
        The hex digest, or ``None`` when *path* is not an existing regular
        file, cannot be read, or is not a usable path value.
    """
    try:
        target = Path(path)
        # is_file() is also False for missing paths, so one check covers both.
        if not target.is_file():
            return None
        digest = hashlib.sha256()
        with target.open("rb") as handle:
            for chunk in iter(lambda: handle.read(1 << 20), b""):
                digest.update(chunk)
    except (OSError, ValueError, TypeError):
        # Best effort: unreadable files and invalid path values yield None,
        # while unrelated programming errors are no longer hidden.
        return None
    return digest.hexdigest()
198
+
199
+
200
def _index_functions(files_graph: dict[str, dict[str, Any]]) -> dict[str, dict[str, Any]]:
    """Map each function id to its metadata plus the owning file path.

    Walks the ``"functions"`` list of every file entry. Entries that are not
    dicts, or whose ``"id"`` is missing or empty, are ignored, as are files
    whose ``"functions"`` value is not a list. When an id occurs more than
    once the last occurrence wins.
    """
    by_id: dict[str, dict[str, Any]] = {}
    for file_path, meta in files_graph.items():
        functions = meta.get("functions", [])
        if isinstance(functions, list):
            for entry in functions:
                if isinstance(entry, dict):
                    key = str(entry.get("id") or "")
                    if key:
                        # Copy so the caller's function dict is left untouched.
                        record = dict(entry)
                        record["file_path"] = file_path
                        by_id[key] = record
    return by_id
214
+
215
+
216
def _matches_qualified_suffix(qualified: str, dotted: str) -> bool:
    """Return True when *qualified* equals *dotted* or ends with it at a name boundary.

    A plain ``endswith`` would let ``"a.run"`` match ``"data.run"``; here the
    character just before the suffix must not be part of an identifier.
    """
    if not qualified.endswith(dotted):
        return False
    head = qualified[: len(qualified) - len(dotted)]
    return not head or not (head[-1].isalnum() or head[-1] == "_")


def _resolve_cross_file_callees(files_graph: dict[str, dict[str, Any]]) -> list[dict[str, Any]]:
    """Best-effort resolution of call targets across all files.

    For every dict entry in each file's ``"calls"`` list that has a caller id
    and a callee name, an edge dict is produced. When the call has no
    ``callee_id`` yet, resolution is attempted in two steps:

    1. Look up the first dotted segment of the callee name among function
       simple names; use the match if it is unique.
    2. Otherwise, for dotted callee names, look for functions whose qualified
       name equals the callee name or ends with it at a name boundary; use
       the match if it is unique.

    Unresolved calls keep their original (empty) ``callee_id``.

    Args:
        files_graph: Mapping of file path to that file's graph dict.

    Returns:
        One edge dict per valid call with the keys ``file_path``,
        ``caller_id``, ``callee_id``, ``callee_name``, ``line``, ``package``,
        ``binding`` and ``usage``.
    """
    func_index = _index_functions(files_graph)

    by_simple_name: dict[str, list[str]] = {}
    # Qualified names grouped by their last dotted segment. Any qualified name
    # that ends with a dotted callee name shares that callee's last segment,
    # so the fallback only needs to look at one group instead of every function.
    by_qualified_tail: dict[str, list[tuple[str, str]]] = {}
    for fn_id, fn in func_index.items():
        name = str(fn.get("name") or "")
        if name:
            by_simple_name.setdefault(name, []).append(fn_id)
        qualified = str(fn.get("qualified_name") or "")
        if qualified:
            tail = qualified.rsplit(".", 1)[-1]
            by_qualified_tail.setdefault(tail, []).append((fn_id, qualified))

    resolved_edges: list[dict[str, Any]] = []

    for file_path, meta in files_graph.items():
        raw_calls = meta.get("calls", [])
        if not isinstance(raw_calls, list):
            continue
        for call in raw_calls:
            if not isinstance(call, dict):
                continue
            caller_id = str(call.get("caller_id") or "")
            callee_name = str(call.get("callee_name") or "")
            callee_id = call.get("callee_id")
            if not caller_id or not callee_name:
                continue

            if not callee_id:
                root = callee_name.split(".", 1)[0]
                candidates = by_simple_name.get(root, [])
                if len(candidates) == 1:
                    callee_id = candidates[0]
                elif root != callee_name:
                    tail = callee_name.rsplit(".", 1)[-1]
                    qualified_candidates = [
                        fid
                        for fid, qualified in by_qualified_tail.get(tail, ())
                        if _matches_qualified_suffix(qualified, callee_name)
                    ]
                    # Only an unambiguous match is accepted.
                    if len(qualified_candidates) == 1:
                        callee_id = qualified_candidates[0]

            edge = {
                "file_path": file_path,
                "caller_id": caller_id,
                "callee_id": callee_id,
                "callee_name": callee_name,
                "line": int(call.get("line") or 1),
                "package": call.get("package"),
                "binding": call.get("binding"),
                "usage": call.get("usage") or "call",
            }
            resolved_edges.append(edge)

    return resolved_edges
274
+
275
+
276
def build_repo_call_graph(files_graph: dict[str, dict[str, Any]]) -> dict[str, Any]:
    """Assemble the repo-wide call graph from per-file graph dicts.

    The result bundles the input files, the function index, the resolved call
    edges, the ids of functions flagged as entry points, summary counts, and
    the schema version.
    """

    def _tally(key: str, expected: type) -> int:
        # Sum the sizes of every file's `key` value that has the expected type.
        total = 0
        for meta in files_graph.values():
            value = meta.get(key)
            if isinstance(value, expected):
                total += len(value)
        return total

    func_index = _index_functions(files_graph)
    call_edges = _resolve_cross_file_callees(files_graph)

    entry_points: list[str] = []
    for fn_id, fn in func_index.items():
        if bool(fn.get("is_entry_point")):
            entry_points.append(fn_id)

    summary = {
        "file_count": len(files_graph),
        "function_count": len(func_index),
        "call_count": _tally("calls", list),
        "import_count": _tally("imports", list),
        "binding_count": _tally("bindings", dict),
        "entry_point_count": len(entry_points),
    }

    return {
        "files": files_graph,
        "function_index": func_index,
        "call_edges": call_edges,
        "entry_points": entry_points,
        "summary": summary,
        "schema_version": SCHEMA_VERSION,
    }
320
+
321
+
322
def build_forward_adjacency(call_edges: list[dict[str, Any]]) -> dict[str, list[str]]:
    """Build a caller -> callee-ids adjacency map from resolved call edges.

    Edges without a caller id, or whose ``callee_id`` is not a non-empty
    string, are skipped. Each callee appears once per caller, in first-seen
    order, and a caller only appears if it has at least one usable edge.

    Args:
        call_edges: Edge dicts carrying ``caller_id`` and ``callee_id``.

    Returns:
        Mapping of caller id to its list of distinct callee ids.
    """
    # An insertion-ordered dict per caller gives O(1) duplicate checks; the
    # previous `callee not in list` test was quadratic for high fan-out callers.
    ordered: dict[str, dict[str, None]] = {}
    for edge in call_edges:
        caller = str(edge.get("caller_id") or "")
        callee = edge.get("callee_id")
        if not caller or not isinstance(callee, str) or not callee:
            continue
        ordered.setdefault(caller, {})[callee] = None
    return {caller: list(callees) for caller, callees in ordered.items()}
334
+
335
+
336
@dataclass
class DeadSymbol:
    """Mutable verdict attached to a symbol reported as dead.

    ``symbol``, ``tier``, ``confidence`` and ``reasons`` are required. The
    remaining fields default to: safe to delete, delete order 0, alive score
    0.0, new-style tier ``"uncertain"``, and an empty factor breakdown.
    """

    symbol: Symbol
    tier: Literal["definite", "probable", "suspicious", "uncertain"]
    confidence: float
    reasons: list[str]
    safe_to_delete: bool = True
    delete_order: int = 0
    alive_score: float = 0.0
    tier_new: str = "uncertain"
    # A fresh dict per instance, so breakdowns are never shared.
    factor_breakdown: dict[str, float] = field(default_factory=dict)
347
+
348
+
349
+ @dataclass(frozen=True, slots=True)
350
+ class DebrisFile:
351
+ path: str
352
+ category: str
353
+ confidence: float
354
+ reasons: list[str] = field(default_factory=list)
355
+ block_push: bool = False
356
+ suggestion: str = ""
357
@dataclass
class FileGraph:
    """Call-graph data for a single file.

    Holds the file's language together with its import records, name
    bindings, function definitions and call edges. Everything except
    ``language`` defaults to an empty collection.
    """

    language: str
    imports: list[dict[str, Any]] = field(default_factory=list)
    bindings: dict[str, str] = field(default_factory=dict)
    functions: list[FunctionDef] = field(default_factory=list)
    calls: list[CallEdge] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return a dict view with functions and calls expanded via ``to_dict()``.

        ``imports`` and ``bindings`` are returned as the same objects held by
        the instance, not copies.
        """
        function_dicts = []
        for definition in self.functions:
            function_dicts.append(definition.to_dict())

        call_dicts = []
        for call in self.calls:
            call_dicts.append(call.to_dict())

        result: dict[str, Any] = {}
        result["language"] = self.language
        result["imports"] = self.imports
        result["bindings"] = self.bindings
        result["functions"] = function_dicts
        result["calls"] = call_dicts
        return result
378
+
379
+
380
# Names exported by `from ... import *`.
__all__ = [
    # Type aliases
    "SymbolKind",
    "EdgeKind",
    # Flat graph structures
    "Symbol",
    "Edge",
    "CallGraph",
    # Function-level call-graph records
    "FunctionDef",
    "CallEdge",
    "FileGraph",
    # Identifier helpers
    "make_symbol_id",
    "module_caller_id",
    # Dedupe / compatibility helpers
    "dedupe_calls",
    "dedupe_imports",
    "legacy_calls_from_edges",
    # Repo-wide graph construction
    "build_repo_call_graph",
    "build_forward_adjacency",
    "_index_functions",
    "_resolve_cross_file_callees",
    # Analysis results
    "DeadSymbol",
    "DebrisFile",
    "content_hash",
]