bubble-analysis 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bubble/__init__.py +3 -0
- bubble/cache.py +207 -0
- bubble/cli.py +470 -0
- bubble/config.py +52 -0
- bubble/detectors.py +90 -0
- bubble/enums.py +65 -0
- bubble/extractor.py +829 -0
- bubble/formatters.py +887 -0
- bubble/integrations/__init__.py +92 -0
- bubble/integrations/base.py +98 -0
- bubble/integrations/cli_scripts/__init__.py +49 -0
- bubble/integrations/cli_scripts/cli.py +108 -0
- bubble/integrations/cli_scripts/detector.py +149 -0
- bubble/integrations/django/__init__.py +63 -0
- bubble/integrations/django/cli.py +111 -0
- bubble/integrations/django/detector.py +331 -0
- bubble/integrations/django/semantics.py +40 -0
- bubble/integrations/fastapi/__init__.py +57 -0
- bubble/integrations/fastapi/cli.py +110 -0
- bubble/integrations/fastapi/detector.py +176 -0
- bubble/integrations/fastapi/semantics.py +14 -0
- bubble/integrations/flask/__init__.py +57 -0
- bubble/integrations/flask/cli.py +110 -0
- bubble/integrations/flask/detector.py +191 -0
- bubble/integrations/flask/semantics.py +19 -0
- bubble/integrations/formatters.py +268 -0
- bubble/integrations/generic/__init__.py +13 -0
- bubble/integrations/generic/config.py +106 -0
- bubble/integrations/generic/detector.py +346 -0
- bubble/integrations/generic/frameworks.py +145 -0
- bubble/integrations/models.py +68 -0
- bubble/integrations/queries.py +481 -0
- bubble/loader.py +118 -0
- bubble/models.py +397 -0
- bubble/propagation.py +737 -0
- bubble/protocols.py +104 -0
- bubble/queries.py +627 -0
- bubble/results.py +211 -0
- bubble/stubs.py +89 -0
- bubble/timing.py +144 -0
- bubble_analysis-0.2.0.dist-info/METADATA +264 -0
- bubble_analysis-0.2.0.dist-info/RECORD +46 -0
- bubble_analysis-0.2.0.dist-info/WHEEL +5 -0
- bubble_analysis-0.2.0.dist-info/entry_points.txt +2 -0
- bubble_analysis-0.2.0.dist-info/licenses/LICENSE +21 -0
- bubble_analysis-0.2.0.dist-info/top_level.txt +1 -0
bubble/models.py
ADDED
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
"""Data models for code flow analysis."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
|
|
5
|
+
from bubble.enums import ConfidenceLevel, EntrypointKind, ResolutionKind
|
|
6
|
+
from bubble.integrations.base import (
|
|
7
|
+
Entrypoint,
|
|
8
|
+
GlobalHandler,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
# Built-in exception names mapped to the names of their direct base classes.
#
# ClassHierarchy seeds its parent/child maps from this table, so any built-in
# missing here is invisible to subclass checks (e.g. whether a handler for
# ``Exception`` covers ``DeprecationWarning``).
#
# ``EnvironmentError`` and ``IOError`` are aliases of ``OSError`` in Python 3;
# they are modelled as children of ``OSError`` so that code spelling the legacy
# names still resolves into the same branch of the tree.
BUILTIN_EXCEPTION_HIERARCHY: dict[str, list[str]] = {
    "BaseException": [],
    "Exception": ["BaseException"],
    "ArithmeticError": ["Exception"],
    "AssertionError": ["Exception"],
    "AttributeError": ["Exception"],
    "BlockingIOError": ["OSError"],
    "BrokenPipeError": ["ConnectionError"],
    "BufferError": ["Exception"],
    "ChildProcessError": ["OSError"],
    "ConnectionAbortedError": ["ConnectionError"],
    "ConnectionError": ["OSError"],
    "ConnectionRefusedError": ["ConnectionError"],
    "ConnectionResetError": ["ConnectionError"],
    "EOFError": ["Exception"],
    "EnvironmentError": ["OSError"],
    "FileExistsError": ["OSError"],
    "FileNotFoundError": ["OSError"],
    "FloatingPointError": ["ArithmeticError"],
    "GeneratorExit": ["BaseException"],
    "IOError": ["OSError"],
    "ImportError": ["Exception"],
    "IndentationError": ["SyntaxError"],
    "IndexError": ["LookupError"],
    "InterruptedError": ["OSError"],
    "IsADirectoryError": ["OSError"],
    "KeyboardInterrupt": ["BaseException"],
    "KeyError": ["LookupError"],
    "LookupError": ["Exception"],
    "MemoryError": ["Exception"],
    "ModuleNotFoundError": ["ImportError"],
    "NameError": ["Exception"],
    "NotADirectoryError": ["OSError"],
    "NotImplementedError": ["RuntimeError"],
    "OSError": ["Exception"],
    "OverflowError": ["ArithmeticError"],
    "PermissionError": ["OSError"],
    "ProcessLookupError": ["OSError"],
    "RecursionError": ["RuntimeError"],
    "ReferenceError": ["Exception"],
    "RuntimeError": ["Exception"],
    "StopAsyncIteration": ["Exception"],
    "StopIteration": ["Exception"],
    "SyntaxError": ["Exception"],
    "SystemError": ["Exception"],
    "SystemExit": ["BaseException"],
    "TabError": ["IndentationError"],
    "TimeoutError": ["OSError"],
    "TypeError": ["Exception"],
    "UnboundLocalError": ["NameError"],
    "UnicodeDecodeError": ["UnicodeError"],
    "UnicodeEncodeError": ["UnicodeError"],
    "UnicodeError": ["ValueError"],
    "UnicodeTranslateError": ["UnicodeError"],
    "ValueError": ["Exception"],
    "ZeroDivisionError": ["ArithmeticError"],
    # --- Entries below were missing from the original table. They are
    # appended (not interleaved) so the relative order of pre-existing
    # entries, and therefore of derived child lists, is unchanged.
    # Exception groups (Python 3.11+).
    "BaseExceptionGroup": ["BaseException"],
    "ExceptionGroup": ["BaseExceptionGroup", "Exception"],
    # Raised during interpreter shutdown (Python 3.13+).
    "PythonFinalizationError": ["RuntimeError"],
    # Warning categories: all are raisable and all derive from Exception.
    "Warning": ["Exception"],
    "BytesWarning": ["Warning"],
    "DeprecationWarning": ["Warning"],
    "EncodingWarning": ["Warning"],
    "FutureWarning": ["Warning"],
    "ImportWarning": ["Warning"],
    "PendingDeprecationWarning": ["Warning"],
    "ResourceWarning": ["Warning"],
    "RuntimeWarning": ["Warning"],
    "SyntaxWarning": ["Warning"],
    "UnicodeWarning": ["Warning"],
    "UserWarning": ["Warning"],
}
|
|
68
|
+
|
|
69
|
+
# Public names exported by ``from bubble.models import *``. Several of them
# (ResolutionKind, EntrypointKind, Entrypoint, GlobalHandler) are imported at
# the top of this module rather than defined here.
__all__ = [
    # Definitions extracted from source
    "FunctionDef",
    "ClassDef",
    # Raise / catch / call sites and call-path evidence
    "RaiseSite",
    "CatchSite",
    "ResolutionKind",
    "CallSite",
    "ResolutionEdge",
    "ExceptionEvidence",
    "compute_confidence",
    # Entrypoints and handlers
    "Entrypoint",
    "EntrypointKind",
    "GlobalHandler",
    # Dependencies and imports
    "DependencyEdge",
    "ImportInfo",
    # Hierarchies and the top-level model
    "ClassHierarchy",
    "ExceptionHierarchy",
    "ProgramModel",
]
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@dataclass
|
|
91
|
+
class FunctionDef:
|
|
92
|
+
"""A function or method definition."""
|
|
93
|
+
|
|
94
|
+
name: str
|
|
95
|
+
qualified_name: str
|
|
96
|
+
file: str
|
|
97
|
+
line: int
|
|
98
|
+
is_method: bool
|
|
99
|
+
is_async: bool
|
|
100
|
+
class_name: str | None = None
|
|
101
|
+
return_type: str | None = None
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@dataclass
class ClassDef:
    """Record describing one class definition.

    ``bases`` and ``abstract_methods`` get a fresh empty container per
    instance, so instances never share mutable defaults.
    """

    # ClassHierarchy.add_class keys its maps on ``name``.
    name: str
    qualified_name: str

    # Where the definition was found.
    file: str
    line: int

    # Base-class names as written; entries may be dotted (ClassHierarchy
    # reduces them to the last dotted component when indexing children).
    bases: list[str] = field(default_factory=list)

    is_abstract: bool = False

    # Method names consulted by ClassHierarchy.is_abstract_method.
    abstract_methods: set[str] = field(default_factory=set)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@dataclass
|
|
118
|
+
class RaiseSite:
|
|
119
|
+
"""A location where an exception is raised."""
|
|
120
|
+
|
|
121
|
+
file: str
|
|
122
|
+
line: int
|
|
123
|
+
function: str
|
|
124
|
+
exception_type: str
|
|
125
|
+
is_bare_raise: bool
|
|
126
|
+
code: str
|
|
127
|
+
message_expr: str | None = None
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@dataclass
class CatchSite:
    """Record of a single place in the code where exceptions are caught.

    All fields are required.
    """

    # Location of the handler.
    file: str
    line: int
    function: str

    # Exception type names named by the handler.
    caught_types: list[str]

    # Handler flags (names suggest a bare ``except:`` and a re-raise inside
    # the handler; exact meaning is defined by the producer).
    has_bare_except: bool
    has_reraise: bool
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
@dataclass
class CallSite:
    """Record of a single place in the code where a function is called."""

    # Location of the call.
    file: str
    line: int

    # Caller and callee as recorded at the call site.
    # ProgramModel.get_callers filters on ``callee_name``.
    caller_function: str
    callee_name: str
    is_method_call: bool

    # Qualified names; None until/unless they are filled in.
    # ProgramModel.get_callers_qualified filters on ``callee_qualified``.
    caller_qualified: str | None = None
    callee_qualified: str | None = None

    # How the callee was resolved; starts out as UNRESOLVED.
    resolution_kind: ResolutionKind = ResolutionKind.UNRESOLVED
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
@dataclass
class ResolutionEdge:
    """One caller -> callee hop of a call path, with resolution metadata."""

    # Endpoints of the hop.
    caller: str
    callee: str

    # Location associated with the hop.
    file: str
    line: int

    # How the callee was resolved; compute_confidence grades a path from this.
    resolution_kind: ResolutionKind
    is_heuristic: bool

    # compute_confidence treats a NAME_FALLBACK edge with match_count > 1 as
    # ambiguous and one with match_count == 1 as unambiguous.
    match_count: int = 1
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
@dataclass
class ExceptionEvidence:
    """Bundle tying a raise site to the call path it travels and a confidence."""

    # The originating raise.
    raise_site: "RaiseSite"

    # Hops of the call path, as ResolutionEdge records.
    call_path: list[ResolutionEdge]

    # Confidence grade for this evidence.
    confidence: ConfidenceLevel
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def compute_confidence(edges: list[ResolutionEdge]) -> ConfidenceLevel:
    """Grade a call path by the weakest kind of resolution it contains.

    Rules, in priority order:
      * empty path                                   -> HIGH
      * any NAME_FALLBACK edge with match_count > 1  -> LOW
      * any POLYMORPHIC edge                         -> LOW
      * any NAME_FALLBACK edge with match_count == 1 -> MEDIUM
      * any RETURN_TYPE edge                         -> MEDIUM
      * otherwise                                    -> HIGH
    """
    if not edges:
        return ConfidenceLevel.HIGH

    medium_evidence = False

    for edge in edges:
        kind = edge.resolution_kind

        if kind == ResolutionKind.NAME_FALLBACK:
            if edge.match_count > 1:
                # Ambiguous fallback: nothing later in the path can outrank LOW.
                return ConfidenceLevel.LOW
            if edge.match_count == 1:
                medium_evidence = True
        elif kind == ResolutionKind.POLYMORPHIC:
            # Both LOW triggers outrank every MEDIUM trigger, so stop here.
            return ConfidenceLevel.LOW
        elif kind == ResolutionKind.RETURN_TYPE:
            medium_evidence = True

    if medium_evidence:
        return ConfidenceLevel.MEDIUM
    return ConfidenceLevel.HIGH
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
@dataclass
class DependencyEdge:
    """Record of an implicit dependency (for example a FastAPI ``Depends``).

    All fields are required.
    """

    # The dependent side.
    dependent_file: str
    dependent_function: str

    # The thing depended upon.
    dependency_name: str

    # Free-form label for the dependency mechanism.
    kind: str
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
@dataclass
|
|
216
|
+
class ImportInfo:
|
|
217
|
+
"""An import statement in a module."""
|
|
218
|
+
|
|
219
|
+
file: str
|
|
220
|
+
module: str
|
|
221
|
+
name: str
|
|
222
|
+
alias: str | None = None
|
|
223
|
+
is_from_import: bool = False
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
@dataclass
class ClassHierarchy:
    """Inheritance graph over class names.

    ``classes`` and ``parent_map`` are keyed by the class name passed in
    (``ClassDef.name`` for user classes); ``child_map`` is keyed by the last
    dotted component of each base name. Every instance is pre-populated with
    the built-in exception hierarchy on construction.

    ``_subclass_cache`` memoizes ``is_subclass_of`` answers and is cleared
    whenever ``add_class`` changes the graph.
    """

    classes: dict[str, ClassDef] = field(default_factory=dict)
    parent_map: dict[str, list[str]] = field(default_factory=dict)
    child_map: dict[str, list[str]] = field(default_factory=dict)
    _subclass_cache: dict[tuple[str, str], bool] = field(default_factory=dict, repr=False)

    def __post_init__(self) -> None:
        """Bootstrap with built-in Python exceptions."""
        self._bootstrap_builtins()

    def _bootstrap_builtins(self) -> None:
        """Load BUILTIN_EXCEPTION_HIERARCHY into parent_map / child_map."""
        for exc_name, parents in BUILTIN_EXCEPTION_HIERARCHY.items():
            # Store a copy: the table's lists are module-level state, and
            # sharing them would let a mutation through one hierarchy leak
            # into the constant and into every other hierarchy.
            self.parent_map[exc_name] = list(parents)
            for parent in parents:
                children = self.child_map.setdefault(parent, [])
                if exc_name not in children:
                    children.append(exc_name)

    def add_class(self, cls: ClassDef) -> None:
        """Register ``cls`` and link it to its bases.

        Any existing entry for ``cls.name`` in ``classes`` / ``parent_map`` is
        replaced. The subclass cache is cleared because cached answers may no
        longer hold.
        """
        self.classes[cls.name] = cls
        # Copy so later edits to cls.bases cannot silently desynchronise
        # parent_map from child_map.
        self.parent_map[cls.name] = list(cls.bases)

        for base in cls.bases:
            # child_map is indexed by simple name, so drop any module prefix.
            base_simple = base.split(".")[-1]
            children = self.child_map.setdefault(base_simple, [])
            if cls.name not in children:
                children.append(cls.name)

        self._subclass_cache.clear()

    def get_all_subclasses(self, class_name: str) -> set[str]:
        """Return every direct and indirect subclass name of ``class_name``.

        The class itself is not included unless the graph contains a cycle
        that leads back to it.
        """
        result: set[str] = set()
        to_visit = [class_name]

        while to_visit:
            current = to_visit.pop()
            for child in self.child_map.get(current, []):
                if child not in result:
                    result.add(child)
                    to_visit.append(child)

        return result

    def get_subclasses(self, class_name: str) -> set[str]:
        """Alias for get_all_subclasses for backwards compatibility."""
        return self.get_all_subclasses(class_name)

    def is_subclass_of(self, child: str, parent: str) -> bool:
        """Return True if ``child`` equals ``parent`` or inherits from it.

        Ancestors are found by walking ``parent_map`` upward from ``child``.
        A base matches ``parent`` either verbatim or by its last dotted
        component. Results for distinct names are memoized.

        When ``bubble.timing`` is enabled, a cache hit records
        ``"hierarchy_cache_hit"`` and a full walk records
        ``"hierarchy_lookup"`` with the elapsed ``perf_counter`` time.
        """
        # Imported here, as in the original, rather than at module top level.
        import time

        from bubble import timing

        if child == parent:
            return True

        cache_key = (child, parent)
        if cache_key in self._subclass_cache:
            if timing.is_enabled():
                timing.record("hierarchy_cache_hit", 0)
            return self._subclass_cache[cache_key]

        start = time.perf_counter()

        visited: set[str] = set()
        to_check = [child]
        result = False

        while to_check:
            current = to_check.pop()
            if current in visited:
                # Guards against cycles and repeated diamond ancestors.
                continue
            visited.add(current)

            parents = self.parent_map.get(current, [])
            simple_parents = [p.split(".")[-1] for p in parents]
            if parent in simple_parents or parent in parents:
                result = True
                break
            # parent_map is looked up by simple name on the next hop.
            to_check.extend(simple_parents)

        self._subclass_cache[cache_key] = result

        if timing.is_enabled():
            timing.record("hierarchy_lookup", time.perf_counter() - start)

        return result

    def is_abstract_method(self, class_name: str, method_name: str) -> bool:
        """Return True if ``method_name`` is listed as abstract on ``class_name``.

        Unknown classes yield False.
        """
        cls = self.classes.get(class_name)
        if cls:
            return method_name in cls.abstract_methods
        return False

    def get_concrete_implementations(
        self, base_class: str, method_name: str
    ) -> list[tuple[str, ClassDef]]:
        """Get all concrete implementations of an abstract method.

        Returns a list of (class_name, class_def) tuples, sorted by class
        name, for every registered subclass of ``base_class`` that does not
        itself list ``method_name`` as abstract. Returns an empty list when
        the method is not abstract on ``base_class``.
        """
        if not self.is_abstract_method(base_class, method_name):
            return []

        implementations: list[tuple[str, ClassDef]] = []

        # Sorted so the result does not depend on set iteration order, which
        # varies between interpreter runs under string hash randomisation.
        for subclass_name in sorted(self.get_all_subclasses(base_class)):
            cls = self.classes.get(subclass_name)
            if cls and method_name not in cls.abstract_methods:
                implementations.append((subclass_name, cls))

        return implementations
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
# Alias: ExceptionHierarchy is the very same class object as ClassHierarchy.
# ProgramModel annotates and default-constructs its ``exception_hierarchy``
# field through this name.
ExceptionHierarchy = ClassHierarchy
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
@dataclass
class ProgramModel:
    """Aggregate of everything extracted from a codebase for analysis.

    Every field defaults to a fresh empty container (or a fresh
    ``ExceptionHierarchy``), so ``ProgramModel()`` is a valid empty model.
    """

    # Definitions.
    functions: dict[str, FunctionDef] = field(default_factory=dict)
    classes: dict[str, ClassDef] = field(default_factory=dict)

    # Sites.
    raise_sites: list[RaiseSite] = field(default_factory=list)
    catch_sites: list[CatchSite] = field(default_factory=list)
    call_sites: list[CallSite] = field(default_factory=list)

    # Framework-level entrypoints and handlers.
    entrypoints: list[Entrypoint] = field(default_factory=list)
    global_handlers: list[GlobalHandler] = field(default_factory=list)

    exception_hierarchy: ExceptionHierarchy = field(default_factory=ExceptionHierarchy)

    # Imports: the raw list plus a per-file mapping of name -> qualified name
    # (the latter is what resolve_name consults).
    imports: list[ImportInfo] = field(default_factory=list)
    import_maps: dict[str, dict[str, str]] = field(default_factory=dict)

    # Qualified function name -> return type, read by get_return_type.
    return_types: dict[str, str] = field(default_factory=dict)

    detected_frameworks: set[str] = field(default_factory=set)

    def get_function_by_name(self, name: str, file: str | None = None) -> FunctionDef | None:
        """Return the first function whose name or qualified name is ``name``.

        When ``file`` is given (and non-empty), only entries whose key in
        ``functions`` starts with ``file`` are considered. Returns None when
        nothing matches.
        """
        candidates = (
            func
            for key, func in self.functions.items()
            if (not file or key.startswith(file))
            and (func.name == name or func.qualified_name == name)
        )
        return next(candidates, None)

    def get_callers(self, function_name: str) -> list[CallSite]:
        """Return the call sites whose ``callee_name`` equals ``function_name``."""
        return [site for site in self.call_sites if site.callee_name == function_name]

    def get_callers_qualified(self, qualified_name: str) -> list[CallSite]:
        """Return the call sites whose ``callee_qualified`` equals ``qualified_name``."""
        return [site for site in self.call_sites if site.callee_qualified == qualified_name]

    def resolve_name(self, name: str, file: str) -> str | None:
        """Look ``name`` up in the import map of ``file``.

        Returns None when the file has no import map or the name is absent.
        """
        return self.import_maps.get(file, {}).get(name)

    def get_return_type(self, qualified_name: str) -> str | None:
        """Return the recorded return type for ``qualified_name``, or None."""
        return self.return_types.get(qualified_name)
|