bubble-analysis 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bubble/__init__.py +3 -0
- bubble/cache.py +207 -0
- bubble/cli.py +470 -0
- bubble/config.py +52 -0
- bubble/detectors.py +90 -0
- bubble/enums.py +65 -0
- bubble/extractor.py +829 -0
- bubble/formatters.py +887 -0
- bubble/integrations/__init__.py +92 -0
- bubble/integrations/base.py +98 -0
- bubble/integrations/cli_scripts/__init__.py +49 -0
- bubble/integrations/cli_scripts/cli.py +108 -0
- bubble/integrations/cli_scripts/detector.py +149 -0
- bubble/integrations/django/__init__.py +63 -0
- bubble/integrations/django/cli.py +111 -0
- bubble/integrations/django/detector.py +331 -0
- bubble/integrations/django/semantics.py +40 -0
- bubble/integrations/fastapi/__init__.py +57 -0
- bubble/integrations/fastapi/cli.py +110 -0
- bubble/integrations/fastapi/detector.py +176 -0
- bubble/integrations/fastapi/semantics.py +14 -0
- bubble/integrations/flask/__init__.py +57 -0
- bubble/integrations/flask/cli.py +110 -0
- bubble/integrations/flask/detector.py +191 -0
- bubble/integrations/flask/semantics.py +19 -0
- bubble/integrations/formatters.py +268 -0
- bubble/integrations/generic/__init__.py +13 -0
- bubble/integrations/generic/config.py +106 -0
- bubble/integrations/generic/detector.py +346 -0
- bubble/integrations/generic/frameworks.py +145 -0
- bubble/integrations/models.py +68 -0
- bubble/integrations/queries.py +481 -0
- bubble/loader.py +118 -0
- bubble/models.py +397 -0
- bubble/propagation.py +737 -0
- bubble/protocols.py +104 -0
- bubble/queries.py +627 -0
- bubble/results.py +211 -0
- bubble/stubs.py +89 -0
- bubble/timing.py +144 -0
- bubble_analysis-0.2.0.dist-info/METADATA +264 -0
- bubble_analysis-0.2.0.dist-info/RECORD +46 -0
- bubble_analysis-0.2.0.dist-info/WHEEL +5 -0
- bubble_analysis-0.2.0.dist-info/entry_points.txt +2 -0
- bubble_analysis-0.2.0.dist-info/licenses/LICENSE +21 -0
- bubble_analysis-0.2.0.dist-info/top_level.txt +1 -0
bubble/extractor.py
ADDED
|
@@ -0,0 +1,829 @@
|
|
|
1
|
+
"""Extract structural information from Python source files using libcst."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from collections.abc import Sequence
|
|
5
|
+
from concurrent.futures import ProcessPoolExecutor, as_completed
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
8
|
+
|
|
9
|
+
import libcst as cst
|
|
10
|
+
from libcst.metadata import MetadataWrapper, PositionProvider
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
pass
|
|
14
|
+
|
|
15
|
+
from bubble.detectors import detect_entrypoints, detect_global_handlers
|
|
16
|
+
from bubble.enums import ResolutionKind
|
|
17
|
+
from bubble.loader import load_detectors
|
|
18
|
+
from bubble.models import (
|
|
19
|
+
CallSite,
|
|
20
|
+
CatchSite,
|
|
21
|
+
ClassDef,
|
|
22
|
+
Entrypoint,
|
|
23
|
+
FunctionDef,
|
|
24
|
+
GlobalHandler,
|
|
25
|
+
ImportInfo,
|
|
26
|
+
ProgramModel,
|
|
27
|
+
RaiseSite,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class CodeExtractor(cst.CSTVisitor):
    """Collect definitions, imports, raise/catch sites and call sites from one module.

    The visitor needs ``PositionProvider`` metadata, so it must be run through a
    ``MetadataWrapper``. Results accumulate on the public list/dict/set
    attributes created in ``__init__``.
    """

    METADATA_DEPENDENCIES = (PositionProvider,)

    def __init__(self, file_path: str, relative_path: str | None = None) -> None:
        """Create an extractor for a single file.

        Args:
            file_path: Path recorded on imports, class/function definitions and
                call sites.
            relative_path: Path recorded on raise/catch sites and used as the
                prefix of qualified names; falls back to ``file_path`` when falsy.
        """
        self.file_path = file_path
        self.relative_path = relative_path or file_path
        self.functions: list[FunctionDef] = []
        self.classes: list[ClassDef] = []
        self.raise_sites: list[RaiseSite] = []
        self.catch_sites: list[CatchSite] = []
        self.call_sites: list[CallSite] = []
        self.imports: list[ImportInfo] = []
        # Local name -> dotted name it was imported as.
        self.import_map: dict[str, str] = {}
        # "<relative_path>::<qualified_name>" -> return annotation name.
        self.return_types: dict[str, str] = {}
        self.detected_frameworks: set[str] = set()

        self._class_stack: list[str] = []
        self._function_stack: list[str] = []
        # Variable name -> type name for the scope currently being visited.
        self._local_types: dict[str, str] = {}
        # Saved ``_local_types`` of enclosing scopes, restored on function exit.
        self._local_types_stack: list[dict[str, str]] = []
        # ``len(_function_stack)`` at the moment each open class was entered;
        # lets us tell direct methods from functions nested inside methods.
        self._class_function_depths: list[int] = []
        self._abstract_methods: dict[str, set[str]] = {}
        self._class_bases: dict[str, list[str]] = {}
        # Reused for rendering nodes back to source text.
        self._codegen_module = cst.parse_module("")

    def visit_Import(self, node: cst.Import) -> bool:
        """Record ``import x`` / ``import x as y`` statements."""
        for name in node.names if isinstance(node.names, tuple) else []:
            if not isinstance(name, cst.ImportAlias):
                continue

            module_name = self._get_name_from_expr(name.name)
            alias = (
                name.asname.name.value
                if name.asname and isinstance(name.asname.name, cst.Name)
                else None
            )
            self.imports.append(
                ImportInfo(
                    file=self.file_path,
                    module=module_name,
                    name=module_name,
                    alias=alias,
                    is_from_import=False,
                )
            )
            # NOTE(review): for ``import a.b`` the bound name ``a`` is mapped to
            # ``a.b`` (the full dotted module) - confirm consumers expect that.
            local_name = alias or module_name.split(".")[0]
            self.import_map[local_name] = module_name
            self._detect_framework(module_name)
        return False

    def visit_ImportFrom(self, node: cst.ImportFrom) -> bool:
        """Record ``from m import ...`` statements; imports without a module are skipped."""
        if node.module is None:
            return False

        module_name = self._get_name_from_expr(node.module)
        self._detect_framework(module_name)

        if isinstance(node.names, cst.ImportStar):
            self.imports.append(
                ImportInfo(
                    file=self.file_path,
                    module=module_name,
                    name="*",
                    alias=None,
                    is_from_import=True,
                )
            )
        elif isinstance(node.names, tuple):
            for name in node.names:
                if not isinstance(name, cst.ImportAlias):
                    continue

                imported_name = self._get_name_from_expr(name.name)
                alias = (
                    name.asname.name.value
                    if name.asname and isinstance(name.asname.name, cst.Name)
                    else None
                )
                self.imports.append(
                    ImportInfo(
                        file=self.file_path,
                        module=module_name,
                        name=imported_name,
                        alias=alias,
                        is_from_import=True,
                    )
                )
                local_name = alias or imported_name
                self.import_map[local_name] = f"{module_name}.{imported_name}"
        return False

    def visit_ClassDef(self, node: cst.ClassDef) -> bool:
        """Open a class scope and remember its (nameable) base classes."""
        class_name = node.name.value

        bases: list[str] = []
        for arg in node.bases:
            base_name = self._get_name_from_expr(arg.value)
            if base_name:
                bases.append(base_name)

        self._class_stack.append(class_name)
        self._class_function_depths.append(len(self._function_stack))
        self._abstract_methods[class_name] = set()
        self._class_bases[class_name] = bases

        return True

    def leave_ClassDef(self, node: cst.ClassDef) -> None:
        """Close the class scope and emit its ``ClassDef`` record."""
        class_name = self._class_stack.pop()
        self._class_function_depths.pop()
        pos = self.get_metadata(PositionProvider, node)

        bases = self._class_bases.get(class_name, [])
        abstract_methods = self._abstract_methods.get(class_name, set())

        is_abstract = bool(abstract_methods) or "ABC" in bases or "abc.ABC" in bases

        # Enclosing classes (if any) prefix the name, dot-separated.
        qualified_name = ".".join(self._class_stack + [class_name])

        self.classes.append(
            ClassDef(
                name=class_name,
                qualified_name=qualified_name,
                file=self.file_path,
                line=pos.start.line,
                bases=bases,
                is_abstract=is_abstract,
                abstract_methods=abstract_methods,
            )
        )

    def visit_FunctionDef(self, node: cst.FunctionDef) -> bool:
        """Record a function/method definition and open its local scope."""
        pos = self.get_metadata(PositionProvider, node)
        func_name = node.name.value

        is_method = bool(self._class_stack)
        class_name = self._class_stack[-1] if is_method else None
        qualified_name = ".".join(self._class_stack + [func_name])

        is_async = isinstance(node.asynchronous, cst.Asynchronous)

        return_type: str | None = None
        if node.returns is not None:
            return_type = self._get_name_from_expr(node.returns.annotation)
            if return_type:
                self.return_types[f"{self.relative_path}::{qualified_name}"] = return_type

        # Only functions written directly in the class body can make the class
        # abstract; a stub callback nested inside a method must not.
        is_direct_method = (
            is_method and len(self._function_stack) == self._class_function_depths[-1]
        )
        if is_direct_method and class_name and self._is_abstract_method(node):
            self._abstract_methods[class_name].add(func_name)

        self.functions.append(
            FunctionDef(
                name=func_name,
                qualified_name=qualified_name,
                file=self.file_path,
                line=pos.start.line,
                is_method=is_method,
                is_async=is_async,
                class_name=class_name,
                return_type=return_type,
            )
        )

        caller_qualified = f"{self.relative_path}::{qualified_name}"
        self._extract_depends_calls(node.params, func_name, caller_qualified, pos.start.line)

        self._function_stack.append(func_name)
        # Start the function with an empty type table, but keep the enclosing
        # scope's table so it can be restored in leave_FunctionDef.
        self._local_types_stack.append(self._local_types)
        self._local_types = {}
        return True

    def _is_abstract_method(self, node: cst.FunctionDef) -> bool:
        """Check if a method is abstract (decorator, NotImplementedError, or stub body)."""
        return (
            self._has_abstractmethod_decorator(node)
            or self._is_raise_not_implemented(node.body)
            or self._is_pass_or_ellipsis(node.body)
        )

    def _has_abstractmethod_decorator(self, node: cst.FunctionDef) -> bool:
        """Check for @abstractmethod or @<anything>.abstractmethod decorator."""
        for decorator in node.decorators:
            target = decorator.decorator
            if isinstance(target, cst.Name) and target.value == "abstractmethod":
                return True
            if isinstance(target, cst.Attribute) and target.attr.value == "abstractmethod":
                return True
        return False

    def _is_raise_not_implemented(self, body: cst.BaseSuite) -> bool:
        """Check if the last top-level statement is ``raise NotImplementedError``.

        Statements before the raise (typically a docstring) are not inspected.
        Both ``raise NotImplementedError`` and ``raise NotImplementedError(...)``
        match.
        """
        if not isinstance(body, cst.IndentedBlock):
            return False

        stmts = [s for s in body.body if not isinstance(s, cst.EmptyLine)]
        if not stmts:
            return False

        last_stmt = stmts[-1]
        if not isinstance(last_stmt, cst.SimpleStatementLine):
            return False
        if len(last_stmt.body) != 1:
            return False

        inner = last_stmt.body[0]
        if not isinstance(inner, cst.Raise) or inner.exc is None:
            return False

        exc_name = None
        if isinstance(inner.exc, cst.Name):
            exc_name = inner.exc.value
        elif isinstance(inner.exc, cst.Call) and isinstance(inner.exc.func, cst.Name):
            exc_name = inner.exc.func.value

        return exc_name == "NotImplementedError"

    def _is_pass_or_ellipsis(self, body: cst.BaseSuite) -> bool:
        """Check if method body is just 'pass' or '...'."""
        if not isinstance(body, cst.IndentedBlock):
            return False

        stmts = [s for s in body.body if not isinstance(s, cst.EmptyLine)]
        if len(stmts) != 1:
            return False

        stmt = stmts[0]
        if not isinstance(stmt, cst.SimpleStatementLine):
            return False
        if len(stmt.body) != 1:
            return False

        inner = stmt.body[0]
        if isinstance(inner, cst.Pass):
            return True
        return isinstance(inner, cst.Expr) and isinstance(inner.value, cst.Ellipsis)

    def leave_FunctionDef(self, node: cst.FunctionDef) -> None:
        """Close the function scope and restore the enclosing scope's type table."""
        self._function_stack.pop()
        self._local_types = self._local_types_stack.pop()

    def _enclosing_function_name(self) -> str:
        """Return the dotted name used for raise/catch sites, or ``<module>``."""
        if not self._function_stack:
            return "<module>"
        return ".".join(self._class_stack + [self._function_stack[-1]])

    def visit_Raise(self, node: cst.Raise) -> bool:
        """Record a ``raise`` statement as a ``RaiseSite``."""
        pos = self.get_metadata(PositionProvider, node)
        exc = node.exc

        exception_type = "Unknown"
        message_expr: str | None = None
        code = ""

        if exc is not None:
            code = self._codegen_module.code_for_node(node)

            if isinstance(exc, cst.Call):
                # Fall back to "Unknown" when the callee cannot be named.
                exception_type = self._get_name_from_expr(exc.func) or "Unknown"
                if exc.args:
                    first_arg = exc.args[0].value
                    if isinstance(
                        first_arg, cst.SimpleString | cst.FormattedString | cst.ConcatenatedString
                    ):
                        message_expr = self._codegen_module.code_for_node(first_arg)
            elif isinstance(exc, cst.Name | cst.Attribute):
                # Covers both ``raise Error`` and ``raise pkg.Error``.
                exception_type = self._get_name_from_expr(exc) or "Unknown"

        self.raise_sites.append(
            RaiseSite(
                file=self.relative_path,
                line=pos.start.line,
                function=self._enclosing_function_name(),
                exception_type=exception_type,
                is_bare_raise=exc is None,
                code=code.strip(),
                message_expr=message_expr,
            )
        )

        return True

    def visit_Try(self, node: cst.Try) -> bool:
        """Record one ``CatchSite`` per ``except`` handler of a ``try`` statement."""
        qualified_function = self._enclosing_function_name()

        for handler in node.handlers:
            caught_types: list[str] = []
            has_bare_except = False

            if handler.type is None:
                has_bare_except = True
            elif isinstance(handler.type, cst.Tuple):
                for el in handler.type.elements:
                    if isinstance(el.value, cst.Name | cst.Attribute):
                        name = self._get_name_from_expr(el.value)
                        if name:
                            caught_types.append(name)
            else:
                name = self._get_name_from_expr(handler.type)
                if name:
                    caught_types.append(name)

            has_reraise = self._block_has_reraise(handler.body)
            handler_pos = self.get_metadata(PositionProvider, handler)

            self.catch_sites.append(
                CatchSite(
                    file=self.relative_path,
                    line=handler_pos.start.line,
                    function=qualified_function,
                    caught_types=caught_types,
                    has_bare_except=has_bare_except,
                    has_reraise=has_reraise,
                )
            )

        return True

    def visit_Call(self, node: cst.Call) -> bool:
        """Record a call as a ``CallSite``, resolving the callee where possible.

        Calls whose target is neither a plain name nor an attribute access
        (e.g. ``f()()``) are not recorded.
        """
        pos = self.get_metadata(PositionProvider, node)
        current_function = self._function_stack[-1] if self._function_stack else "<module>"

        caller_qualified = self._get_current_qualified_name()

        callee_name: str
        callee_qualified: str | None = None
        resolution_kind: ResolutionKind = ResolutionKind.UNRESOLVED
        is_method_call = False

        if isinstance(node.func, cst.Attribute):
            callee_name = node.func.attr.value
            is_method_call = True
            base_expr = node.func.value

            if isinstance(base_expr, cst.Name):
                base_name = base_expr.value
                if base_name == "self" and self._class_stack:
                    callee_qualified = (
                        f"{self.relative_path}::{'.'.join(self._class_stack)}.{callee_name}"
                    )
                    resolution_kind = ResolutionKind.SELF
                elif base_name in self._local_types:
                    # Receiver is a variable whose type was tracked from an
                    # assignment or annotation in the current scope.
                    type_name = self._local_types[base_name]
                    if type_name in self.import_map:
                        callee_qualified = f"{self.import_map[type_name]}.{callee_name}"
                    else:
                        callee_qualified = f"{self.relative_path}::{type_name}.{callee_name}"
                    resolution_kind = ResolutionKind.CONSTRUCTOR
                elif base_name in self.import_map:
                    callee_qualified = f"{self.import_map[base_name]}.{callee_name}"
                    resolution_kind = ResolutionKind.MODULE_ATTRIBUTE
                    is_method_call = False

        elif isinstance(node.func, cst.Name):
            callee_name = node.func.value
            if callee_name in self.import_map:
                callee_qualified = self.import_map[callee_name]
                resolution_kind = ResolutionKind.IMPORT
        else:
            return True

        self.call_sites.append(
            CallSite(
                file=self.file_path,
                line=pos.start.line,
                caller_function=current_function,
                callee_name=callee_name,
                is_method_call=is_method_call,
                caller_qualified=caller_qualified,
                callee_qualified=callee_qualified,
                resolution_kind=resolution_kind,
            )
        )

        return True

    def _get_current_qualified_name(self) -> str:
        """Get the fully qualified name of the current context.

        Parts (relative path, dotted class stack, innermost function) are
        joined with ``::``.
        """
        parts = [self.relative_path]
        if self._class_stack:
            parts.append(".".join(self._class_stack))
        if self._function_stack:
            parts.append(self._function_stack[-1])
        return "::".join(parts)

    def _extract_depends_calls(
        self,
        params: cst.Parameters,
        caller_function: str,
        caller_qualified: str,
        line: int,
    ) -> None:
        """Extract FastAPI Depends() declarations from function parameters.

        Each parameter whose default is ``Depends(<name>)`` yields a synthetic
        ``CallSite`` from the function to the dependency, located at ``line``.
        """
        all_params = (
            list(params.posonly_params) + list(params.params) + list(params.kwonly_params)
        )

        for param in all_params:
            if param.default is None:
                continue

            dep_info = self._parse_depends(param.default)
            if not dep_info:
                continue

            self.call_sites.append(
                CallSite(
                    file=self.file_path,
                    line=line,
                    caller_function=caller_function,
                    callee_name=dep_info["name"],
                    is_method_call=False,
                    caller_qualified=caller_qualified,
                    callee_qualified=dep_info.get("qualified"),
                    resolution_kind=ResolutionKind.FASTAPI_DEPENDS,
                )
            )

    def _parse_depends(self, node: cst.BaseExpression) -> dict[str, str | None] | None:
        """Parse Depends(func) and return dependency info.

        Returns:
            ``{"name": ..., "qualified": ...}`` where ``qualified`` is the
            import-map entry for the dependency (or ``None``), or ``None`` when
            ``node`` is not a ``Depends(<name>)`` / ``fastapi.Depends(<name>)`` call.
        """
        if not isinstance(node, cst.Call):
            return None

        func_name = self._get_name_from_expr(node.func)
        if func_name not in ("Depends", "fastapi.Depends"):
            return None

        if not node.args:
            return None

        dep_name = self._get_name_from_expr(node.args[0].value)
        if not dep_name:
            return None

        return {
            "name": dep_name,
            "qualified": self.import_map.get(dep_name),
        }

    def visit_Assign(self, node: cst.Assign) -> bool:
        """Track variable assignments for constructor resolution.

        ``x = Name(...)`` records ``x`` as having type ``Name`` in the current scope.
        """
        call = node.value
        if not isinstance(call, cst.Call) or not isinstance(call.func, cst.Name):
            return True

        type_name = call.func.value
        for target in node.targets:
            if isinstance(target.target, cst.Name):
                self._local_types[target.target.value] = type_name

        return True

    def visit_AnnAssign(self, node: cst.AnnAssign) -> bool:
        """Track annotated assignments for type resolution.

        The annotation is recorded first; a ``Name(...)`` call on the right-hand
        side then takes precedence over it.
        """
        if node.target is None or not isinstance(node.target, cst.Name):
            return True

        var_name = node.target.value

        if node.annotation and node.annotation.annotation:
            type_name = self._get_name_from_expr(node.annotation.annotation)
            if type_name:
                self._local_types[var_name] = type_name

        if node.value and isinstance(node.value, cst.Call):
            call = node.value
            if isinstance(call.func, cst.Name):
                self._local_types[var_name] = call.func.value

        return True

    def _get_name_from_expr(self, expr: cst.BaseExpression) -> str:
        """Extract a name from an expression (handles Name and Attribute).

        Returns a dotted name for attribute chains, just the attribute when its
        base cannot be named, and ``""`` for any other expression.
        """
        if isinstance(expr, cst.Name):
            return expr.value
        if isinstance(expr, cst.Attribute):
            base = self._get_name_from_expr(expr.value)
            if base:
                return f"{base}.{expr.attr.value}"
            return expr.attr.value
        return ""

    def _detect_framework(self, module_name: str) -> None:
        """Detect frameworks from import module names (case-insensitive substring match)."""
        module_lower = module_name.lower()
        if "flask" in module_lower:
            self.detected_frameworks.add("flask")
        elif "fastapi" in module_lower or "starlette" in module_lower:
            self.detected_frameworks.add("fastapi")
        elif "django" in module_lower or "rest_framework" in module_lower:
            self.detected_frameworks.add("django")

    def _block_has_reraise(self, body: cst.BaseSuite) -> bool:
        """Check if a block contains a raise statement (re-raise).

        Only simple statements directly in the block are examined; a ``raise``
        nested inside e.g. an ``if`` is not seen.
        """
        if not isinstance(body, cst.IndentedBlock):
            return False
        for stmt in body.body:
            if isinstance(stmt, cst.SimpleStatementLine):
                if any(isinstance(s, cst.Raise) for s in stmt.body):
                    return True
        return False
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
class FileExtraction:
    """Container for everything extracted from one source file.

    Every attribute starts out empty; each instance gets its own fresh
    containers.
    """

    def __init__(self) -> None:
        # Definitions found in the file.
        self.functions: list[FunctionDef] = []
        self.classes: list[ClassDef] = []
        # Exception flow: where exceptions are raised and caught.
        self.raise_sites: list[RaiseSite] = []
        self.catch_sites: list[CatchSite] = []
        # Call sites and import statements.
        self.call_sites: list[CallSite] = []
        self.imports: list[ImportInfo] = []
        # Detector output.
        self.entrypoints: list[Entrypoint] = []
        self.global_handlers: list[GlobalHandler] = []
        # Lookup tables and detected framework names.
        self.import_map: dict[str, str] = {}
        self.return_types: dict[str, str] = {}
        self.detected_frameworks: set[str] = set()
|
|
588
|
+
|
|
589
|
+
|
|
590
|
+
def extract_from_file(file_path: Path, relative_path: str | None = None) -> FileExtraction:
    """Extract structural information from a single Python file.

    Args:
        file_path: File to read and parse.
        relative_path: Path handed to ``CodeExtractor`` for qualified names;
            the extractor falls back to ``file_path`` when it is ``None``.

    Returns:
        The populated ``FileExtraction``. If the file cannot be read or parsed,
        or the visitor fails, an empty extraction is returned instead of
        raising.
    """
    import tokenize

    result = FileExtraction()

    try:
        # tokenize.open honours the source encoding declaration / BOM and
        # defaults to UTF-8, instead of depending on the platform locale.
        with tokenize.open(file_path) as handle:
            source = handle.read()
        module = cst.parse_module(source)
    except Exception:
        # Best effort: an unreadable or unparsable file yields no data.
        return result

    wrapper = MetadataWrapper(module)
    extractor = CodeExtractor(str(file_path), relative_path)

    try:
        wrapper.visit(extractor)
    except Exception:
        # Best effort: a visitor failure discards this file's data.
        return result

    result.functions = extractor.functions
    result.classes = extractor.classes
    result.raise_sites = extractor.raise_sites
    result.catch_sites = extractor.catch_sites
    result.call_sites = extractor.call_sites
    result.imports = extractor.imports
    result.import_map = extractor.import_map
    result.return_types = extractor.return_types
    result.detected_frameworks = extractor.detected_frameworks

    # The two detectors are independent; a failure in one must not lose the
    # other's results or the structural data gathered above.
    try:
        result.entrypoints = detect_entrypoints(source, str(file_path))
    except Exception:
        pass

    try:
        result.global_handlers = detect_global_handlers(source, str(file_path))
    except Exception:
        pass

    return result
|
|
629
|
+
|
|
630
|
+
|
|
631
|
+
def _should_exclude(path_str: str, exclude_dirs: Sequence[str]) -> bool:
|
|
632
|
+
"""Check if a path should be excluded based on directory names."""
|
|
633
|
+
parts = path_str.split("/")
|
|
634
|
+
for part in parts:
|
|
635
|
+
if part in exclude_dirs:
|
|
636
|
+
return True
|
|
637
|
+
if part.startswith(".") and part != ".":
|
|
638
|
+
return True
|
|
639
|
+
return False
|
|
640
|
+
|
|
641
|
+
|
|
642
|
+
# Method names that _inject_drf_dispatch_calls treats as dispatch targets on
# Django/DRF class-based views.
DRF_HTTP_METHODS = {
    "get",
    "post",
    "put",
    "patch",
    "delete",
    "head",
    "options",
    "trace",
}
DRF_ACTION_METHODS = {
    "list",
    "create",
    "retrieve",
    "update",
    "partial_update",
    "destroy",
}
# Union of both groups.
DRF_DISPATCH_METHODS = DRF_HTTP_METHODS.union(DRF_ACTION_METHODS)
|
|
645
|
+
|
|
646
|
+
|
|
647
|
+
def _extract_single_file_for_process(
    file_path_str: str,
    relative_path: str,
) -> tuple[str, FileExtraction]:
    """Run extraction for one file and pair the result with its relative path.

    Intended as the worker function for a ProcessPoolExecutor: it takes the
    file path as a plain string and never touches the cache (cache lookups
    happen in the main process before work is dispatched).
    """
    return relative_path, extract_from_file(Path(file_path_str), relative_path)
|
|
659
|
+
|
|
660
|
+
|
|
661
|
+
def _inject_drf_dispatch_calls(model: ProgramModel) -> None:
    """Add synthetic call edges from Django/DRF class-based views to their handlers.

    For every entrypoint whose metadata marks it as a Django class view, each
    method of a class with the same name whose name is in
    ``DRF_DISPATCH_METHODS`` gets a ``CallSite`` from the view class to that
    method, appended to ``model.call_sites``.
    """
    for entrypoint in model.entrypoints:
        metadata = entrypoint.metadata
        if metadata.get("framework") != "django" or metadata.get("view_type") != "class":
            continue

        view_class = entrypoint.function
        view_file = entrypoint.file
        view_line = entrypoint.line
        # A file value without any path separator triggers a lookup among the
        # model's function keys below.
        needs_lookup = "/" not in view_file and "\\" not in view_file

        for func_def in model.functions.values():
            is_dispatch_target = (
                func_def.is_method
                and func_def.class_name == view_class
                and func_def.name in DRF_DISPATCH_METHODS
            )
            if not is_dispatch_target:
                continue

            relative_file = view_file
            if needs_lookup:
                # Take the part before the first ":" of the first key that
                # mentions both the class and the method name.
                relative_file = next(
                    (
                        key.split(":")[0]
                        for key in model.functions
                        if view_class in key and func_def.name in key
                    ),
                    view_file,
                )

            model.call_sites.append(
                CallSite(
                    file=view_file,
                    line=view_line,
                    caller_function=view_class,
                    callee_name=func_def.name,
                    is_method_call=True,
                    caller_qualified=f"{relative_file}::{view_class}",
                    callee_qualified=f"{relative_file}::{view_class}.{func_def.name}",
                    resolution_kind=ResolutionKind.IMPLICIT_DISPATCH,
                )
            )
|
|
713
|
+
|
|
714
|
+
|
|
715
|
+
def extract_from_directory(
    directory: Path,
    exclude_dirs: Sequence[str] | None = None,
    use_cache: bool = True,
) -> ProgramModel:
    """Extract structural information from all Python files in a directory.

    Args:
        directory: Root directory; ``*.py`` files are discovered recursively.
        exclude_dirs: Path component names to skip; a default list is used
            when ``None``.
        use_cache: When true, per-file results are looked up in and written to
            a ``FileCache`` located at ``directory / ".flow"``.

    Returns:
        A ``ProgramModel`` aggregating every file's extraction, in relative-path
        order, plus results from custom detectors and synthetic DRF dispatch
        edges.
    """
    import tokenize

    from bubble import timing
    from bubble.cache import FileCache

    if exclude_dirs is None:
        exclude_dirs = [
            "__pycache__",
            ".venv",
            "venv",
            "site-packages",
            "node_modules",
            ".git",
            "dist",
            "build",
            "tests",
            "test",
        ]

    model = ProgramModel()

    custom_detectors = load_detectors(directory)

    cache = FileCache(directory / ".flow") if use_cache else None

    # try/finally so the cache is closed even when extraction raises.
    try:
        with timing.timed("file_discovery"):
            python_files = list(directory.rglob("*.py"))

        work_items: list[tuple[Path, str]] = []
        for file_path in python_files:
            path_str = str(file_path.relative_to(directory))
            if not _should_exclude(path_str, exclude_dirs):
                work_items.append((file_path, path_str))

        extractions: list[tuple[str, FileExtraction]] = []
        cache_misses: list[tuple[Path, FileExtraction]] = []
        work_to_process: list[tuple[str, str]] = []

        # Cache lookups happen here, in the main process; only misses are
        # dispatched to worker processes.
        if cache:
            for file_path, relative_path in work_items:
                cached = cache.get(file_path)
                if cached is not None:
                    extractions.append((relative_path, cached))
                else:
                    work_to_process.append((str(file_path), relative_path))
        else:
            work_to_process = [(str(fp), rp) for fp, rp in work_items]

        max_workers = min(32, (os.cpu_count() or 1) + 4)

        with timing.timed("parallel_extraction"):
            if work_to_process:
                with ProcessPoolExecutor(max_workers=max_workers) as executor:
                    futures = {
                        executor.submit(_extract_single_file_for_process, fp_str, rp): fp_str
                        for fp_str, rp in work_to_process
                    }
                    for future in as_completed(futures):
                        result_path, extraction = future.result()
                        extractions.append((result_path, extraction))
                        cache_misses.append((Path(futures[future]), extraction))

        with timing.timed("cache_writes"):
            if cache:
                for file_path, extraction in cache_misses:
                    cache.put(file_path, extraction)

        # as_completed yields in scheduling order; sort so the aggregated
        # model is identical from run to run.
        extractions.sort(key=lambda item: item[0])

        with timing.timed("model_aggregation"):
            for path_str, extraction in extractions:
                for func in extraction.functions:
                    model.functions[f"{path_str}:{func.qualified_name}"] = func

                for cls in extraction.classes:
                    model.classes[f"{path_str}:{cls.qualified_name}"] = cls
                    model.exception_hierarchy.add_class(cls)

                model.raise_sites.extend(extraction.raise_sites)
                model.catch_sites.extend(extraction.catch_sites)
                model.call_sites.extend(extraction.call_sites)
                model.imports.extend(extraction.imports)
                model.entrypoints.extend(extraction.entrypoints)
                model.global_handlers.extend(extraction.global_handlers)
                model.import_maps[path_str] = extraction.import_map
                model.return_types.update(extraction.return_types)
                model.detected_frameworks.update(extraction.detected_frameworks)

        # Custom detectors run in the main process on every non-excluded file,
        # whether or not its extraction came from the cache.
        if custom_detectors.entrypoint_detectors or custom_detectors.global_handler_detectors:
            for file_path, _path_str in work_items:
                try:
                    # Decode using the file's declared encoding (UTF-8 default).
                    with tokenize.open(file_path) as handle:
                        source = handle.read()
                    model.entrypoints.extend(
                        custom_detectors.detect_entrypoints(source, str(file_path))
                    )
                    model.global_handlers.extend(
                        custom_detectors.detect_global_handlers(source, str(file_path))
                    )
                except Exception:
                    # Best effort: a failing file or detector is skipped.
                    pass
    finally:
        if cache:
            cache.close()

    _inject_drf_dispatch_calls(model)

    return model
|