bubble-analysis 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. bubble/__init__.py +3 -0
  2. bubble/cache.py +207 -0
  3. bubble/cli.py +470 -0
  4. bubble/config.py +52 -0
  5. bubble/detectors.py +90 -0
  6. bubble/enums.py +65 -0
  7. bubble/extractor.py +829 -0
  8. bubble/formatters.py +887 -0
  9. bubble/integrations/__init__.py +92 -0
  10. bubble/integrations/base.py +98 -0
  11. bubble/integrations/cli_scripts/__init__.py +49 -0
  12. bubble/integrations/cli_scripts/cli.py +108 -0
  13. bubble/integrations/cli_scripts/detector.py +149 -0
  14. bubble/integrations/django/__init__.py +63 -0
  15. bubble/integrations/django/cli.py +111 -0
  16. bubble/integrations/django/detector.py +331 -0
  17. bubble/integrations/django/semantics.py +40 -0
  18. bubble/integrations/fastapi/__init__.py +57 -0
  19. bubble/integrations/fastapi/cli.py +110 -0
  20. bubble/integrations/fastapi/detector.py +176 -0
  21. bubble/integrations/fastapi/semantics.py +14 -0
  22. bubble/integrations/flask/__init__.py +57 -0
  23. bubble/integrations/flask/cli.py +110 -0
  24. bubble/integrations/flask/detector.py +191 -0
  25. bubble/integrations/flask/semantics.py +19 -0
  26. bubble/integrations/formatters.py +268 -0
  27. bubble/integrations/generic/__init__.py +13 -0
  28. bubble/integrations/generic/config.py +106 -0
  29. bubble/integrations/generic/detector.py +346 -0
  30. bubble/integrations/generic/frameworks.py +145 -0
  31. bubble/integrations/models.py +68 -0
  32. bubble/integrations/queries.py +481 -0
  33. bubble/loader.py +118 -0
  34. bubble/models.py +397 -0
  35. bubble/propagation.py +737 -0
  36. bubble/protocols.py +104 -0
  37. bubble/queries.py +627 -0
  38. bubble/results.py +211 -0
  39. bubble/stubs.py +89 -0
  40. bubble/timing.py +144 -0
  41. bubble_analysis-0.2.0.dist-info/METADATA +264 -0
  42. bubble_analysis-0.2.0.dist-info/RECORD +46 -0
  43. bubble_analysis-0.2.0.dist-info/WHEEL +5 -0
  44. bubble_analysis-0.2.0.dist-info/entry_points.txt +2 -0
  45. bubble_analysis-0.2.0.dist-info/licenses/LICENSE +21 -0
  46. bubble_analysis-0.2.0.dist-info/top_level.txt +1 -0
bubble/extractor.py ADDED
@@ -0,0 +1,829 @@
1
+ """Extract structural information from Python source files using libcst."""
2
+
3
+ import os
4
+ from collections.abc import Sequence
5
+ from concurrent.futures import ProcessPoolExecutor, as_completed
6
+ from pathlib import Path
7
+ from typing import TYPE_CHECKING
8
+
9
+ import libcst as cst
10
+ from libcst.metadata import MetadataWrapper, PositionProvider
11
+
12
+ if TYPE_CHECKING:
13
+ pass
14
+
15
+ from bubble.detectors import detect_entrypoints, detect_global_handlers
16
+ from bubble.enums import ResolutionKind
17
+ from bubble.loader import load_detectors
18
+ from bubble.models import (
19
+ CallSite,
20
+ CatchSite,
21
+ ClassDef,
22
+ Entrypoint,
23
+ FunctionDef,
24
+ GlobalHandler,
25
+ ImportInfo,
26
+ ProgramModel,
27
+ RaiseSite,
28
+ )
29
+
30
+
31
class CodeExtractor(cst.CSTVisitor):
    """Collect structural facts about one Python module in a single CST walk.

    While visiting, the extractor records function and class definitions,
    import statements, ``raise`` statements, ``except`` handlers and call
    sites, together with a local-name -> qualified-name import map, declared
    return types and the web frameworks hinted at by the imports.  Results are
    exposed through the public list/dict/set attributes created in
    ``__init__`` and are read by the caller once the walk has finished.
    """

    METADATA_DEPENDENCIES = (PositionProvider,)

    # Annotation heads treated as ``Annotated[...]`` when looking for Depends().
    _ANNOTATED_NAMES = ("Annotated", "typing.Annotated", "typing_extensions.Annotated")

    def __init__(self, file_path: str, relative_path: str | None = None) -> None:
        """Create an extractor for one file.

        Args:
            file_path: Path of the file as handed to the extractor; stored on
                function, class, import and call records.
            relative_path: Path used to build qualified names and stored on
                raise/catch records.  Falls back to ``file_path`` when omitted.
        """
        self.file_path = file_path
        self.relative_path = relative_path or file_path
        self.functions: list[FunctionDef] = []
        self.classes: list[ClassDef] = []
        self.raise_sites: list[RaiseSite] = []
        self.catch_sites: list[CatchSite] = []
        self.call_sites: list[CallSite] = []
        self.imports: list[ImportInfo] = []
        self.import_map: dict[str, str] = {}
        self.return_types: dict[str, str] = {}
        self.detected_frameworks: set[str] = set()

        self._class_stack: list[str] = []
        self._function_stack: list[str] = []
        # Variable name -> type name for the scope currently being visited.
        self._local_types: dict[str, str] = {}
        # Saved ``_local_types`` of enclosing scopes, restored when a function ends.
        self._local_types_stack: list[dict[str, str]] = []
        self._abstract_methods: dict[str, set[str]] = {}
        self._class_bases: dict[str, list[str]] = {}
        # One empty module is enough to render any node back to source text.
        self._render_module = cst.parse_module("")

    # ------------------------------------------------------------------ imports

    @staticmethod
    def _alias_of(name: cst.ImportAlias) -> str | None:
        """Return the ``as`` alias of an imported name, or ``None`` without one."""
        if name.asname and isinstance(name.asname.name, cst.Name):
            return name.asname.name.value
        return None

    def visit_Import(self, node: cst.Import) -> bool:
        """Record ``import a.b [as c]`` and update the import map."""
        names = node.names if isinstance(node.names, tuple) else ()
        for name in names:
            if not isinstance(name, cst.ImportAlias):
                continue
            module_name = self._get_name_from_expr(name.name)
            alias = self._alias_of(name)
            self.imports.append(
                ImportInfo(
                    file=self.file_path,
                    module=module_name,
                    name=module_name,
                    alias=alias,
                    is_from_import=False,
                )
            )
            if alias:
                # ``import a.b as c`` binds ``c`` to the submodule ``a.b``.
                self.import_map[alias] = module_name
            else:
                # ``import a.b`` binds only the top-level name ``a`` (to package
                # ``a``), so ``a.f()`` must resolve to ``a.f`` and not ``a.b.f``.
                top_level = module_name.split(".")[0]
                self.import_map[top_level] = top_level
            self._detect_framework(module_name)
        return False

    def visit_ImportFrom(self, node: cst.ImportFrom) -> bool:
        """Record ``from module import ...`` and update the import map.

        Imports without a module part (``from . import x``) are skipped.
        """
        if node.module is None:
            return False

        module_name = self._get_name_from_expr(node.module)
        self._detect_framework(module_name)

        if isinstance(node.names, cst.ImportStar):
            self.imports.append(
                ImportInfo(
                    file=self.file_path,
                    module=module_name,
                    name="*",
                    alias=None,
                    is_from_import=True,
                )
            )
            return False

        if isinstance(node.names, tuple):
            for name in node.names:
                if not isinstance(name, cst.ImportAlias):
                    continue
                imported_name = self._get_name_from_expr(name.name)
                alias = self._alias_of(name)
                self.imports.append(
                    ImportInfo(
                        file=self.file_path,
                        module=module_name,
                        name=imported_name,
                        alias=alias,
                        is_from_import=True,
                    )
                )
                local_name = alias or imported_name
                self.import_map[local_name] = f"{module_name}.{imported_name}"
        return False

    # ------------------------------------------------------------------ classes

    def visit_ClassDef(self, node: cst.ClassDef) -> bool:
        """Enter a class: remember its bases and start collecting abstract methods."""
        class_name = node.name.value

        bases: list[str] = []
        for arg in node.bases:
            base_name = self._get_name_from_expr(arg.value)
            if base_name:
                bases.append(base_name)

        self._class_stack.append(class_name)
        self._abstract_methods[class_name] = set()
        self._class_bases[class_name] = bases
        return True

    def leave_ClassDef(self, node: cst.ClassDef) -> None:
        """Leave a class and emit its ``ClassDef`` record.

        A class counts as abstract when it has at least one method judged
        abstract or lists ``ABC`` / ``abc.ABC`` among its bases.
        """
        class_name = self._class_stack.pop()
        pos = self.get_metadata(PositionProvider, node)

        bases = self._class_bases.get(class_name, [])
        abstract_methods = self._abstract_methods.get(class_name, set())
        is_abstract = bool(abstract_methods) or "ABC" in bases or "abc.ABC" in bases

        # The stack now holds only the enclosing classes.
        qualified_name = ".".join(self._class_stack + [class_name])

        self.classes.append(
            ClassDef(
                name=class_name,
                qualified_name=qualified_name,
                file=self.file_path,
                line=pos.start.line,
                bases=bases,
                is_abstract=is_abstract,
                abstract_methods=abstract_methods,
            )
        )

    # ---------------------------------------------------------------- functions

    def visit_FunctionDef(self, node: cst.FunctionDef) -> bool:
        """Enter a function: emit its ``FunctionDef`` record and Depends() edges."""
        pos = self.get_metadata(PositionProvider, node)
        func_name = node.name.value

        is_method = bool(self._class_stack)
        class_name = self._class_stack[-1] if is_method else None
        qualified_name = ".".join(self._class_stack + [func_name])
        is_async = isinstance(node.asynchronous, cst.Asynchronous)

        return_type: str | None = None
        if node.returns is not None:
            return_type = self._get_name_from_expr(node.returns.annotation)
            if return_type:
                self.return_types[f"{self.relative_path}::{qualified_name}"] = return_type

        if is_method and class_name and self._is_abstract_method(node):
            self._abstract_methods[class_name].add(func_name)

        self.functions.append(
            FunctionDef(
                name=func_name,
                qualified_name=qualified_name,
                file=self.file_path,
                line=pos.start.line,
                is_method=is_method,
                is_async=is_async,
                class_name=class_name,
                return_type=return_type,
            )
        )

        caller_qualified = f"{self.relative_path}::{qualified_name}"
        self._extract_depends_calls(node.params, func_name, caller_qualified, pos.start.line)

        self._function_stack.append(func_name)
        # Start the function with an empty type table, but keep the enclosing
        # scope's table so it can be restored when this function ends.
        self._local_types_stack.append(self._local_types)
        self._local_types = {}
        return True

    def _is_abstract_method(self, node: cst.FunctionDef) -> bool:
        """Return True if the method looks abstract.

        A method is judged abstract when it carries an ``abstractmethod``
        decorator, ends with ``raise NotImplementedError``, or consists of a
        single ``pass`` / ``...`` statement.
        """
        return (
            self._has_abstractmethod_decorator(node)
            or self._is_raise_not_implemented(node.body)
            or self._is_pass_or_ellipsis(node.body)
        )

    def _has_abstractmethod_decorator(self, node: cst.FunctionDef) -> bool:
        """Check for an ``@abstractmethod`` or ``@<anything>.abstractmethod`` decorator."""
        for decorator in node.decorators:
            target = decorator.decorator
            if isinstance(target, cst.Name) and target.value == "abstractmethod":
                return True
            if isinstance(target, cst.Attribute) and target.attr.value == "abstractmethod":
                return True
        return False

    def _is_raise_not_implemented(self, body: cst.BaseSuite) -> bool:
        """Check whether the last statement is ``raise NotImplementedError[(...)]``.

        Only the final statement is inspected, so a docstring or other
        statements may precede the raise.
        """
        if not isinstance(body, cst.IndentedBlock):
            return False

        stmts = [s for s in body.body if not isinstance(s, cst.EmptyLine)]
        if not stmts:
            return False

        last_stmt = stmts[-1]
        if not isinstance(last_stmt, cst.SimpleStatementLine) or len(last_stmt.body) != 1:
            return False

        inner = last_stmt.body[0]
        if not isinstance(inner, cst.Raise) or inner.exc is None:
            return False

        exc_name = None
        if isinstance(inner.exc, cst.Name):
            exc_name = inner.exc.value
        elif isinstance(inner.exc, cst.Call) and isinstance(inner.exc.func, cst.Name):
            exc_name = inner.exc.func.value
        return exc_name == "NotImplementedError"

    def _is_pass_or_ellipsis(self, body: cst.BaseSuite) -> bool:
        """Check whether the body is exactly one ``pass`` or ``...`` statement."""
        if not isinstance(body, cst.IndentedBlock):
            return False

        stmts = [s for s in body.body if not isinstance(s, cst.EmptyLine)]
        if len(stmts) != 1:
            return False

        stmt = stmts[0]
        if not isinstance(stmt, cst.SimpleStatementLine) or len(stmt.body) != 1:
            return False

        inner = stmt.body[0]
        if isinstance(inner, cst.Pass):
            return True
        return isinstance(inner, cst.Expr) and isinstance(inner.value, cst.Ellipsis)

    def leave_FunctionDef(self, node: cst.FunctionDef) -> None:
        """Leave a function and restore the enclosing scope's variable types."""
        self._function_stack.pop()
        # Restoring (instead of clearing) keeps the outer function's or the
        # module's assignments usable after a nested definition.
        self._local_types = self._local_types_stack.pop() if self._local_types_stack else {}

    # ------------------------------------------------------------ raise / catch

    def _current_function_name(self) -> str:
        """Name of the enclosing function as stored on raise/catch records.

        ``"<module>"`` outside any function; otherwise the innermost function
        name prefixed with the dotted class stack, if any.
        """
        if not self._function_stack:
            return "<module>"
        return ".".join(self._class_stack + [self._function_stack[-1]])

    def visit_Raise(self, node: cst.Raise) -> bool:
        """Record a ``raise`` statement as a ``RaiseSite``.

        The exception type is the dotted name being raised or called;
        ``"Unknown"`` is used for bare re-raises and for expressions that are
        not a plain name/attribute.  When the first constructor argument is a
        string literal its source text is kept as ``message_expr``.
        """
        pos = self.get_metadata(PositionProvider, node)
        qualified_function = self._current_function_name()

        exc = node.exc
        exception_type = "Unknown"
        message_expr: str | None = None
        code = ""

        if exc is not None:
            code = self._render_module.code_for_node(node)

            if isinstance(exc, cst.Call):
                # A callee that is not a name/attribute yields "", which must
                # not leak out as an empty exception type.
                exception_type = self._get_name_from_expr(exc.func) or "Unknown"
                if exc.args:
                    first_arg = exc.args[0].value
                    if isinstance(
                        first_arg, cst.SimpleString | cst.FormattedString | cst.ConcatenatedString
                    ):
                        message_expr = self._render_module.code_for_node(first_arg)
            elif isinstance(exc, cst.Name | cst.Attribute):
                # Covers both ``raise Err`` and ``raise pkg.Err``.
                exception_type = self._get_name_from_expr(exc)

        self.raise_sites.append(
            RaiseSite(
                file=self.relative_path,
                line=pos.start.line,
                function=qualified_function,
                exception_type=exception_type,
                is_bare_raise=exc is None,
                code=code.strip(),
                message_expr=message_expr,
            )
        )
        return True

    def visit_Try(self, node: cst.Try) -> bool:
        """Record one ``CatchSite`` per ``except`` handler of a ``try`` statement."""
        qualified_function = self._current_function_name()

        for handler in node.handlers:
            caught_types: list[str] = []
            has_bare_except = handler.type is None

            if isinstance(handler.type, cst.Tuple):
                # ``except (A, b.C):`` - keep only plain names and attributes.
                for el in handler.type.elements:
                    if isinstance(el.value, cst.Name | cst.Attribute):
                        name = self._get_name_from_expr(el.value)
                        if name:
                            caught_types.append(name)
            elif handler.type is not None:
                name = self._get_name_from_expr(handler.type)
                if name:
                    caught_types.append(name)

            handler_pos = self.get_metadata(PositionProvider, handler)
            self.catch_sites.append(
                CatchSite(
                    file=self.relative_path,
                    line=handler_pos.start.line,
                    function=qualified_function,
                    caught_types=caught_types,
                    has_bare_except=has_bare_except,
                    has_reraise=self._block_has_reraise(handler.body),
                )
            )
        return True

    # -------------------------------------------------------------------- calls

    def visit_Call(self, node: cst.Call) -> bool:
        """Record a call expression as a ``CallSite``.

        Resolution rules for ``base.attr(...)`` with a plain-name base:
        ``self`` inside a class -> method of the current class; a variable
        with a tracked type -> method of that type; an imported name -> module
        attribute.  A plain ``name(...)`` is resolved through the import map.
        Anything else stays unresolved; calls whose callee is neither a name
        nor an attribute are not recorded.
        """
        func = node.func
        callee_qualified: str | None = None
        resolution_kind: ResolutionKind = ResolutionKind.UNRESOLVED
        is_method_call = False

        if isinstance(func, cst.Attribute):
            callee_name = func.attr.value
            is_method_call = True
            base_expr = func.value

            if isinstance(base_expr, cst.Name):
                base_name = base_expr.value
                if base_name == "self" and self._class_stack:
                    owner = ".".join(self._class_stack)
                    callee_qualified = f"{self.relative_path}::{owner}.{callee_name}"
                    resolution_kind = ResolutionKind.SELF
                elif base_name in self._local_types:
                    type_name = self._local_types[base_name]
                    if type_name in self.import_map:
                        callee_qualified = f"{self.import_map[type_name]}.{callee_name}"
                    else:
                        callee_qualified = f"{self.relative_path}::{type_name}.{callee_name}"
                    resolution_kind = ResolutionKind.CONSTRUCTOR
                elif base_name in self.import_map:
                    callee_qualified = f"{self.import_map[base_name]}.{callee_name}"
                    resolution_kind = ResolutionKind.MODULE_ATTRIBUTE
                    is_method_call = False
        elif isinstance(func, cst.Name):
            callee_name = func.value
            if callee_name in self.import_map:
                callee_qualified = self.import_map[callee_name]
                resolution_kind = ResolutionKind.IMPORT
        else:
            return True

        pos = self.get_metadata(PositionProvider, node)
        current_function = self._function_stack[-1] if self._function_stack else "<module>"

        self.call_sites.append(
            CallSite(
                file=self.file_path,
                line=pos.start.line,
                caller_function=current_function,
                callee_name=callee_name,
                is_method_call=is_method_call,
                caller_qualified=self._get_current_qualified_name(),
                callee_qualified=callee_qualified,
                resolution_kind=resolution_kind,
            )
        )
        return True

    def _get_current_qualified_name(self) -> str:
        """Get the fully qualified name of the current context.

        Joins the relative path, the dotted class stack (if any) and the
        innermost function name (if any) with ``::``.
        """
        # NOTE(review): inside a method this yields "file::Class::method", while
        # visit_FunctionDef and the self-call branch of visit_Call build
        # "file::Class.method".  Left unchanged because consumers of
        # ``caller_qualified`` are outside this file - confirm which form they expect.
        parts = [self.relative_path]
        if self._class_stack:
            parts.append(".".join(self._class_stack))
        if self._function_stack:
            parts.append(self._function_stack[-1])
        return "::".join(parts) if len(parts) > 1 else parts[0]

    def _depends_candidates(self, param: cst.Param) -> list[cst.BaseExpression]:
        """Return the expressions of one parameter that may hold ``Depends(...)``.

        These are the metadata entries of an ``Annotated[T, ...]`` annotation
        (everything after the type itself) followed by the default value.
        """
        candidates: list[cst.BaseExpression] = []
        annotation = param.annotation.annotation if param.annotation is not None else None
        if (
            isinstance(annotation, cst.Subscript)
            and self._get_name_from_expr(annotation.value) in self._ANNOTATED_NAMES
        ):
            for element in annotation.slice[1:]:
                if isinstance(element.slice, cst.Index):
                    candidates.append(element.slice.value)
        if param.default is not None:
            candidates.append(param.default)
        return candidates

    def _extract_depends_calls(
        self,
        params: cst.Parameters,
        caller_function: str,
        caller_qualified: str,
        line: int,
    ) -> None:
        """Extract FastAPI ``Depends()`` declarations from function parameters.

        Positional-only, regular and keyword-only parameters are inspected.
        Both ``param = Depends(f)`` and ``param: Annotated[T, Depends(f)]`` are
        recognised.  Each dependency becomes a ``CallSite`` from the function
        to the dependency, located at the function's line.
        """
        all_params = (
            list(params.posonly_params) + list(params.params) + list(params.kwonly_params)
        )

        for param in all_params:
            for candidate in self._depends_candidates(param):
                dep_info = self._parse_depends(candidate)
                if not dep_info:
                    continue
                self.call_sites.append(
                    CallSite(
                        file=self.file_path,
                        line=line,
                        caller_function=caller_function,
                        callee_name=dep_info["name"],
                        is_method_call=False,
                        caller_qualified=caller_qualified,
                        callee_qualified=dep_info.get("qualified"),
                        resolution_kind=ResolutionKind.FASTAPI_DEPENDS,
                    )
                )

    def _parse_depends(self, node: cst.BaseExpression) -> dict[str, str | None] | None:
        """Parse ``Depends(func)`` and return dependency info.

        Returns ``None`` unless ``node`` is a call to ``Depends`` or
        ``fastapi.Depends`` whose first argument is a name or dotted name.
        Otherwise returns ``{"name": ..., "qualified": ...}`` where
        ``qualified`` is the import-map entry for the name (possibly ``None``).
        """
        if not isinstance(node, cst.Call):
            return None
        if self._get_name_from_expr(node.func) not in ("Depends", "fastapi.Depends"):
            return None
        if not node.args:
            return None

        dep_name = self._get_name_from_expr(node.args[0].value)
        if not dep_name:
            return None

        return {
            "name": dep_name,
            "qualified": self.import_map.get(dep_name),
        }

    # -------------------------------------------------------------- assignments

    def visit_Assign(self, node: cst.Assign) -> bool:
        """Track ``var = TypeName(...)`` assignments for constructor resolution."""
        call = node.value
        if not isinstance(call, cst.Call) or not isinstance(call.func, cst.Name):
            return True

        type_name = call.func.value
        for target in node.targets:
            if isinstance(target.target, cst.Name):
                self._local_types[target.target.value] = type_name
        return True

    def visit_AnnAssign(self, node: cst.AnnAssign) -> bool:
        """Track ``var: T [= Ctor(...)]`` assignments for type resolution.

        The annotation is recorded first; a constructor call on the right-hand
        side, when present, takes precedence over it.
        """
        if not isinstance(node.target, cst.Name):
            return True

        var_name = node.target.value

        if node.annotation and node.annotation.annotation:
            type_name = self._get_name_from_expr(node.annotation.annotation)
            if type_name:
                self._local_types[var_name] = type_name

        value = node.value
        if isinstance(value, cst.Call) and isinstance(value.func, cst.Name):
            self._local_types[var_name] = value.func.value
        return True

    # ------------------------------------------------------------------ helpers

    def _get_name_from_expr(self, expr: cst.BaseExpression) -> str:
        """Extract a dotted name from a ``Name`` or ``Attribute`` expression.

        Returns ``""`` for any other expression kind.  For an attribute whose
        base is not itself a name/attribute only the attribute name is returned.
        """
        if isinstance(expr, cst.Name):
            return expr.value
        if isinstance(expr, cst.Attribute):
            base = self._get_name_from_expr(expr.value)
            return f"{base}.{expr.attr.value}" if base else expr.attr.value
        return ""

    def _detect_framework(self, module_name: str) -> None:
        """Detect frameworks from import module names.

        Matching is a case-insensitive substring test; the first matching
        family (flask, then fastapi/starlette, then django/rest_framework) wins.
        """
        module_lower = module_name.lower()
        if "flask" in module_lower:
            self.detected_frameworks.add("flask")
        elif "fastapi" in module_lower or "starlette" in module_lower:
            self.detected_frameworks.add("fastapi")
        elif "django" in module_lower or "rest_framework" in module_lower:
            self.detected_frameworks.add("django")

    def _block_has_reraise(self, body: cst.BaseSuite) -> bool:
        """Check whether a block has a ``raise`` among its top-level simple statements.

        Raises nested inside compound statements (``if``, ``for``, ...) are not
        considered.
        """
        if not isinstance(body, cst.IndentedBlock):
            return False
        return any(
            isinstance(inner, cst.Raise)
            for stmt in body.body
            if isinstance(stmt, cst.SimpleStatementLine)
            for inner in stmt.body
        )
571
+
572
+
573
class FileExtraction:
    """Plain container for everything extracted from one source file.

    Every attribute starts out empty; ``extract_from_file`` fills them in.
    """

    def __init__(self) -> None:
        # Definitions and statement-level records collected by the CST visitor.
        self.functions: list[FunctionDef] = list()
        self.classes: list[ClassDef] = list()
        self.raise_sites: list[RaiseSite] = list()
        self.catch_sites: list[CatchSite] = list()
        self.call_sites: list[CallSite] = list()
        self.imports: list[ImportInfo] = list()
        # Records produced by the entrypoint / global-handler detectors.
        self.entrypoints: list[Entrypoint] = list()
        self.global_handlers: list[GlobalHandler] = list()
        # Lookup tables and framework hints gathered alongside the records.
        self.import_map: dict[str, str] = dict()
        self.return_types: dict[str, str] = dict()
        self.detected_frameworks: set[str] = set()
588
+
589
+
590
def extract_from_file(file_path: Path, relative_path: str | None = None) -> FileExtraction:
    """Extract structural information from a single Python file.

    The function is best-effort: if the file cannot be read or parsed, or the
    CST walk fails, an empty ``FileExtraction`` is returned instead of raising.
    Failures of the entrypoint / global-handler detectors only leave the
    corresponding list empty.

    Args:
        file_path: File to analyse.
        relative_path: Path used for qualified names; passed through to
            ``CodeExtractor`` (which falls back to ``file_path`` when ``None``).

    Returns:
        The populated (or empty, on failure) ``FileExtraction``.
    """
    result = FileExtraction()

    try:
        # Python source is UTF-8 by default; do not depend on the locale
        # encoding, which differs between platforms.
        source = file_path.read_text(encoding="utf-8")
        module = cst.parse_module(source)
    except Exception:
        # Unreadable, undecodable or syntactically invalid file: skip it.
        return result

    wrapper = MetadataWrapper(module)
    extractor = CodeExtractor(str(file_path), relative_path)

    try:
        wrapper.visit(extractor)
    except Exception:
        # A failed walk would leave partial data; report nothing instead.
        return result

    result.functions = extractor.functions
    result.classes = extractor.classes
    result.raise_sites = extractor.raise_sites
    result.catch_sites = extractor.catch_sites
    result.call_sites = extractor.call_sites
    result.imports = extractor.imports
    result.import_map = extractor.import_map
    result.return_types = extractor.return_types
    result.detected_frameworks = extractor.detected_frameworks

    # The detectors run independently so one failing does not block the other.
    try:
        result.entrypoints = detect_entrypoints(source, str(file_path))
    except Exception:
        pass

    try:
        result.global_handlers = detect_global_handlers(source, str(file_path))
    except Exception:
        pass

    return result
629
+
630
+
631
+ def _should_exclude(path_str: str, exclude_dirs: Sequence[str]) -> bool:
632
+ """Check if a path should be excluded based on directory names."""
633
+ parts = path_str.split("/")
634
+ for part in parts:
635
+ if part in exclude_dirs:
636
+ return True
637
+ if part.startswith(".") and part != ".":
638
+ return True
639
+ return False
640
+
641
+
642
# Method names that receive synthetic dispatch edges in _inject_drf_dispatch_calls.
# HTTP-verb handler names.
DRF_HTTP_METHODS = {
    "get",
    "post",
    "put",
    "patch",
    "delete",
    "head",
    "options",
    "trace",
}
# Action-style handler names.
DRF_ACTION_METHODS = {
    "list",
    "create",
    "retrieve",
    "update",
    "partial_update",
    "destroy",
}
# Union of both name sets.
DRF_DISPATCH_METHODS = DRF_HTTP_METHODS.union(DRF_ACTION_METHODS)
645
+
646
+
647
def _extract_single_file_for_process(
    file_path_str: str,
    relative_path: str,
) -> tuple[str, FileExtraction]:
    """Worker entry point: extract one file and tag the result with its path.

    Meant to be submitted to a ProcessPoolExecutor, so it takes plain strings.
    It never touches the cache; cache lookups happen in the main process
    before work is dispatched.

    Returns:
        ``(relative_path, extraction)`` for the given file.
    """
    return relative_path, extract_from_file(Path(file_path_str), relative_path)
659
+
660
+
661
def _inject_drf_dispatch_calls(model: ProgramModel) -> None:
    """Inject synthetic call edges for Django/DRF class-based view dispatch.

    For every entrypoint whose metadata marks it as a Django class view, a
    ``CallSite`` with ``ResolutionKind.IMPLICIT_DISPATCH`` is appended to
    ``model.call_sites`` from the view class to each of its methods whose name
    is in ``DRF_DISPATCH_METHODS`` (get, post, list, create, ...).

    Args:
        model: Program model to extend in place.
    """
    class_view_entrypoints = [
        ep
        for ep in model.entrypoints
        if ep.metadata.get("framework") == "django" and ep.metadata.get("view_type") == "class"
    ]

    for entrypoint in class_view_entrypoints:
        view_class = entrypoint.function
        view_file = entrypoint.file
        view_line = entrypoint.line
        # NOTE(review): a path with a separator is used verbatim in the
        # qualified names below; presumably it should match the relative paths
        # used by the extractor - confirm against the entrypoint detectors.
        file_has_separator = "/" in view_file or "\\" in view_file

        for func_key, func_def in model.functions.items():
            if not func_def.is_method:
                continue
            if func_def.class_name != view_class:
                continue
            if func_def.name not in DRF_DISPATCH_METHODS:
                continue

            if file_has_separator:
                relative_file = view_file
            else:
                # Keys are built as "<relative path>:<qualified name>", so the
                # matched method's own key gives its file directly.  This avoids
                # a substring scan over all keys that could hit another function.
                relative_file = func_key.split(":")[0]

            model.call_sites.append(
                CallSite(
                    file=view_file,
                    line=view_line,
                    caller_function=view_class,
                    callee_name=func_def.name,
                    is_method_call=True,
                    caller_qualified=f"{relative_file}::{view_class}",
                    callee_qualified=f"{relative_file}::{view_class}.{func_def.name}",
                    resolution_kind=ResolutionKind.IMPLICIT_DISPATCH,
                )
            )
713
+
714
+
715
def extract_from_directory(
    directory: Path,
    exclude_dirs: Sequence[str] | None = None,
    use_cache: bool = True,
) -> ProgramModel:
    """Extract structural information from all Python files in a directory.

    Files are discovered recursively, filtered with ``_should_exclude``, served
    from the file cache when possible and otherwise extracted in worker
    processes.  The per-file results are merged into one ``ProgramModel`` in
    sorted path order, so repeated runs aggregate in the same order.  Custom
    detectors loaded from the directory are then run over every file, and
    synthetic DRF dispatch edges are added last.

    Args:
        directory: Root directory to scan.
        exclude_dirs: Path component names to skip; a default list of
            virtualenv, build, VCS and test directories is used when ``None``.
        use_cache: Whether to read and write the cache under
            ``<directory>/.flow``.

    Returns:
        The aggregated program model.
    """
    from bubble import timing
    from bubble.cache import FileCache

    if exclude_dirs is None:
        exclude_dirs = [
            "__pycache__",
            ".venv",
            "venv",
            "site-packages",
            "node_modules",
            ".git",
            "dist",
            "build",
            "tests",
            "test",
        ]

    model = ProgramModel()

    custom_detectors = load_detectors(directory)

    cache = FileCache(directory / ".flow") if use_cache else None

    try:
        with timing.timed("file_discovery"):
            # Sorted so that work order does not depend on the filesystem.
            python_files = sorted(directory.rglob("*.py"))

            work_items: list[tuple[Path, str]] = []
            for file_path in python_files:
                path_str = str(file_path.relative_to(directory))
                if not _should_exclude(path_str, exclude_dirs):
                    work_items.append((file_path, path_str))

        extractions: list[tuple[str, FileExtraction]] = []
        cache_misses: list[tuple[Path, str, FileExtraction]] = []
        work_to_process: list[tuple[str, str]] = []

        if cache:
            for file_path, relative_path in work_items:
                cached = cache.get(file_path)
                if cached is not None:
                    extractions.append((relative_path, cached))
                else:
                    work_to_process.append((str(file_path), relative_path))
        else:
            work_to_process = [(str(fp), rp) for fp, rp in work_items]

        with timing.timed("parallel_extraction"):
            if work_to_process:
                # Never ask for more workers than there are files to process.
                max_workers = min(32, (os.cpu_count() or 1) + 4, len(work_to_process))
                with ProcessPoolExecutor(max_workers=max_workers) as executor:
                    futures = {
                        executor.submit(_extract_single_file_for_process, fp_str, rp): (fp_str, rp)
                        for fp_str, rp in work_to_process
                    }
                    for future in as_completed(futures):
                        fp_str, path_str = futures[future]
                        result_path, extraction = future.result()
                        extractions.append((result_path, extraction))
                        cache_misses.append((Path(fp_str), path_str, extraction))

        with timing.timed("cache_writes"):
            if cache:
                for file_path, _path_str, extraction in cache_misses:
                    cache.put(file_path, extraction)

        with timing.timed("model_aggregation"):
            # Workers finish in arbitrary order; sort so the merged lists and
            # dicts are built in the same order on every run.
            extractions.sort(key=lambda item: item[0])

            for path_str, extraction in extractions:
                for func in extraction.functions:
                    model.functions[f"{path_str}:{func.qualified_name}"] = func

                for cls in extraction.classes:
                    model.classes[f"{path_str}:{cls.qualified_name}"] = cls
                    model.exception_hierarchy.add_class(cls)

                model.raise_sites.extend(extraction.raise_sites)
                model.catch_sites.extend(extraction.catch_sites)
                model.call_sites.extend(extraction.call_sites)
                model.imports.extend(extraction.imports)
                model.entrypoints.extend(extraction.entrypoints)
                model.global_handlers.extend(extraction.global_handlers)
                model.import_maps[path_str] = extraction.import_map
                model.return_types.update(extraction.return_types)
                model.detected_frameworks.update(extraction.detected_frameworks)

        # Only re-read the files when there is at least one custom detector.
        if custom_detectors.entrypoint_detectors or custom_detectors.global_handler_detectors:
            for file_path, _path_str in work_items:
                try:
                    source = file_path.read_text(encoding="utf-8")
                    model.entrypoints.extend(
                        custom_detectors.detect_entrypoints(source, str(file_path))
                    )
                    model.global_handlers.extend(
                        custom_detectors.detect_global_handlers(source, str(file_path))
                    )
                except Exception:
                    # Best-effort: a broken file or detector must not abort the run.
                    pass
    finally:
        # Release the cache even when extraction or aggregation fails.
        if cache:
            cache.close()

    _inject_drf_dispatch_calls(model)

    return model