apisec-code-bolt 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. apisec_code_bolt/__init__.py +42 -0
  2. apisec_code_bolt/__main__.py +11 -0
  3. apisec_code_bolt/analysis/__init__.py +96 -0
  4. apisec_code_bolt/analysis/analyzer.py +2309 -0
  5. apisec_code_bolt/analysis/binding_tracker.py +341 -0
  6. apisec_code_bolt/analysis/call_graph.py +1197 -0
  7. apisec_code_bolt/analysis/call_graph_types.py +332 -0
  8. apisec_code_bolt/analysis/call_resolver.py +988 -0
  9. apisec_code_bolt/analysis/capability_tagger.py +322 -0
  10. apisec_code_bolt/analysis/config_scanner.py +197 -0
  11. apisec_code_bolt/analysis/data_flow.py +1883 -0
  12. apisec_code_bolt/analysis/dependency_extractor.py +959 -0
  13. apisec_code_bolt/analysis/flow_analysis.py +1406 -0
  14. apisec_code_bolt/analysis/hof_catalog.py +61 -0
  15. apisec_code_bolt/analysis/integration_detector.py +1399 -0
  16. apisec_code_bolt/analysis/literal_scanner.py +300 -0
  17. apisec_code_bolt/analysis/path_normalizer.py +55 -0
  18. apisec_code_bolt/analysis/read_site_detector.py +310 -0
  19. apisec_code_bolt/analysis/request_patterns.py +162 -0
  20. apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
  21. apisec_code_bolt/analysis/sink_evidence.py +333 -0
  22. apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
  23. apisec_code_bolt/cli/__init__.py +5 -0
  24. apisec_code_bolt/cli/exit_codes.py +17 -0
  25. apisec_code_bolt/cli/main.py +1069 -0
  26. apisec_code_bolt/cloud/__init__.py +1 -0
  27. apisec_code_bolt/cloud/apisec_client.py +118 -0
  28. apisec_code_bolt/cloud/client.py +255 -0
  29. apisec_code_bolt/core/__init__.py +75 -0
  30. apisec_code_bolt/core/config.py +528 -0
  31. apisec_code_bolt/core/credentials.py +65 -0
  32. apisec_code_bolt/core/discovery.py +433 -0
  33. apisec_code_bolt/core/log_format.py +115 -0
  34. apisec_code_bolt/core/manifest.py +1009 -0
  35. apisec_code_bolt/core/repo.py +280 -0
  36. apisec_code_bolt/core/state.py +59 -0
  37. apisec_code_bolt/core/telemetry.py +451 -0
  38. apisec_code_bolt/core/types.py +587 -0
  39. apisec_code_bolt/fingerprinting/__init__.py +1 -0
  40. apisec_code_bolt/frameworks/__init__.py +29 -0
  41. apisec_code_bolt/frameworks/_jwt_common.py +50 -0
  42. apisec_code_bolt/frameworks/auth_helpers.py +437 -0
  43. apisec_code_bolt/frameworks/base.py +608 -0
  44. apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
  45. apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
  46. apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
  47. apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
  48. apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
  49. apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
  50. apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
  51. apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
  52. apisec_code_bolt/frameworks/java/__init__.py +6 -0
  53. apisec_code_bolt/frameworks/java/_annotations.py +167 -0
  54. apisec_code_bolt/frameworks/java/_constraints.py +128 -0
  55. apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
  56. apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
  57. apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
  58. apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
  59. apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
  60. apisec_code_bolt/frameworks/js/__init__.py +8 -0
  61. apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
  62. apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
  63. apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
  64. apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
  65. apisec_code_bolt/frameworks/python/__init__.py +19 -0
  66. apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
  67. apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
  68. apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
  69. apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
  70. apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
  71. apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
  72. apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
  73. apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
  74. apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
  75. apisec_code_bolt/parsing/__init__.py +62 -0
  76. apisec_code_bolt/parsing/base.py +554 -0
  77. apisec_code_bolt/parsing/csharp/__init__.py +5 -0
  78. apisec_code_bolt/parsing/csharp/language_services.py +203 -0
  79. apisec_code_bolt/parsing/csharp/literals.py +72 -0
  80. apisec_code_bolt/parsing/csharp/parser.py +1158 -0
  81. apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
  82. apisec_code_bolt/parsing/js/__init__.py +5 -0
  83. apisec_code_bolt/parsing/js/language_services.py +118 -0
  84. apisec_code_bolt/parsing/js/parser.py +622 -0
  85. apisec_code_bolt/parsing/jvm/__init__.py +7 -0
  86. apisec_code_bolt/parsing/jvm/language_services.py +270 -0
  87. apisec_code_bolt/parsing/jvm/parser.py +774 -0
  88. apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
  89. apisec_code_bolt/parsing/python/__init__.py +150 -0
  90. apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
  91. apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
  92. apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
  93. apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
  94. apisec_code_bolt/parsing/python/expression_utils.py +221 -0
  95. apisec_code_bolt/parsing/python/extraction_types.py +271 -0
  96. apisec_code_bolt/parsing/python/language_services.py +487 -0
  97. apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
  98. apisec_code_bolt/parsing/python/parser.py +719 -0
  99. apisec_code_bolt/parsing/python/path_resolver.py +576 -0
  100. apisec_code_bolt/parsing/python/router_registry.py +806 -0
  101. apisec_code_bolt/parsing/python/type_resolver.py +730 -0
  102. apisec_code_bolt/parsing/python/visitors.py +1544 -0
  103. apisec_code_bolt/parsing/services.py +544 -0
  104. apisec_code_bolt/query/__init__.py +1 -0
  105. apisec_code_bolt/query/ast_cache.py +182 -0
  106. apisec_code_bolt/query/executor.py +283 -0
  107. apisec_code_bolt/query/handlers.py +832 -0
  108. apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
  109. apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
  110. apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
  111. apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,1406 @@
1
+ """
2
+ Flow-sensitive analysis for improved call resolution.
3
+
4
+ This module implements reaching definitions analysis to provide flow-sensitive
5
+ type bindings. Instead of accumulating ALL types a variable ever has, we track
6
+ which definitions are "live" at each program point.
7
+
8
+ ALGORITHM: Reaching Definitions with Control Flow
9
+ 1. Build a control flow graph (CFG) for each function with proper branches
10
+ 2. For each assignment, create a "definition"
11
+ 3. Compute which definitions reach each program point
12
+ 4. At each use, only consider types from reaching definitions
13
+
14
+ HANDLES:
15
+ - Sequential reassignment: x = A(); x = B(); x.method() uses only B
16
+ - Conditional branches: if cond: x = A() else: x = B() - both types reach
17
+ - Loops: all loop body definitions reach after loop
18
+ - Try/except: exception handler definitions join with try block
19
+ - Return statement analysis: tracks what functions return
20
+ - Attribute assignments: self.attr = value
21
+ - Cross-scope references: module-level variables used in functions
22
+ """
23
+
24
from __future__ import annotations

import logging
from collections import deque
from dataclasses import dataclass, field
from enum import Enum, auto
from pathlib import Path
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    from ..parsing.base import ParsedAssignment, ParsedFile, ParsedFunction
34
+
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+
39
+ # =============================================================================
40
+ # Control Flow Graph (CFG) Representation
41
+ # =============================================================================
42
+
43
+
44
class CFGNodeType(Enum):
    """Kinds of node that can appear in a control flow graph.

    Members are numbered with ``auto()``, so their values follow the
    declaration order below (starting at 1); do not reorder them.
    """

    # --- Function boundaries ---
    ENTRY = auto()  # Function entry point
    EXIT = auto()  # Function exit point

    # --- Statements that bind or use values ---
    ASSIGNMENT = auto()  # Variable assignment
    ATTR_ASSIGNMENT = auto()  # Attribute assignment (self.x = ...)
    CALL = auto()  # Function call (for tracking)

    # --- Branching and merging ---
    BRANCH = auto()  # if/while condition
    JOIN = auto()  # Merge point after branches

    # --- Loops ---
    LOOP_HEADER = auto()  # Loop entry
    LOOP_EXIT = auto()  # Loop exit

    # --- Leaving the function ---
    RETURN = auto()  # Return statement
    RAISE = auto()  # Raise statement

    # --- Exception handling ---
    TRY_START = auto()  # Start of try block
    EXCEPT_START = auto()  # Start of except block
    FINALLY_START = auto()  # Start of finally block
61
+
62
+
63
@dataclass
class CFGNode:
    """A single node of a function's control flow graph.

    Only ``id``, ``node_type`` and ``line`` are required. The remaining
    fields are optional payload whose relevance depends on ``node_type``;
    they keep their defaults when they do not apply.
    """

    # Identity and source position
    id: int
    node_type: CFGNodeType
    line: int

    # Payload of assignment nodes: the bound name and where its value came from
    variable: str | None = None
    assigned_type: str | None = None
    assigned_from_call: str | None = None
    assigned_from_var: str | None = None

    # Payload of attribute assignments (receiver.attribute = ...)
    receiver: str | None = None
    attribute: str | None = None

    # Payload of return nodes
    return_expression: str | None = None
    returns_call_result: bool = False
    returned_call: str | None = None

    # Graph edges, stored as node IDs; each node gets its own fresh lists
    successors: list[int] = field(default_factory=list)
    predecessors: list[int] = field(default_factory=list)

    # Syntactic context the node was found in
    is_in_loop: bool = False
    is_in_conditional: bool = False
    is_in_try: bool = False
    is_in_except: bool = False
    is_in_finally: bool = False

    # Targets of a BRANCH node (node IDs); None on every other node type
    true_branch: int | None = None
    false_branch: int | None = None
100
+
101
+
102
+ @dataclass
103
+ class Definition:
104
+ """A variable definition (assignment point)."""
105
+
106
+ id: int # Unique ID for this definition
107
+ variable: str # Variable being defined
108
+ line: int # Line number
109
+ cfg_node: int # CFG node ID
110
+
111
+ # What type is assigned
112
+ assigned_type: str | None = None
113
+ assigned_from_call: str | None = None
114
+ assigned_from_var: str | None = None
115
+
116
+ # All variables the value derives from (superset of assigned_from_var).
117
+ # For "x = int(y)" → ["y"], "q = f'{a} {b}'" → ["a", "b"].
118
+ source_variables: list[str] = field(default_factory=list)
119
+
120
+ # Is this a parameter definition?
121
+ is_parameter: bool = False
122
+
123
+ # Is this a self/cls binding?
124
+ is_self_cls: bool = False
125
+ enclosing_class: str | None = None
126
+
127
+ # For attribute definitions (self.attr = ...)
128
+ is_attribute: bool = False
129
+ receiver: str | None = None
130
+ attribute: str | None = None
131
+
132
+ # Transformation metadata (for tracking what operation produced this value)
133
+ is_method_call: bool = False
134
+ is_string_interpolation: bool = False
135
+
136
+
137
+ @dataclass
138
+ class ReturnInfo:
139
+ """Information about a return statement."""
140
+
141
+ line: int
142
+ return_type: str | None = None
143
+ returns_call_result: bool = False
144
+ returned_call: str | None = None
145
+ returns_variable: bool = False
146
+ returned_variable: str | None = None
147
+ returns_literal: bool = False
148
+
149
+
150
@dataclass
class CallSiteContext:
    """The syntactic surroundings of a call, recorded per source line."""

    # Line the call appears on
    line: int

    # Which constructs enclose the call
    in_loop: bool = False
    in_conditional: bool = False
    in_try: bool = False
    in_except: bool = False
    in_finally: bool = False
    in_comprehension: bool = False
    in_lambda: bool = False
    in_with: bool = False

    # How deeply the call is nested in each kind of construct
    loop_depth: int = 0
    conditional_depth: int = 0
    try_depth: int = 0
168
+
169
+
170
class ControlFlowGraph:
    """
    A control flow graph for a function with proper branching.

    Used for reaching definitions analysis. Besides the nodes and edges the
    graph also owns the definitions created by assignment nodes, the return
    statements that were recorded, and the call-site contexts keyed by line.
    """

    def __init__(self, function_qname: str):
        """Create a graph that contains only an entry and an exit node."""
        self.function_qname = function_qname
        self._nodes: dict[int, CFGNode] = {}
        self._next_id = 0

        # Entry and exit nodes (both recorded at line 0)
        self.entry = self._create_node(CFGNodeType.ENTRY, 0)
        self.exit = self._create_node(CFGNodeType.EXIT, 0)

        # Definitions in this function
        self._definitions: dict[int, Definition] = {}
        self._next_def_id = 0

        # Variable -> list of definition IDs
        self._var_definitions: dict[str, list[int]] = {}

        # Attribute -> list of definition IDs (for self.x tracking)
        self._attr_definitions: dict[tuple[str, str], list[int]] = {}

        # Return statements
        self._returns: list[ReturnInfo] = []

        # Call site contexts: line -> context
        self._call_contexts: dict[int, CallSiteContext] = {}

    def _create_node(self, node_type: CFGNodeType, line: int) -> int:
        """Create a new CFG node and return its ID."""
        node_id = self._next_id
        self._next_id += 1
        self._nodes[node_id] = CFGNode(
            id=node_id,
            node_type=node_type,
            line=line,
        )
        return node_id

    @staticmethod
    def _apply_context_flags(
        node: CFGNode,
        context_flags: dict[str, bool] | None,
    ) -> None:
        """Copy the ``in_*`` context flags onto ``node``.

        Shared by both kinds of assignment so they record the same set of
        flags. A missing key counts as False; a missing or empty mapping
        leaves the node untouched.
        """
        if not context_flags:
            return
        node.is_in_loop = context_flags.get("in_loop", False)
        node.is_in_conditional = context_flags.get("in_conditional", False)
        node.is_in_try = context_flags.get("in_try", False)
        node.is_in_except = context_flags.get("in_except", False)
        node.is_in_finally = context_flags.get("in_finally", False)

    def add_assignment(
        self,
        line: int,
        variable: str,
        assigned_type: str | None = None,
        assigned_from_call: str | None = None,
        assigned_from_var: str | None = None,
        is_parameter: bool = False,
        is_self_cls: bool = False,
        enclosing_class: str | None = None,
        context_flags: dict[str, bool] | None = None,
        source_variables: list[str] | None = None,
        is_method_call: bool = False,
        is_string_interpolation: bool = False,
    ) -> int:
        """Add an assignment node and create a definition.

        Returns the ID of the new CFG node (not the definition ID). The
        node is not connected to anything; callers add edges themselves.
        """
        node_id = self._create_node(CFGNodeType.ASSIGNMENT, line)
        node = self._nodes[node_id]
        node.variable = variable
        node.assigned_type = assigned_type
        node.assigned_from_call = assigned_from_call
        node.assigned_from_var = assigned_from_var
        self._apply_context_flags(node, context_flags)

        # Create definition
        def_id = self._next_def_id
        self._next_def_id += 1

        self._definitions[def_id] = Definition(
            id=def_id,
            variable=variable,
            line=line,
            cfg_node=node_id,
            assigned_type=assigned_type,
            assigned_from_call=assigned_from_call,
            assigned_from_var=assigned_from_var,
            source_variables=source_variables or [],
            is_parameter=is_parameter,
            is_self_cls=is_self_cls,
            enclosing_class=enclosing_class,
            is_method_call=is_method_call,
            is_string_interpolation=is_string_interpolation,
        )
        self._var_definitions.setdefault(variable, []).append(def_id)

        return node_id

    def add_attribute_assignment(
        self,
        line: int,
        receiver: str,
        attribute: str,
        assigned_type: str | None = None,
        assigned_from_call: str | None = None,
        context_flags: dict[str, bool] | None = None,
    ) -> int:
        """Add an attribute assignment node (self.x = ...).

        The definition is stored under the name ``"<receiver>.<attribute>"``
        and indexed by the ``(receiver, attribute)`` pair. Returns the ID of
        the new CFG node.
        """
        node_id = self._create_node(CFGNodeType.ATTR_ASSIGNMENT, line)
        node = self._nodes[node_id]
        node.receiver = receiver
        node.attribute = attribute
        node.assigned_type = assigned_type
        # Keep the node in step with its Definition, as add_assignment does.
        node.assigned_from_call = assigned_from_call
        self._apply_context_flags(node, context_flags)

        # Create definition for the attribute
        def_id = self._next_def_id
        self._next_def_id += 1

        self._definitions[def_id] = Definition(
            id=def_id,
            variable=f"{receiver}.{attribute}",
            line=line,
            cfg_node=node_id,
            assigned_type=assigned_type,
            assigned_from_call=assigned_from_call,
            is_attribute=True,
            receiver=receiver,
            attribute=attribute,
        )
        self._attr_definitions.setdefault((receiver, attribute), []).append(def_id)

        return node_id

    def add_branch(
        self,
        line: int,
        in_loop: bool = False,
    ) -> int:
        """Add a branch node (if/while condition) and return its ID."""
        node_id = self._create_node(CFGNodeType.BRANCH, line)
        self._nodes[node_id].is_in_loop = in_loop
        return node_id

    def add_join(self, line: int) -> int:
        """Add a join node (merge point after branches) and return its ID."""
        return self._create_node(CFGNodeType.JOIN, line)

    def add_loop_header(self, line: int) -> int:
        """Add a loop header node and return its ID."""
        return self._create_node(CFGNodeType.LOOP_HEADER, line)

    def add_return(
        self,
        line: int,
        return_type: str | None = None,
        returns_call_result: bool = False,
        returned_call: str | None = None,
        returns_variable: bool = False,
        returned_variable: str | None = None,
    ) -> int:
        """Add a return node, record a ReturnInfo for it, and return its ID."""
        node_id = self._create_node(CFGNodeType.RETURN, line)
        node = self._nodes[node_id]
        node.returns_call_result = returns_call_result
        node.returned_call = returned_call

        # Track return info
        self._returns.append(
            ReturnInfo(
                line=line,
                return_type=return_type,
                returns_call_result=returns_call_result,
                returned_call=returned_call,
                returns_variable=returns_variable,
                returned_variable=returned_variable,
            )
        )

        return node_id

    def add_call_context(self, line: int, context: CallSiteContext) -> None:
        """Record the context of a call at a specific line.

        Contexts are keyed by line only, so a later call on the same line
        replaces the earlier entry.
        """
        self._call_contexts[line] = context

    def add_edge(self, from_node: int, to_node: int) -> None:
        """Add a control flow edge.

        The edge is recorded on both endpoints. Duplicate edges are ignored,
        and so is any edge whose endpoints are not both nodes of this graph.
        """
        if from_node not in self._nodes or to_node not in self._nodes:
            return
        source = self._nodes[from_node]
        target = self._nodes[to_node]
        if to_node not in source.successors:
            source.successors.append(to_node)
        if from_node not in target.predecessors:
            target.predecessors.append(from_node)

    def set_branch_targets(
        self,
        branch_node: int,
        true_target: int,
        false_target: int,
    ) -> None:
        """Set the true and false branch targets for a branch node.

        Also adds the two corresponding edges. Does nothing when
        ``branch_node`` is not a node of this graph.
        """
        if branch_node not in self._nodes:
            return
        node = self._nodes[branch_node]
        node.true_branch = true_target
        node.false_branch = false_target
        self.add_edge(branch_node, true_target)
        self.add_edge(branch_node, false_target)

    def get_definitions_for_var(self, variable: str) -> list[Definition]:
        """Get all definitions for a variable, in creation order."""
        return [self._definitions[d] for d in self._var_definitions.get(variable, [])]

    def get_definitions_for_attr(
        self,
        receiver: str,
        attribute: str,
    ) -> list[Definition]:
        """Get all definitions for an attribute (e.g., self.x), in creation order."""
        def_ids = self._attr_definitions.get((receiver, attribute), [])
        return [self._definitions[d] for d in def_ids]

    def get_call_context(self, line: int) -> CallSiteContext | None:
        """Get the call context for a specific line, or None if none was recorded."""
        return self._call_contexts.get(line)

    def get_returns(self) -> list[ReturnInfo]:
        """Get all return statements.

        This is the graph's own list, not a copy.
        """
        return self._returns
409
+
410
+
411
+ # =============================================================================
412
+ # Reaching Definitions Analysis
413
+ # =============================================================================
414
+
415
+
416
class ReachingDefinitions:
    """
    Reaching definitions analysis for a control flow graph.

    Computes which definitions "reach" each program point (are not
    killed by intervening assignments to the same variable).
    """

    def __init__(self, cfg: ControlFlowGraph):
        """Prepare an analysis of ``cfg``; nothing is computed until analyze()."""
        self.cfg = cfg

        # IN[node] = definitions reaching node entry
        # OUT[node] = definitions leaving node
        self._in: dict[int, set[int]] = {}
        self._out: dict[int, set[int]] = {}

        # GEN[node] = definitions created at node
        # KILL[node] = definitions killed at node
        self._gen: dict[int, set[int]] = {}
        self._kill: dict[int, set[int]] = {}

        # Analysis complete?
        self._analyzed = False

    def analyze(self, max_iterations: int = 100) -> int:
        """
        Run the reaching definitions analysis.

        Uses iterative dataflow analysis with a worklist, seeded with every
        node in creation order.

        ``max_iterations`` is a safety limit. It is scaled by the number of
        nodes, because the worklist starts with every node and a fixed cap
        on node visits would cut off any graph with more nodes than the cap
        before each node had been visited even once.

        Returns: Number of node visits performed (0 if already analyzed)
        """
        if self._analyzed:
            return 0

        # Initialize GEN and KILL for each node
        self._compute_gen_kill()

        nodes = self.cfg._nodes

        # Initialize IN and OUT
        for node_id in nodes:
            self._in[node_id] = set()
            self._out[node_id] = set()

        # The deque gives O(1) pops from the front; `pending` mirrors its
        # contents so the "already queued?" check is O(1) as well.
        worklist = deque(nodes)
        pending = set(nodes)
        budget = max_iterations * len(nodes)
        iterations = 0

        while worklist and iterations < budget:
            iterations += 1
            node_id = worklist.popleft()
            pending.discard(node_id)
            node = nodes[node_id]

            # IN = union of OUT of all predecessors
            new_in: set[int] = set()
            for pred in node.predecessors:
                new_in.update(self._out.get(pred, set()))
            self._in[node_id] = new_in

            # OUT = GEN ∪ (IN - KILL)
            new_out = self._gen.get(node_id, set()) | (
                new_in - self._kill.get(node_id, set())
            )

            # If OUT changed, add successors to worklist
            if new_out != self._out[node_id]:
                self._out[node_id] = new_out
                for succ in node.successors:
                    if succ not in pending:
                        pending.add(succ)
                        worklist.append(succ)

        if worklist:
            # The limit was hit first: results are usable but may be incomplete.
            logger.warning(
                "Reaching definitions for %s stopped after %d node visits "
                "without converging",
                self.cfg.function_qname,
                iterations,
            )

        self._analyzed = True
        return iterations

    def _compute_gen_kill(self) -> None:
        """Compute GEN and KILL sets for each node.

        GEN of an assignment node holds the definitions created at that
        node; KILL holds every other definition of the same name. All other
        node types get empty sets.
        """
        # Index the definitions once instead of rescanning them per node.
        defs_at_node: dict[int, set[int]] = {}
        defs_of_var: dict[str, set[int]] = {}
        for def_id, defn in self.cfg._definitions.items():
            defs_at_node.setdefault(defn.cfg_node, set()).add(def_id)
            defs_of_var.setdefault(defn.variable, set()).add(def_id)

        for node_id, node in self.cfg._nodes.items():
            # Get the name being defined, if this node defines one
            if node.node_type == CFGNodeType.ASSIGNMENT:
                var_name = node.variable
            elif node.node_type == CFGNodeType.ATTR_ASSIGNMENT:
                var_name = f"{node.receiver}.{node.attribute}"
            else:
                var_name = None

            if not var_name:
                self._gen[node_id] = set()
                self._kill[node_id] = set()
                continue

            # This node generates its own definitions ...
            generated = set(defs_at_node.get(node_id, ()))
            self._gen[node_id] = generated
            # ... and kills every other definition of the same name.
            self._kill[node_id] = defs_of_var.get(var_name, set()) - generated

    def get_reaching_definitions(
        self,
        variable: str,
        at_line: int,
    ) -> list[Definition]:
        """
        Get definitions of a variable that reach a given line.

        This finds the nearest CFG node at or before the given line
        and returns the definitions in its OUT set (what's live after that
        node), ordered by definition ID. The lookup is by line number only;
        it does not check that the chosen node lies on a path to the line.
        """
        if not self._analyzed:
            self.analyze()

        # Find the CFG node closest to and at/before the given line
        best_node = None
        best_line = -1

        for node_id, node in self.cfg._nodes.items():
            # Skip ENTRY and EXIT nodes for line-based lookup
            if node.node_type in (CFGNodeType.ENTRY, CFGNodeType.EXIT):
                continue
            # Several nodes can share a line. `>=` keeps the one created
            # last, so the OUT set used is the state after the whole line
            # rather than after its first node. This assumes nodes on one
            # line are created in execution order - TODO confirm against
            # the CFG builder.
            if node.line <= at_line and node.line >= best_line:
                best_node = node_id
                best_line = node.line

        # If no node found at or before this line, use entry's successors
        if best_node is None:
            entry_successors = self.cfg._nodes[self.cfg.entry].successors
            if not entry_successors:
                # Fall back to all definitions
                return self.cfg.get_definitions_for_var(variable)
            best_node = entry_successors[0]

        # Get reaching definitions AFTER this node (OUT set), filtered to
        # the requested variable. Sorting keeps the result order stable.
        definitions = self.cfg._definitions
        return [
            definitions[def_id]
            for def_id in sorted(self._out.get(best_node, set()))
            if def_id in definitions and definitions[def_id].variable == variable
        ]

    def get_reaching_attr_definitions(
        self,
        receiver: str,
        attribute: str,
        at_line: int,
    ) -> list[Definition]:
        """Get definitions of an attribute that reach a given line."""
        return self.get_reaching_definitions(f"{receiver}.{attribute}", at_line)

    def get_types_at_point(
        self,
        variable: str,
        at_line: int,
    ) -> set[str]:
        """Get possible types for a variable at a specific line.

        Collects the assigned type of every reaching definition, plus the
        enclosing class for self/cls bindings.
        """
        types: set[str] = set()
        for defn in self.get_reaching_definitions(variable, at_line):
            if defn.assigned_type:
                types.add(defn.assigned_type)
            if defn.is_self_cls and defn.enclosing_class:
                types.add(defn.enclosing_class)

        return types

    def get_variables_deriving_from(
        self,
        source_var: str,
        at_line: int,
    ) -> set[str]:
        """
        Get all variables whose value may derive from source_var.

        Handles variable renaming: if we have uid = user_id, then uid derives
        from user_id. Returns the transitive closure (uid, user_id, and any
        variable assigned from those); source_var itself is always included.

        Uses source_variables (from parser) which captures derivation through
        function calls: "x = int(y)" means x derives from y.

        The closure is built from every definition in the function;
        ``at_line`` is accepted but does not narrow the result.

        Used for taint propagation: when checking if taint passes to foo(x),
        we check if x derives from the tainted variable.
        """
        if not self._analyzed:
            self.analyze()

        # Value-flow graph: edge U -> V when V's value derives from U.
        # Use source_variables (complete list) with assigned_from_var as fallback.
        derived_from: dict[str, set[str]] = {}
        for defn in self.cfg._definitions.values():
            sources = defn.source_variables or (
                [defn.assigned_from_var] if defn.assigned_from_var else []
            )
            for src in sources:
                # A self-edge (x = x + 1) adds nothing to reachability.
                if src != defn.variable:
                    derived_from.setdefault(src, set()).add(defn.variable)

        # Forward reachability from source_var.
        result: set[str] = {source_var}
        worklist = [source_var]
        while worklist:
            current = worklist.pop()
            for target in derived_from.get(current, ()):
                if target not in result:
                    result.add(target)
                    worklist.append(target)

        return result

    def variable_derives_from(
        self,
        variable: str,
        source_var: str,
        at_line: int,
    ) -> bool:
        """
        Check if variable's value may derive from source_var at this line.

        True when the two names are equal or when ``variable`` is in the
        derivation closure of ``source_var``. Otherwise the definitions of
        ``variable`` that reach ``at_line`` are checked for a source in that
        closure.
        """
        if variable == source_var:
            return True

        derived = self.get_variables_deriving_from(source_var, at_line)
        if variable in derived:
            return True

        # NOTE(review): the closure above is built from every definition,
        # so any reaching definition with a source in `derived` has already
        # put `variable` in `derived`. This loop therefore looks unable to
        # change the answer, which makes the check flow-insensitive in
        # practice. Kept as is so existing results do not change.
        for defn in self.get_reaching_definitions(variable, at_line):
            # Check source_variables (complete list) first, then
            # fall back to assigned_from_var.
            all_sources = defn.source_variables or (
                [defn.assigned_from_var] if defn.assigned_from_var else []
            )
            if any(src in derived or src == source_var for src in all_sources):
                return True

        return False

    def get_derivation_chain(
        self,
        target_var: str,
        source_var: str,
        at_line: int,
    ) -> list[Definition]:
        """
        Return the ordered chain of definitions that connects source_var to
        target_var through assignments.

        E.g. for source=user_id → a=int(user_id) → b=html.escape(a):
        returns [def(a, from_call=int, from_var=user_id),
        def(b, from_call=html.escape, from_var=a)]

        The chain is ordered from source to target. Each Definition carries
        ``assigned_from_call`` (the transformation function) and
        ``source_variables`` / ``assigned_from_var`` (the inputs).

        The search covers every definition in the function; ``at_line`` is
        accepted but does not narrow it.

        Returns an empty list if no chain exists or if target == source.
        """
        if target_var == source_var:
            return []
        if not self._analyzed:
            self.analyze()

        # Build adjacency: variable → list[Definition] that defines it
        var_defs: dict[str, list[Definition]] = {}
        for defn in self.cfg._definitions.values():
            var_defs.setdefault(defn.variable, []).append(defn)

        # BFS backwards from target_var to source_var.
        # Each node is a variable name; edges go from defn.variable to each
        # of its source variables (assigned_from_var / source_variables).
        # parent[v] is the definition through which v was first reached,
        # i.e. the definition that consumes v on the way to the target.
        visited: set[str] = set()
        parent: dict[str, Definition | None] = {target_var: None}
        queue = deque([target_var])
        found = False

        while queue and not found:
            current = queue.popleft()
            if current in visited:
                continue
            visited.add(current)

            for defn in var_defs.get(current, []):
                sources = defn.source_variables or (
                    [defn.assigned_from_var] if defn.assigned_from_var else []
                )
                for src in sources:
                    if src in visited:
                        continue
                    if src not in parent:
                        parent[src] = defn
                    if src == source_var:
                        found = True
                        break
                    queue.append(src)
                if found:
                    break

        if not found:
            return []

        # Reconstruct path from source_var → target_var
        chain: list[Definition] = []
        cur = source_var
        while cur != target_var:
            step = parent.get(cur)
            if step is None:
                break
            chain.append(step)
            cur = step.variable

        return chain

    def get_attr_types_at_point(
        self,
        receiver: str,
        attribute: str,
        at_line: int,
    ) -> set[str]:
        """Get possible types for an attribute at a specific line."""
        definitions = self.get_reaching_attr_definitions(receiver, attribute, at_line)
        return {defn.assigned_type for defn in definitions if defn.assigned_type}
772
+
773
+
774
+ # =============================================================================
775
+ # Flow-Sensitive Binding Tracker
776
+ # =============================================================================
777
+
778
+
779
class FlowSensitiveBindings:
    """
    Variable bindings that are sensitive to program point.

    Each function gets its own ControlFlowGraph and, once analysed, its own
    ReachingDefinitions result, so type queries can be answered for a
    specific line. This refines the basic BindingTracker, which has no
    point-specific information.
    """

    def __init__(self):
        # Function qualified name -> CFG, and -> reaching-definitions result.
        self._cfgs: dict[str, ControlFlowGraph] = {}
        self._analyses: dict[str, ReachingDefinitions] = {}

        # (file, variable) -> types bound at module level. These are tracked
        # but are not flow-sensitive within the module.
        self._module_types: dict[tuple[Path, str], set[str]] = {}

        # (file, receiver, attribute) -> types bound at module level.
        self._module_attrs: dict[tuple[Path, str, str], set[str]] = {}

        # (function, local name) -> (file, module variable) it refers to.
        self._module_var_refs: dict[tuple[str, str], tuple[Path, str]] = {}

        # Function qualified name -> possible return types / returned callables.
        self._return_types: dict[str, set[str]] = {}
        self._returned_callables: dict[str, set[str]] = {}

    def build_cfg_for_function(
        self,
        function_qname: str,
        assignments: list[ParsedAssignment],
        parameters: list[tuple[str, str | None]],  # (name, type_annotation)
        enclosing_class: str | None = None,
        control_flow_info: dict[str, Any] | None = None,
    ) -> ControlFlowGraph:
        """
        Create, register and return the CFG for ``function_qname``.

        Parameters become definitions at line 0, chained from the entry node.
        The assignments follow, using ``control_flow_info`` when it is
        non-empty and a plain line-ordered chain otherwise. The last node is
        then linked to the exit node.
        """
        cfg = ControlFlowGraph(function_qname)
        self._cfgs[function_qname] = cfg

        tail = cfg.entry
        for param_name, param_type in parameters:
            # ``self``/``cls`` only count as the receiver inside a class.
            bound_to_class = enclosing_class is not None and param_name in ("self", "cls")
            param_node = cfg.add_assignment(
                line=0,  # Parameters are at entry
                variable=param_name,
                assigned_type=enclosing_class if bound_to_class else param_type,
                is_parameter=True,
                is_self_cls=bound_to_class,
                enclosing_class=enclosing_class if bound_to_class else None,
            )
            cfg.add_edge(tail, param_node)
            tail = param_node

        if not control_flow_info:
            # Fallback: linear CFG (less accurate but works)
            tail = self._build_linear_cfg(cfg, tail, assignments)
        else:
            tail = self._build_cfg_with_control_flow(
                cfg, tail, assignments, control_flow_info
            )

        cfg.add_edge(tail, cfg.exit)
        return cfg

    def register_return_statements(
        self,
        function_qname: str,
        return_statements: list[Any],
    ) -> None:
        """
        Record parsed return statements on the function's CFG.

        Needed for ``function_returns_parameter``; call it after
        ``build_cfg_for_function``. Does nothing when the function has no CFG.
        Only returns of a named variable or of a named call are recorded.
        """
        if function_qname not in self._cfgs:
            return

        cfg = self._cfgs[function_qname]
        for stmt in return_statements:
            if getattr(stmt, "returns_variable", False) and getattr(stmt, "variable_name", None):
                cfg.add_return(
                    line=stmt.line,
                    returns_variable=True,
                    returned_variable=stmt.variable_name,
                )
                continue
            if getattr(stmt, "returns_call", False) and getattr(stmt, "call_name", None):
                cfg.add_return(
                    line=stmt.line,
                    returns_call_result=True,
                    returned_call=stmt.call_name,
                )

    def _build_linear_cfg(
        self,
        cfg: ControlFlowGraph,
        last_node: int,
        assignments: list[ParsedAssignment],
    ) -> int:
        """Chain the assignments in line order after ``last_node``; return the new tail."""
        for assign in sorted(assignments, key=lambda a: a.location.line):
            last_node = self._process_assignment(cfg, last_node, assign)
        return last_node

    def _build_cfg_with_control_flow(
        self,
        cfg: ControlFlowGraph,
        last_node: int,
        assignments: list[ParsedAssignment],
        control_flow_info: dict[str, Any],
    ) -> int:
        """
        Chain the assignments in line order, tagging each with context flags.

        The flags (``in_conditional``, ``in_loop``, ``in_try``, ``in_except``,
        ``in_finally``) are derived from the inclusive line ranges listed in
        ``control_flow_info``. The graph itself is still linear; only the
        flags reflect the control-flow structure.
        """
        line_contexts: dict[int, dict[str, bool]] = {}

        def mark(first: int, last: int, flag: str) -> None:
            # Set ``flag`` on every line of the inclusive range.
            for line_no in range(first, last + 1):
                line_contexts.setdefault(line_no, {})[flag] = True

        for block in control_flow_info.get("if_blocks", []):
            mark(block.get("start", 0), block.get("end", 0), "in_conditional")

        for block in control_flow_info.get("loop_blocks", []):
            mark(block.get("start", 0), block.get("end", 0), "in_loop")

        for block in control_flow_info.get("try_blocks", []):
            mark(block.get("try_start", 0), block.get("try_end", 0), "in_try")

            for exc_start, exc_end in block.get("except_ranges", []):
                mark(exc_start, exc_end, "in_except")

            finally_range = block.get("finally_range")
            if finally_range:
                fin_start, fin_end = finally_range
                mark(fin_start, fin_end, "in_finally")

        for assign in sorted(assignments, key=lambda a: a.location.line):
            flags = line_contexts.get(assign.location.line, {})
            last_node = self._process_assignment(cfg, last_node, assign, flags)

        return last_node

    def _process_assignment(
        self,
        cfg: ControlFlowGraph,
        last_node: int,
        assign: ParsedAssignment,
        context_flags: dict[str, bool] | None = None,
    ) -> int:
        """
        Add one assignment to the CFG after ``last_node`` and return its node.

        A dotted target (``self.x = ...``) becomes an attribute assignment
        split at the first dot; anything else becomes a variable assignment.
        """
        src_vars = getattr(assign, "source_variables", None) or []
        is_method = getattr(assign, "is_method_call", False)
        is_fstr = getattr(assign, "is_string_interpolation", False)

        # First source variable, used as the primary value-flow origin.
        primary_source = src_vars[0] if src_vars else None

        assigned_type = assigned_from_call = assigned_from_var = None
        kind = assign.source_type
        if kind == "call" and assign.source_call:
            # "x = int(y)": the call name doubles as the type, and x is
            # recorded as deriving from y (through int()).
            assigned_from_call = assigned_type = assign.source_call
            assigned_from_var = primary_source
        elif kind == "variable" and assign.source_value:
            assigned_from_var = assign.source_value
        elif kind == "expression":
            # f-strings, binary ops, etc. have no single source variable;
            # the first one is still recorded for value-flow.
            assigned_from_var = primary_source
            if assign.inferred_type:
                assigned_type = assign.inferred_type
        elif assign.inferred_type:
            assigned_type = assign.inferred_type

        if "." in assign.target:
            receiver, _, attribute = assign.target.partition(".")
            node = cfg.add_attribute_assignment(
                line=assign.location.line,
                receiver=receiver,
                attribute=attribute,
                assigned_type=assigned_type,
                assigned_from_call=assigned_from_call,
                context_flags=context_flags,
            )
        else:
            node = cfg.add_assignment(
                line=assign.location.line,
                variable=assign.target,
                assigned_type=assigned_type,
                assigned_from_call=assigned_from_call,
                assigned_from_var=assigned_from_var,
                context_flags=context_flags,
                source_variables=src_vars,
                is_method_call=is_method,
                is_string_interpolation=is_fstr,
            )

        cfg.add_edge(last_node, node)
        return node

    def add_return_info(
        self,
        function_qname: str,
        return_type: str | None = None,
        returns_callable: str | None = None,
    ) -> None:
        """
        Record a return type and/or a returned callable for a function.

        An (initially empty) return-type entry is created for the function
        even when ``return_type`` is not given.
        """
        known_types = self._return_types.setdefault(function_qname, set())
        if return_type:
            known_types.add(return_type)

        if returns_callable:
            self._returned_callables.setdefault(function_qname, set()).add(returns_callable)

    def analyze_function(self, function_qname: str) -> None:
        """Run reaching definitions on the function's CFG, if it has one, and store the result."""
        if function_qname in self._cfgs:
            analysis = ReachingDefinitions(self._cfgs[function_qname])
            analysis.analyze()
            self._analyses[function_qname] = analysis

    def analyze_all(self) -> None:
        """Run reaching definitions for every function that has a CFG."""
        for qname in self._cfgs:
            self.analyze_function(qname)

    def get_types_at_point(
        self,
        variable: str,
        function_qname: str,
        at_line: int,
    ) -> set[str]:
        """
        Return the types ``variable`` may have at ``at_line``.

        With an analysis available, only definitions reaching that line
        count. Without one, every typed definition of the variable in the
        function's CFG is used; with no CFG either, the result is empty.
        """
        if function_qname in self._analyses:
            return self._analyses[function_qname].get_types_at_point(variable, at_line)

        if function_qname not in self._cfgs:
            return set()

        # No analysis yet: fall back to all definitions in the CFG.
        all_defs = self._cfgs[function_qname].get_definitions_for_var(variable)
        return {d.assigned_type for d in all_defs if d.assigned_type}

    def get_attr_types_at_point(
        self,
        receiver: str,
        attribute: str,
        function_qname: str,
        at_line: int,
    ) -> set[str]:
        """
        Return the types ``receiver.attribute`` may have at ``at_line``.

        Uses the same fallback order as ``get_types_at_point``: analysis,
        then all typed definitions in the CFG, then an empty set.
        """
        if function_qname in self._analyses:
            return self._analyses[function_qname].get_attr_types_at_point(
                receiver, attribute, at_line
            )

        if function_qname not in self._cfgs:
            return set()

        all_defs = self._cfgs[function_qname].get_definitions_for_attr(receiver, attribute)
        return {d.assigned_type for d in all_defs if d.assigned_type}

    def get_call_context(
        self,
        function_qname: str,
        at_line: int,
    ) -> CallSiteContext | None:
        """Return the CFG's call context for ``at_line``, or None when the function has no CFG."""
        if function_qname not in self._cfgs:
            return None
        return self._cfgs[function_qname].get_call_context(at_line)

    def add_module_binding(
        self,
        file_path: Path,
        variable: str,
        assigned_type: str,
    ) -> None:
        """Record ``assigned_type`` as a type of the module-level ``variable`` in ``file_path``."""
        self._module_types.setdefault((file_path, variable), set()).add(assigned_type)

    def add_module_attr_binding(
        self,
        file_path: Path,
        receiver: str,
        attribute: str,
        assigned_type: str,
    ) -> None:
        """Record ``assigned_type`` as a type of the module-level ``receiver.attribute``."""
        self._module_attrs.setdefault((file_path, receiver, attribute), set()).add(assigned_type)

    def add_cross_scope_ref(
        self,
        function_qname: str,
        local_var: str,
        file_path: Path,
        module_var: str,
    ) -> None:
        """Record that ``local_var`` inside the function refers to ``module_var`` of ``file_path``."""
        self._module_var_refs[(function_qname, local_var)] = (file_path, module_var)

    def get_module_types(
        self,
        file_path: Path,
        variable: str,
    ) -> set[str]:
        """Return the recorded types of a module-level variable (empty set if none)."""
        return self._module_types.get((file_path, variable), set())

    def get_module_attr_types(
        self,
        file_path: Path,
        receiver: str,
        attribute: str,
    ) -> set[str]:
        """Return the recorded types of a module-level attribute (empty set if none)."""
        return self._module_attrs.get((file_path, receiver, attribute), set())

    def get_return_types(self, function_qname: str) -> set[str]:
        """Return the recorded return types of a function (empty set if none)."""
        return self._return_types.get(function_qname, set())

    def get_returned_callables(self, function_qname: str) -> set[str]:
        """Return the callables recorded as returned by a function (empty set if none)."""
        return self._returned_callables.get(function_qname, set())

    def function_returns_parameter(
        self,
        function_qname: str,
        param_name: str,
    ) -> bool:
        """
        Tell whether the function returns ``param_name`` (return taint propagation).

        Matches ``return param`` and attribute access on it (``return
        param.attr``, ``return param.x.y``). ``return f(param)`` is not
        detected; that would need call-argument analysis. Functions without
        a CFG yield False.
        """
        if function_qname not in self._cfgs:
            return False

        return any(
            ret.returns_variable
            and ret.returned_variable
            and (
                ret.returned_variable == param_name
                or ret.returned_variable.startswith(param_name + ".")
            )
            for ret in self._cfgs[function_qname].get_returns()
        )

    def get_variables_deriving_from(
        self,
        function_qname: str,
        source_var: str,
        at_line: int,
    ) -> set[str]:
        """
        Return the variables whose value may derive from ``source_var`` at ``at_line``.

        This is what lets taint follow variable renames. Without an analysis
        for the function, only ``source_var`` itself is returned.
        """
        if function_qname in self._analyses:
            return self._analyses[function_qname].get_variables_deriving_from(source_var, at_line)
        return {source_var}

    def variable_derives_from(
        self,
        function_qname: str,
        variable: str,
        source_var: str,
        at_line: int,
    ) -> bool:
        """
        Tell whether ``variable`` derives from ``source_var`` at ``at_line``.

        Without an analysis for the function this degrades to name equality.
        """
        if function_qname in self._analyses:
            return self._analyses[function_qname].variable_derives_from(
                variable, source_var, at_line
            )
        return variable == source_var

    def get_derivation_chain(
        self,
        function_qname: str,
        target_var: str,
        source_var: str,
        at_line: int,
    ) -> list[Definition]:
        """
        Return the ordered definitions leading from ``source_var`` to ``target_var``.

        Each Definition in the chain carries ``assigned_from_call`` (the
        transformation function) and transformation metadata. The list is
        empty when the function has no analysis or no chain exists.
        """
        if function_qname in self._analyses:
            return self._analyses[function_qname].get_derivation_chain(
                target_var, source_var, at_line
            )
        return []

    def resolve_cross_scope(
        self,
        function_qname: str,
        variable: str,
        file_path: Path,
    ) -> set[str]:
        """
        Resolve the module-level types behind a possibly cross-scope variable.

        A recorded cross-scope reference is followed to its module variable;
        otherwise ``variable`` is looked up directly at module level in
        ``file_path``.
        """
        ref_file, module_var = self._module_var_refs.get(
            (function_qname, variable), (file_path, variable)
        )
        return self.get_module_types(ref_file, module_var)
1252
+
1253
+
1254
+ # =============================================================================
1255
+ # Integration with Call Graph
1256
+ # =============================================================================
1257
+
1258
+
1259
def build_flow_sensitive_bindings(
    parsed_files: list[ParsedFile],
) -> FlowSensitiveBindings:
    """
    Assemble a FlowSensitiveBindings instance from parsed files.

    For every successfully parsed file this records module-level bindings,
    builds a CFG for each function and each class method, and finally runs
    the reaching-definitions analysis over all of them so types can be
    resolved at specific program points.
    """
    bindings = FlowSensitiveBindings()

    for parsed in parsed_files:
        if not parsed.success:
            continue

        file_path = parsed.path

        for assign in parsed.assignments:
            qualified = assign.target_qualified_name
            # A qualified name of exactly two parts (module.var) is treated
            # as a module-level variable.
            if qualified and qualified.full.count(".") == 1:
                if assign.source_type == "call" and assign.source_call:
                    bindings.add_module_binding(file_path, assign.target, assign.source_call)
                elif assign.inferred_type:
                    bindings.add_module_binding(file_path, assign.target, assign.inferred_type)

            # Dotted targets assigned from a call are recorded as attribute
            # bindings, split at the first dot.
            if "." in assign.target and assign.source_type == "call" and assign.source_call:
                receiver, _, attr = assign.target.partition(".")
                bindings.add_module_attr_binding(
                    file_path, receiver, attr, assign.source_call
                )

        for func in parsed.functions:
            _process_function_for_flow(bindings, func, parsed, file_path)

        for cls in parsed.classes:
            owner_qname = cls.qualified_name.full
            for method in cls.methods:
                _process_function_for_flow(
                    bindings, method, parsed, file_path, enclosing_class=owner_qname
                )

    bindings.analyze_all()
    return bindings
1314
+
1315
+
1316
def _process_function_for_flow(
    bindings: FlowSensitiveBindings,
    func: ParsedFunction,
    parsed: ParsedFile,
    file_path: Path,
    enclosing_class: str | None = None,
) -> None:
    """
    Build the CFG for one function and record its return information.

    Args:
        bindings: Collector that receives the CFG, the return statements and
            the return-type facts.
        func: Parsed function (or method) to process.
        parsed: File the function came from; its assignments are filtered
            down to those belonging to this function.
        file_path: Path of the parsed file (not used by this function).
        enclosing_class: Qualified name of the owning class for methods.
    """
    qname = func.qualified_name.full

    # An assignment belongs to this function when its qualified target name
    # is the function's name itself or lies beneath it.
    scope_prefix = qname + "."
    func_assignments = []
    for assign in parsed.assignments:
        if not assign.target_qualified_name:
            continue
        assign_qname = assign.target_qualified_name.full
        if assign_qname == qname or assign_qname.startswith(scope_prefix):
            func_assignments.append(assign)

    parameters = [(p.name, p.type_annotation) for p in func.parameters]

    # Both attributes are optional on the parsed function; treat missing and
    # empty alike.
    control_flow_info = getattr(func, "control_flow_info", None) or None
    return_statements = getattr(func, "return_statements", None) or []

    bindings.build_cfg_for_function(
        qname,
        func_assignments,
        parameters,
        enclosing_class,
        control_flow_info,
    )

    # Register return statements so function_returns_parameter works.
    if return_statements:
        bindings.register_return_statements(qname, return_statements)

    # Annotated return type.
    if func.return_type:
        bindings.add_return_info(qname, return_type=func.return_type)

    for ret in return_statements:
        if ret.returns_call and ret.call_name:
            # The real type is the callee's return type; for now the call
            # name is stored as a placeholder for that. Calls whose name
            # ends in "_factory" are also recorded as returned callables.
            bindings.add_return_info(
                qname,
                return_type=f"<return:{ret.call_name}>",
                returns_callable=ret.call_name if ret.call_name.endswith("_factory") else None,
            )

        elif ret.returns_variable and ret.variable_name:
            # Placeholder to be resolved once variable types are known.
            bindings.add_return_info(
                qname,
                return_type=f"<var:{ret.variable_name}>",
            )

        elif ret.returns_literal and ret.literal_type:
            bindings.add_return_info(qname, return_type=ret.literal_type)

        elif ret.returns_lambda:
            bindings.add_return_info(
                qname,
                returns_callable=f"{qname}.<lambda>",
            )

        elif ret.returns_comprehension:
            if ret.expression_text:
                bindings.add_return_info(
                    qname,
                    return_type=_comprehension_return_type(ret.expression_text),
                )

        elif ret.returns_none:
            bindings.add_return_info(qname, return_type="None")


def _comprehension_return_type(expression_text: str) -> str:
    """
    Classify a returned comprehension as ``"dict"``, ``"set"`` or ``"list"``.

    When the text parses as a Python expression the AST node type decides,
    so a set comprehension that merely contains a colon (a slice, a lambda,
    a string literal) is not mistaken for a dict comprehension. Text that
    does not parse, or parses to something else, falls back to the textual
    heuristic: a leading ``{`` plus a ``:`` means dict, a leading ``{`` alone
    means set, anything else list.
    """
    import ast

    try:
        node = ast.parse(expression_text.strip(), mode="eval").body
    except (SyntaxError, ValueError):
        node = None

    if isinstance(node, ast.DictComp):
        return "dict"
    if isinstance(node, ast.SetComp):
        return "set"
    if isinstance(node, (ast.ListComp, ast.GeneratorExp)):
        # Generator expressions keep the historical "list" classification.
        return "list"

    if expression_text.startswith("{"):
        return "dict" if ":" in expression_text else "set"
    return "list"