apisec-code-bolt 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apisec_code_bolt/__init__.py +42 -0
- apisec_code_bolt/__main__.py +11 -0
- apisec_code_bolt/analysis/__init__.py +96 -0
- apisec_code_bolt/analysis/analyzer.py +2309 -0
- apisec_code_bolt/analysis/binding_tracker.py +341 -0
- apisec_code_bolt/analysis/call_graph.py +1197 -0
- apisec_code_bolt/analysis/call_graph_types.py +332 -0
- apisec_code_bolt/analysis/call_resolver.py +988 -0
- apisec_code_bolt/analysis/capability_tagger.py +322 -0
- apisec_code_bolt/analysis/config_scanner.py +197 -0
- apisec_code_bolt/analysis/data_flow.py +1883 -0
- apisec_code_bolt/analysis/dependency_extractor.py +959 -0
- apisec_code_bolt/analysis/flow_analysis.py +1406 -0
- apisec_code_bolt/analysis/hof_catalog.py +61 -0
- apisec_code_bolt/analysis/integration_detector.py +1399 -0
- apisec_code_bolt/analysis/literal_scanner.py +300 -0
- apisec_code_bolt/analysis/path_normalizer.py +55 -0
- apisec_code_bolt/analysis/read_site_detector.py +310 -0
- apisec_code_bolt/analysis/request_patterns.py +162 -0
- apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
- apisec_code_bolt/analysis/sink_evidence.py +333 -0
- apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
- apisec_code_bolt/cli/__init__.py +5 -0
- apisec_code_bolt/cli/exit_codes.py +17 -0
- apisec_code_bolt/cli/main.py +1069 -0
- apisec_code_bolt/cloud/__init__.py +1 -0
- apisec_code_bolt/cloud/apisec_client.py +118 -0
- apisec_code_bolt/cloud/client.py +255 -0
- apisec_code_bolt/core/__init__.py +75 -0
- apisec_code_bolt/core/config.py +528 -0
- apisec_code_bolt/core/credentials.py +65 -0
- apisec_code_bolt/core/discovery.py +433 -0
- apisec_code_bolt/core/log_format.py +115 -0
- apisec_code_bolt/core/manifest.py +1009 -0
- apisec_code_bolt/core/repo.py +280 -0
- apisec_code_bolt/core/state.py +59 -0
- apisec_code_bolt/core/telemetry.py +451 -0
- apisec_code_bolt/core/types.py +587 -0
- apisec_code_bolt/fingerprinting/__init__.py +1 -0
- apisec_code_bolt/frameworks/__init__.py +29 -0
- apisec_code_bolt/frameworks/_jwt_common.py +50 -0
- apisec_code_bolt/frameworks/auth_helpers.py +437 -0
- apisec_code_bolt/frameworks/base.py +608 -0
- apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
- apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
- apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
- apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
- apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
- apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
- apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
- apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
- apisec_code_bolt/frameworks/java/__init__.py +6 -0
- apisec_code_bolt/frameworks/java/_annotations.py +167 -0
- apisec_code_bolt/frameworks/java/_constraints.py +128 -0
- apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
- apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
- apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
- apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
- apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
- apisec_code_bolt/frameworks/js/__init__.py +8 -0
- apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
- apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
- apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
- apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
- apisec_code_bolt/frameworks/python/__init__.py +19 -0
- apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
- apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
- apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
- apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
- apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
- apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
- apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
- apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
- apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
- apisec_code_bolt/parsing/__init__.py +62 -0
- apisec_code_bolt/parsing/base.py +554 -0
- apisec_code_bolt/parsing/csharp/__init__.py +5 -0
- apisec_code_bolt/parsing/csharp/language_services.py +203 -0
- apisec_code_bolt/parsing/csharp/literals.py +72 -0
- apisec_code_bolt/parsing/csharp/parser.py +1158 -0
- apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
- apisec_code_bolt/parsing/js/__init__.py +5 -0
- apisec_code_bolt/parsing/js/language_services.py +118 -0
- apisec_code_bolt/parsing/js/parser.py +622 -0
- apisec_code_bolt/parsing/jvm/__init__.py +7 -0
- apisec_code_bolt/parsing/jvm/language_services.py +270 -0
- apisec_code_bolt/parsing/jvm/parser.py +774 -0
- apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
- apisec_code_bolt/parsing/python/__init__.py +150 -0
- apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
- apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
- apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
- apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
- apisec_code_bolt/parsing/python/expression_utils.py +221 -0
- apisec_code_bolt/parsing/python/extraction_types.py +271 -0
- apisec_code_bolt/parsing/python/language_services.py +487 -0
- apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
- apisec_code_bolt/parsing/python/parser.py +719 -0
- apisec_code_bolt/parsing/python/path_resolver.py +576 -0
- apisec_code_bolt/parsing/python/router_registry.py +806 -0
- apisec_code_bolt/parsing/python/type_resolver.py +730 -0
- apisec_code_bolt/parsing/python/visitors.py +1544 -0
- apisec_code_bolt/parsing/services.py +544 -0
- apisec_code_bolt/query/__init__.py +1 -0
- apisec_code_bolt/query/ast_cache.py +182 -0
- apisec_code_bolt/query/executor.py +283 -0
- apisec_code_bolt/query/handlers.py +832 -0
- apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
- apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
- apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
- apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,1406 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Flow-sensitive analysis for improved call resolution.
|
|
3
|
+
|
|
4
|
+
This module implements reaching definitions analysis to provide flow-sensitive
|
|
5
|
+
type bindings. Instead of accumulating ALL types a variable ever has, we track
|
|
6
|
+
which definitions are "live" at each program point.
|
|
7
|
+
|
|
8
|
+
ALGORITHM: Reaching Definitions with Control Flow
|
|
9
|
+
1. Build a control flow graph (CFG) for each function with proper branches
|
|
10
|
+
2. For each assignment, create a "definition"
|
|
11
|
+
3. Compute which definitions reach each program point
|
|
12
|
+
4. At each use, only consider types from reaching definitions
|
|
13
|
+
|
|
14
|
+
HANDLES:
|
|
15
|
+
- Sequential reassignment: x = A(); x = B(); x.method() uses only B
|
|
16
|
+
- Conditional branches: if cond: x = A() else: x = B() - both types reach
|
|
17
|
+
- Loops: all loop body definitions reach after loop
|
|
18
|
+
- Try/except: exception handler definitions join with try block
|
|
19
|
+
- Return statement analysis: tracks what functions return
|
|
20
|
+
- Attribute assignments: self.attr = value
|
|
21
|
+
- Cross-scope references: module-level variables used in functions
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import logging
|
|
27
|
+
from dataclasses import dataclass, field
|
|
28
|
+
from enum import Enum, auto
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
from typing import TYPE_CHECKING, Any
|
|
31
|
+
|
|
32
|
+
if TYPE_CHECKING:
|
|
33
|
+
from ..parsing.base import ParsedAssignment, ParsedFile, ParsedFunction
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
logger = logging.getLogger(__name__)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# =============================================================================
|
|
40
|
+
# Control Flow Graph (CFG) Representation
|
|
41
|
+
# =============================================================================
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class CFGNodeType(Enum):
    """Kinds of node that can appear in a function's control flow graph.

    Member values come from ``auto()``, so they follow declaration order.
    """

    # --- Function boundaries ---
    ENTRY = auto()  # Where the function starts
    EXIT = auto()  # Where the function ends

    # --- Statements that are tracked ---
    ASSIGNMENT = auto()  # Plain variable assignment
    ATTR_ASSIGNMENT = auto()  # Assignment to an attribute (self.x = ...)
    CALL = auto()  # Function call (recorded for tracking)

    # --- Branching and merging ---
    BRANCH = auto()  # Condition of an if/while
    JOIN = auto()  # Point where branches merge again
    LOOP_HEADER = auto()  # Entry of a loop
    LOOP_EXIT = auto()  # Exit of a loop

    # --- Leaving the function ---
    RETURN = auto()  # return statement
    RAISE = auto()  # raise statement

    # --- Exception handling regions ---
    TRY_START = auto()  # Beginning of a try block
    EXCEPT_START = auto()  # Beginning of an except block
    FINALLY_START = auto()  # Beginning of a finally block
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass
class CFGNode:
    """A single node of a function's control flow graph.

    Only ``id``, ``node_type`` and ``line`` are required; the remaining
    fields default to "empty" and are filled in depending on the node kind.
    """

    # Identity and source position
    id: int
    node_type: CFGNodeType
    line: int

    # Assignment payload: the target and what was assigned to it
    variable: str | None = None
    assigned_type: str | None = None
    assigned_from_call: str | None = None
    assigned_from_var: str | None = None

    # Attribute-assignment payload (receiver.attribute = ...)
    receiver: str | None = None
    attribute: str | None = None

    # Return payload
    return_expression: str | None = None
    returns_call_result: bool = False
    returned_call: str | None = None

    # Graph wiring: IDs of neighbouring nodes (each node gets its own lists)
    successors: list[int] = field(default_factory=list)
    predecessors: list[int] = field(default_factory=list)

    # Syntactic context the node was found in
    is_in_loop: bool = False
    is_in_conditional: bool = False
    is_in_try: bool = False
    is_in_except: bool = False
    is_in_finally: bool = False

    # Targets of a BRANCH node (node IDs); unused for other node kinds
    true_branch: int | None = None
    false_branch: int | None = None
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@dataclass
|
|
103
|
+
class Definition:
|
|
104
|
+
"""A variable definition (assignment point)."""
|
|
105
|
+
|
|
106
|
+
id: int # Unique ID for this definition
|
|
107
|
+
variable: str # Variable being defined
|
|
108
|
+
line: int # Line number
|
|
109
|
+
cfg_node: int # CFG node ID
|
|
110
|
+
|
|
111
|
+
# What type is assigned
|
|
112
|
+
assigned_type: str | None = None
|
|
113
|
+
assigned_from_call: str | None = None
|
|
114
|
+
assigned_from_var: str | None = None
|
|
115
|
+
|
|
116
|
+
# All variables the value derives from (superset of assigned_from_var).
|
|
117
|
+
# For "x = int(y)" → ["y"], "q = f'{a} {b}'" → ["a", "b"].
|
|
118
|
+
source_variables: list[str] = field(default_factory=list)
|
|
119
|
+
|
|
120
|
+
# Is this a parameter definition?
|
|
121
|
+
is_parameter: bool = False
|
|
122
|
+
|
|
123
|
+
# Is this a self/cls binding?
|
|
124
|
+
is_self_cls: bool = False
|
|
125
|
+
enclosing_class: str | None = None
|
|
126
|
+
|
|
127
|
+
# For attribute definitions (self.attr = ...)
|
|
128
|
+
is_attribute: bool = False
|
|
129
|
+
receiver: str | None = None
|
|
130
|
+
attribute: str | None = None
|
|
131
|
+
|
|
132
|
+
# Transformation metadata (for tracking what operation produced this value)
|
|
133
|
+
is_method_call: bool = False
|
|
134
|
+
is_string_interpolation: bool = False
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@dataclass
|
|
138
|
+
class ReturnInfo:
|
|
139
|
+
"""Information about a return statement."""
|
|
140
|
+
|
|
141
|
+
line: int
|
|
142
|
+
return_type: str | None = None
|
|
143
|
+
returns_call_result: bool = False
|
|
144
|
+
returned_call: str | None = None
|
|
145
|
+
returns_variable: bool = False
|
|
146
|
+
returned_variable: str | None = None
|
|
147
|
+
returns_literal: bool = False
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@dataclass
class CallSiteContext:
    """Describes the syntactic surroundings of a call."""

    # Source line of the call
    line: int

    # Which constructs enclose the call
    in_loop: bool = False
    in_conditional: bool = False
    in_try: bool = False
    in_except: bool = False
    in_finally: bool = False
    in_comprehension: bool = False
    in_lambda: bool = False
    in_with: bool = False

    # How deeply the call is nested in each kind of construct
    loop_depth: int = 0
    conditional_depth: int = 0
    try_depth: int = 0
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
class ControlFlowGraph:
    """
    A control flow graph for a function with proper branching.

    Besides nodes and edges, the graph records every definition created by
    an assignment, the return statements seen, and the context of call
    sites. Used for reaching definitions analysis.
    """

    def __init__(self, function_qname: str):
        """Create an empty graph containing only the ENTRY and EXIT nodes."""
        self.function_qname = function_qname
        self._nodes: dict[int, CFGNode] = {}
        self._next_id = 0

        # Entry and exit nodes (both recorded at line 0)
        self.entry = self._create_node(CFGNodeType.ENTRY, 0)
        self.exit = self._create_node(CFGNodeType.EXIT, 0)

        # Definitions in this function, keyed by definition ID
        self._definitions: dict[int, Definition] = {}
        self._next_def_id = 0

        # Variable -> list of definition IDs
        self._var_definitions: dict[str, list[int]] = {}

        # (receiver, attribute) -> list of definition IDs (for self.x tracking)
        self._attr_definitions: dict[tuple[str, str], list[int]] = {}

        # Return statements
        self._returns: list[ReturnInfo] = []

        # Call site contexts: line -> context
        self._call_contexts: dict[int, CallSiteContext] = {}

    def _create_node(self, node_type: CFGNodeType, line: int) -> int:
        """Create a new CFG node and return its ID."""
        node_id = self._next_id
        self._next_id += 1
        self._nodes[node_id] = CFGNode(
            id=node_id,
            node_type=node_type,
            line=line,
        )
        return node_id

    @staticmethod
    def _apply_context_flags(
        node: CFGNode,
        context_flags: dict[str, bool] | None,
    ) -> None:
        """Copy the ``in_*`` context flags onto the node's ``is_in_*`` fields.

        Shared by both assignment kinds so they always record the same set
        of flags. Keys missing from ``context_flags`` count as False; a
        missing or empty mapping leaves the node's defaults untouched.
        """
        if not context_flags:
            return
        node.is_in_loop = context_flags.get("in_loop", False)
        node.is_in_conditional = context_flags.get("in_conditional", False)
        node.is_in_try = context_flags.get("in_try", False)
        node.is_in_except = context_flags.get("in_except", False)
        node.is_in_finally = context_flags.get("in_finally", False)

    def _allocate_definition_id(self) -> int:
        """Return the next unused definition ID."""
        def_id = self._next_def_id
        self._next_def_id += 1
        return def_id

    def add_assignment(
        self,
        line: int,
        variable: str,
        assigned_type: str | None = None,
        assigned_from_call: str | None = None,
        assigned_from_var: str | None = None,
        is_parameter: bool = False,
        is_self_cls: bool = False,
        enclosing_class: str | None = None,
        context_flags: dict[str, bool] | None = None,
        source_variables: list[str] | None = None,
        is_method_call: bool = False,
        is_string_interpolation: bool = False,
    ) -> int:
        """Add an assignment node and create a definition.

        Returns the ID of the new CFG node (not the definition ID).
        """
        node_id = self._create_node(CFGNodeType.ASSIGNMENT, line)
        node = self._nodes[node_id]
        node.variable = variable
        node.assigned_type = assigned_type
        node.assigned_from_call = assigned_from_call
        node.assigned_from_var = assigned_from_var
        self._apply_context_flags(node, context_flags)

        # Create definition
        def_id = self._allocate_definition_id()
        self._definitions[def_id] = Definition(
            id=def_id,
            variable=variable,
            line=line,
            cfg_node=node_id,
            assigned_type=assigned_type,
            assigned_from_call=assigned_from_call,
            assigned_from_var=assigned_from_var,
            source_variables=source_variables or [],
            is_parameter=is_parameter,
            is_self_cls=is_self_cls,
            enclosing_class=enclosing_class,
            is_method_call=is_method_call,
            is_string_interpolation=is_string_interpolation,
        )
        self._var_definitions.setdefault(variable, []).append(def_id)

        return node_id

    def add_attribute_assignment(
        self,
        line: int,
        receiver: str,
        attribute: str,
        assigned_type: str | None = None,
        assigned_from_call: str | None = None,
        context_flags: dict[str, bool] | None = None,
    ) -> int:
        """Add an attribute assignment node (self.x = ...).

        The definition is stored under the name ``"<receiver>.<attribute>"``
        and indexed by the ``(receiver, attribute)`` pair. Returns the ID of
        the new CFG node.
        """
        node_id = self._create_node(CFGNodeType.ATTR_ASSIGNMENT, line)
        node = self._nodes[node_id]
        node.receiver = receiver
        node.attribute = attribute
        node.assigned_type = assigned_type
        # Keep the node in step with its definition, as add_assignment does.
        node.assigned_from_call = assigned_from_call
        self._apply_context_flags(node, context_flags)

        # Create definition for the attribute
        def_id = self._allocate_definition_id()
        self._definitions[def_id] = Definition(
            id=def_id,
            variable=f"{receiver}.{attribute}",
            line=line,
            cfg_node=node_id,
            assigned_type=assigned_type,
            assigned_from_call=assigned_from_call,
            is_attribute=True,
            receiver=receiver,
            attribute=attribute,
        )
        self._attr_definitions.setdefault((receiver, attribute), []).append(def_id)

        return node_id

    def add_branch(
        self,
        line: int,
        in_loop: bool = False,
    ) -> int:
        """Add a branch node (if/while condition) and return its ID."""
        node_id = self._create_node(CFGNodeType.BRANCH, line)
        self._nodes[node_id].is_in_loop = in_loop
        return node_id

    def add_join(self, line: int) -> int:
        """Add a join node (merge point after branches) and return its ID."""
        return self._create_node(CFGNodeType.JOIN, line)

    def add_loop_header(self, line: int) -> int:
        """Add a loop header node and return its ID."""
        return self._create_node(CFGNodeType.LOOP_HEADER, line)

    def add_return(
        self,
        line: int,
        return_type: str | None = None,
        returns_call_result: bool = False,
        returned_call: str | None = None,
        returns_variable: bool = False,
        returned_variable: str | None = None,
    ) -> int:
        """Add a return node, record its ReturnInfo, and return the node ID."""
        node_id = self._create_node(CFGNodeType.RETURN, line)
        node = self._nodes[node_id]
        node.returns_call_result = returns_call_result
        node.returned_call = returned_call

        # Track return info
        self._returns.append(
            ReturnInfo(
                line=line,
                return_type=return_type,
                returns_call_result=returns_call_result,
                returned_call=returned_call,
                returns_variable=returns_variable,
                returned_variable=returned_variable,
            )
        )

        return node_id

    def add_call_context(self, line: int, context: CallSiteContext) -> None:
        """Record the context of a call at a specific line.

        A later call for the same line replaces the earlier context.
        """
        self._call_contexts[line] = context

    def add_edge(self, from_node: int, to_node: int) -> None:
        """Add a control flow edge.

        Does nothing if either node ID is unknown; an edge that already
        exists is not added twice.
        """
        if from_node not in self._nodes or to_node not in self._nodes:
            return
        successors = self._nodes[from_node].successors
        if to_node not in successors:
            successors.append(to_node)
        predecessors = self._nodes[to_node].predecessors
        if from_node not in predecessors:
            predecessors.append(from_node)

    def set_branch_targets(
        self,
        branch_node: int,
        true_target: int,
        false_target: int,
    ) -> None:
        """Set the true and false branch targets for a branch node.

        Also adds the two corresponding edges. Does nothing if
        ``branch_node`` is not a known node ID.
        """
        if branch_node not in self._nodes:
            return
        node = self._nodes[branch_node]
        node.true_branch = true_target
        node.false_branch = false_target
        self.add_edge(branch_node, true_target)
        self.add_edge(branch_node, false_target)

    def get_definitions_for_var(self, variable: str) -> list[Definition]:
        """Get all definitions for a variable, in creation order."""
        return [self._definitions[d] for d in self._var_definitions.get(variable, [])]

    def get_definitions_for_attr(
        self,
        receiver: str,
        attribute: str,
    ) -> list[Definition]:
        """Get all definitions for an attribute (e.g., self.x), in creation order."""
        def_ids = self._attr_definitions.get((receiver, attribute), [])
        return [self._definitions[d] for d in def_ids]

    def get_call_context(self, line: int) -> CallSiteContext | None:
        """Get the call context for a specific line, or None if none was recorded."""
        return self._call_contexts.get(line)

    def get_returns(self) -> list[ReturnInfo]:
        """Get all return statements (the graph's own list, not a copy)."""
        return self._returns
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
# =============================================================================
|
|
412
|
+
# Reaching Definitions Analysis
|
|
413
|
+
# =============================================================================
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
class ReachingDefinitions:
|
|
417
|
+
"""
|
|
418
|
+
Reaching definitions analysis for a control flow graph.
|
|
419
|
+
|
|
420
|
+
Computes which definitions "reach" each program point (are not
|
|
421
|
+
killed by intervening assignments to the same variable).
|
|
422
|
+
"""
|
|
423
|
+
|
|
424
|
+
def __init__(self, cfg: ControlFlowGraph):
    """Prepare an analysis over ``cfg``; nothing is computed yet."""
    self.cfg = cfg

    # Per-node dataflow sets, keyed by CFG node ID and holding definition IDs:
    #   IN   - definitions reaching the node's entry
    #   OUT  - definitions leaving the node
    #   GEN  - definitions created at the node
    #   KILL - definitions killed at the node
    self._in: dict[int, set[int]] = {}
    self._out: dict[int, set[int]] = {}
    self._gen: dict[int, set[int]] = {}
    self._kill: dict[int, set[int]] = {}

    # Flipped to True once analyze() has run
    self._analyzed = False
|
|
439
|
+
|
|
440
|
+
def analyze(self, max_iterations: int = 100) -> int:
    """
    Run the reaching definitions analysis.

    Uses iterative dataflow analysis with a worklist that is seeded with
    every node in creation order.

    ``max_iterations`` is a safety bound expressed in passes over the
    graph: at most ``max_iterations * number_of_nodes`` node visits are
    performed. Counting single visits against the bound would stop before
    every node had been processed once on any graph with more nodes than
    ``max_iterations``, leaving the later nodes with empty IN/OUT sets.

    Returns: Number of node visits performed (0 if already analyzed)
    """
    # Imported locally so the module's import block stays untouched.
    from collections import deque

    if self._analyzed:
        return 0

    # Initialize GEN and KILL for each node
    self._compute_gen_kill()

    nodes = self.cfg._nodes

    # Initialize IN and OUT
    for node_id in nodes:
        self._in[node_id] = set()
        self._out[node_id] = set()

    visit_budget = max_iterations * max(1, len(nodes))

    # The deque gives O(1) pops from the front; the companion set gives
    # O(1) "already queued?" checks.
    worklist = deque(nodes)
    queued = set(nodes)
    nothing: frozenset[int] = frozenset()
    iterations = 0

    while worklist and iterations < visit_budget:
        iterations += 1
        node_id = worklist.popleft()
        queued.discard(node_id)
        node = nodes[node_id]

        # IN = union of OUT of all predecessors
        new_in: set[int] = set()
        for pred in node.predecessors:
            new_in.update(self._out.get(pred, nothing))
        self._in[node_id] = new_in

        # OUT = GEN ∪ (IN - KILL)
        new_out = set(self._gen.get(node_id, nothing))
        new_out.update(new_in - self._kill.get(node_id, nothing))

        # If OUT changed, the successors have to be revisited
        if new_out != self._out[node_id]:
            self._out[node_id] = new_out
            for succ in node.successors:
                if succ not in queued:
                    queued.add(succ)
                    worklist.append(succ)

    self._analyzed = True
    return iterations
|
|
489
|
+
|
|
490
|
+
def _compute_gen_kill(self) -> None:
    """Compute GEN and KILL sets for each node.

    Every node gets an (initially empty) GEN and KILL set. For ASSIGNMENT
    and ATTR_ASSIGNMENT nodes with a non-empty target name:

    - GEN holds the definitions created at that node;
    - KILL holds every other definition of the same name, where attribute
      targets are named ``"<receiver>.<attribute>"``.
    """
    # Index the definitions once instead of rescanning all of them for
    # every node.
    defs_at_node: dict[int, set[int]] = {}
    defs_of_name: dict[str, set[int]] = {}
    for def_id, defn in self.cfg._definitions.items():
        defs_at_node.setdefault(defn.cfg_node, set()).add(def_id)
        defs_of_name.setdefault(defn.variable, set()).add(def_id)

    no_defs: frozenset[int] = frozenset()

    for node_id, node in self.cfg._nodes.items():
        gen: set[int] = set()
        kill: set[int] = set()
        self._gen[node_id] = gen
        self._kill[node_id] = kill

        # Get the name being defined; other node kinds define nothing
        if node.node_type == CFGNodeType.ASSIGNMENT:
            var_name = node.variable
        elif node.node_type == CFGNodeType.ATTR_ASSIGNMENT:
            var_name = f"{node.receiver}.{node.attribute}"
        else:
            continue

        if not var_name:
            continue

        # This node generates its own definitions ...
        own = defs_at_node.get(node_id, no_defs)
        gen.update(own)

        # ... and kills all other definitions of the same name.
        kill.update(defs_of_name.get(var_name, no_defs) - own)
|
|
518
|
+
|
|
519
|
+
def get_reaching_definitions(
    self,
    variable: str,
    at_line: int,
) -> list[Definition]:
    """
    Return the definitions of ``variable`` that reach ``at_line``.

    The lookup is line based: among all nodes except ENTRY and EXIT, the
    one with the greatest line number not exceeding ``at_line`` is chosen
    (the first such node in node order when several share that line), and
    the matching definitions from its OUT set are returned.

    If no node qualifies, the first successor of the entry node is used;
    if the entry node has no successors, all definitions of the variable
    are returned. Runs the analysis first if it has not been run yet.
    """
    if not self._analyzed:
        self.analyze()

    nodes = self.cfg._nodes
    boundary_kinds = (CFGNodeType.ENTRY, CFGNodeType.EXIT)

    # Locate the anchor node: latest line that is still <= at_line.
    anchor = None
    anchor_line = -1
    for node_id, node in nodes.items():
        if node.node_type in boundary_kinds:
            continue
        if anchor_line < node.line <= at_line:
            anchor, anchor_line = node_id, node.line

    if anchor is None:
        after_entry = nodes[self.cfg.entry].successors
        if not after_entry:
            # Nothing to anchor on: fall back to all definitions
            return self.cfg.get_definitions_for_var(variable)
        anchor = after_entry[0]

    # Definitions live AFTER the anchor node, narrowed to the variable
    known = self.cfg._definitions
    live_ids = self._out.get(anchor, set())
    return [
        known[def_id]
        for def_id in live_ids
        if def_id in known and known[def_id].variable == variable
    ]
|
|
566
|
+
|
|
567
|
+
def get_reaching_attr_definitions(
    self,
    receiver: str,
    attribute: str,
    at_line: int,
) -> list[Definition]:
    """Return the definitions of ``receiver.attribute`` reaching ``at_line``.

    Delegates to get_reaching_definitions using the dotted name
    ``"<receiver>.<attribute>"``.
    """
    dotted_name = f"{receiver}.{attribute}"
    return self.get_reaching_definitions(dotted_name, at_line)
|
|
575
|
+
|
|
576
|
+
def get_types_at_point(
    self,
    variable: str,
    at_line: int,
) -> set[str]:
    """Return the set of possible types of ``variable`` at ``at_line``.

    Collected from the reaching definitions: each definition contributes
    its assigned type (when set) and, for self/cls bindings, the name of
    the enclosing class (when set).
    """
    found: set[str] = set()
    for defn in self.get_reaching_definitions(variable, at_line):
        candidates = (
            defn.assigned_type,
            defn.enclosing_class if defn.is_self_cls else None,
        )
        found.update(name for name in candidates if name)
    return found
|
|
592
|
+
|
|
593
|
+
def get_variables_deriving_from(
    self,
    source_var: str,
    at_line: int,
) -> set[str]:
    """
    Return every variable whose value may derive from ``source_var``.

    The result is a transitive closure and always contains ``source_var``
    itself: with ``uid = user_id`` and ``q = f(uid)``, asking for
    ``user_id`` yields ``{user_id, uid, q}``.

    Derivation is read from each definition's ``source_variables`` (which
    also covers values passed through calls, e.g. ``x = int(y)``), falling
    back to ``assigned_from_var`` when that list is empty. All definitions
    in the graph are considered; ``at_line`` is accepted but does not
    narrow the result.

    Used for taint propagation: to decide whether taint passes to
    ``foo(x)``, check whether ``x`` is in the returned set.
    """
    if not self._analyzed:
        self.analyze()

    # dependents[u] = variables that take (part of) their value from u
    dependents: dict[str, set[str]] = {}
    for defn in self.cfg._definitions.values():
        if defn.source_variables:
            inputs = defn.source_variables
        elif defn.assigned_from_var:
            inputs = [defn.assigned_from_var]
        else:
            continue
        for src in inputs:
            # A variable deriving from itself adds no information
            if src != defn.variable:
                dependents.setdefault(src, set()).add(defn.variable)

    # Walk the dependents graph outward from source_var
    derived: set[str] = {source_var}
    pending = [source_var]
    while pending:
        current = pending.pop()
        newly_found = dependents.get(current, set()) - derived
        derived |= newly_found
        pending.extend(newly_found)

    return derived
|
|
648
|
+
|
|
649
|
+
def variable_derives_from(
    self,
    variable: str,
    source_var: str,
    at_line: int,
) -> bool:
    """
    Tell whether ``variable`` may carry a value derived from ``source_var``.

    The answer is True when any of the following holds:

    * ``variable`` and ``source_var`` are the same name;
    * ``variable`` is in the set returned by
      ``get_variables_deriving_from(source_var, at_line)``;
    * some definition of ``variable`` reaching ``at_line`` lists, among its
      ``source_variables`` (or, when that list is empty, its
      ``assigned_from_var``), either ``source_var`` or a member of that set.

    Otherwise the answer is False.
    """
    # A name trivially derives from itself.
    if variable == source_var:
        return True

    tainted = self.get_variables_deriving_from(source_var, at_line)
    if variable in tainted:
        return True

    for reaching_def in self.get_reaching_definitions(variable, at_line):
        # Prefer the complete input list; fall back to the single
        # recorded source variable when the list is empty.
        inputs = reaching_def.source_variables
        if not inputs:
            single = reaching_def.assigned_from_var
            inputs = [single] if single else []
        if any(name in tainted or name == source_var for name in inputs):
            return True

    return False
|
|
681
|
+
|
|
682
|
+
def get_derivation_chain(
    self,
    target_var: str,
    source_var: str,
    at_line: int,
) -> list[Definition]:
    """
    Return the ordered chain of definitions linking source_var to target_var.

    E.g. for source=user_id → a=int(user_id) → b=html.escape(a):
        returns [def(a, from_call=int, from_var=user_id),
                 def(b, from_call=html.escape, from_var=a)]

    The chain is ordered from source to target. Each Definition carries
    ``assigned_from_call`` (the transformation function) and
    ``source_variables`` / ``assigned_from_var`` (the inputs).

    The search walks every definition recorded in the CFG; ``at_line`` is
    accepted for interface symmetry but is not consulted, so the result is
    not restricted to definitions reaching a particular line.

    Returns an empty list if no chain exists or if target == source.
    """
    # Local import keeps this method self-contained; deque gives O(1)
    # dequeues where list.pop(0) is O(n).
    from collections import deque

    if target_var == source_var:
        return []
    if not self._analyzed:
        self.analyze()

    # variable name -> every Definition that assigns it
    defs_by_var: dict[str, list[Definition]] = {}
    for defn in self.cfg._definitions.values():
        defs_by_var.setdefault(defn.variable, []).append(defn)

    # Breadth-first search backwards from target_var. ``discovered_via``
    # maps each discovered variable to the Definition through which it was
    # first reached (that Definition assigns the variable one step closer
    # to the target). Presence in the map also marks a variable as already
    # seen, so each variable is enqueued once.
    discovered_via: dict[str, Definition | None] = {target_var: None}
    pending = deque([target_var])
    reached_source = False

    while pending and not reached_source:
        current = pending.popleft()
        for defn in defs_by_var.get(current, []):
            inputs = defn.source_variables or (
                [defn.assigned_from_var] if defn.assigned_from_var else []
            )
            for src in inputs:
                if src in discovered_via:
                    continue
                discovered_via[src] = defn
                if src == source_var:
                    reached_source = True
                    break
                pending.append(src)
            if reached_source:
                break

    if not reached_source:
        return []

    # Follow the recorded links from source_var forward to target_var.
    chain: list[Definition] = []
    cursor = source_var
    while cursor != target_var:
        step = discovered_via.get(cursor)
        if step is None:
            break
        chain.append(step)
        cursor = step.variable

    return chain
|
|
756
|
+
|
|
757
|
+
def get_attr_types_at_point(
    self,
    receiver: str,
    attribute: str,
    at_line: int,
) -> set[str]:
    """
    Collect the possible types of ``receiver.attribute`` at ``at_line``.

    Gathers the truthy ``assigned_type`` of every definition returned by
    ``get_reaching_attr_definitions``; definitions without a type are ignored.
    """
    reaching = self.get_reaching_attr_definitions(receiver, attribute, at_line)
    return {d.assigned_type for d in reaching if d.assigned_type}
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
# =============================================================================
|
|
775
|
+
# Flow-Sensitive Binding Tracker
|
|
776
|
+
# =============================================================================
|
|
777
|
+
|
|
778
|
+
|
|
779
|
+
class FlowSensitiveBindings:
    """
    Flow-sensitive variable bindings backed by reaching definitions.

    Holds one ControlFlowGraph per function qualified name and, after
    analysis, one ReachingDefinitions result per function, so callers can
    ask for type information at a specific line. Module-level bindings,
    cross-scope references and return information are kept in plain
    lookup tables. Intended as an enhancement over the basic BindingTracker.
    """

    def __init__(self):
        # function qname -> CFG / reaching-definitions analysis
        self._cfgs: dict[str, ControlFlowGraph] = {}
        self._analyses: dict[str, ReachingDefinitions] = {}

        # (file, variable) -> types bound at module level (not flow-sensitive)
        self._module_types: dict[tuple[Path, str], set[str]] = {}

        # (file, receiver, attribute) -> types bound to that attribute
        self._module_attrs: dict[tuple[Path, str, str], set[str]] = {}

        # (function qname, local name) -> (file, module variable) it refers to
        self._module_var_refs: dict[tuple[str, str], tuple[Path, str]] = {}

        # function qname -> possible return types / returned callables
        self._return_types: dict[str, set[str]] = {}
        self._returned_callables: dict[str, set[str]] = {}

    def build_cfg_for_function(
        self,
        function_qname: str,
        assignments: list[ParsedAssignment],
        parameters: list[tuple[str, str | None]],  # (name, type_annotation)
        enclosing_class: str | None = None,
        control_flow_info: dict[str, Any] | None = None,
    ) -> ControlFlowGraph:
        """
        Create, register and return the CFG for ``function_qname``.

        Parameters become definitions at line 0 chained from the entry
        node; a ``self``/``cls`` parameter of a method is typed as the
        enclosing class. Assignments are then appended, using
        ``control_flow_info`` when it is truthy and a plain linear layout
        otherwise, and the last node is linked to the exit node.
        """
        graph = ControlFlowGraph(function_qname)
        self._cfgs[function_qname] = graph

        tail = graph.entry
        for name, annotation in parameters:
            binds_instance = name in ("self", "cls") and enclosing_class is not None
            param_node = graph.add_assignment(
                line=0,  # parameters live at function entry
                variable=name,
                assigned_type=enclosing_class if binds_instance else annotation,
                is_parameter=True,
                is_self_cls=binds_instance,
                enclosing_class=enclosing_class if binds_instance else None,
            )
            graph.add_edge(tail, param_node)
            tail = param_node

        if control_flow_info:
            tail = self._build_cfg_with_control_flow(
                graph, tail, assignments, control_flow_info
            )
        else:
            # No structural information: fall back to a straight-line CFG.
            tail = self._build_linear_cfg(graph, tail, assignments)

        graph.add_edge(tail, graph.exit)
        return graph

    def register_return_statements(
        self,
        function_qname: str,
        return_statements: list[Any],
    ) -> None:
        """
        Copy parsed return statements into the function's CFG.

        Needed for ``function_returns_parameter``; call it after
        ``build_cfg_for_function``. Does nothing when no CFG is registered
        for ``function_qname``. Only returns of a named variable or of a
        named call are recorded; other return statements are ignored.
        """
        if function_qname not in self._cfgs:
            return
        graph = self._cfgs[function_qname]

        for ret in return_statements:
            if getattr(ret, "returns_variable", False) and getattr(ret, "variable_name", None):
                graph.add_return(
                    line=ret.line,
                    returns_variable=True,
                    returned_variable=ret.variable_name,
                )
                continue
            if getattr(ret, "returns_call", False) and getattr(ret, "call_name", None):
                graph.add_return(
                    line=ret.line,
                    returns_call_result=True,
                    returned_call=ret.call_name,
                )

    def _build_linear_cfg(
        self,
        cfg: ControlFlowGraph,
        last_node: int,
        assignments: list[ParsedAssignment],
    ) -> int:
        """Chain the assignments in line order after ``last_node``; return the new tail."""
        tail = last_node
        for assign in sorted(assignments, key=lambda a: a.location.line):
            tail = self._process_assignment(cfg, tail, assign)
        return tail

    def _build_cfg_with_control_flow(
        self,
        cfg: ControlFlowGraph,
        last_node: int,
        assignments: list[ParsedAssignment],
        control_flow_info: dict[str, Any],
    ) -> int:
        """
        Chain the assignments in line order, tagging each with context flags.

        ``control_flow_info`` may provide ``if_blocks`` and ``loop_blocks``
        (dicts with ``start``/``end``) and ``try_blocks`` (dicts with
        ``try_start``/``try_end``, ``except_ranges`` and an optional
        ``finally_range``). Every line inside such a range (inclusive)
        receives the matching flag. The graph itself stays linear; only
        the flags passed along with each assignment differ.
        """
        flags_by_line: dict[int, dict[str, bool]] = {}

        def mark(first: int, last: int, flag: str) -> None:
            # Set ``flag`` on every line of the inclusive range.
            for line_no in range(first, last + 1):
                flags_by_line.setdefault(line_no, {})[flag] = True

        for block in control_flow_info.get("if_blocks", []):
            mark(block.get("start", 0), block.get("end", 0), "in_conditional")

        for block in control_flow_info.get("loop_blocks", []):
            mark(block.get("start", 0), block.get("end", 0), "in_loop")

        for block in control_flow_info.get("try_blocks", []):
            mark(block.get("try_start", 0), block.get("try_end", 0), "in_try")

            for exc_first, exc_last in block.get("except_ranges", []):
                mark(exc_first, exc_last, "in_except")

            finally_range = block.get("finally_range")
            if finally_range:
                fin_first, fin_last = finally_range
                mark(fin_first, fin_last, "in_finally")

        tail = last_node
        for assign in sorted(assignments, key=lambda a: a.location.line):
            flags = flags_by_line.get(assign.location.line, {})
            tail = self._process_assignment(cfg, tail, assign, flags)
        return tail

    def _process_assignment(
        self,
        cfg: ControlFlowGraph,
        last_node: int,
        assign: ParsedAssignment,
        context_flags: dict[str, bool] | None = None,
    ) -> int:
        """
        Add one assignment to the CFG after ``last_node`` and return its node.

        A target containing a dot is recorded as an attribute assignment
        (receiver is the text before the first dot); anything else is a
        plain variable assignment that also carries value-flow details.
        """
        src_vars = getattr(assign, "source_variables", None) or []
        is_method = getattr(assign, "is_method_call", False)
        is_fstr = getattr(assign, "is_string_interpolation", False)
        first_source = src_vars[0] if src_vars else None

        assigned_type = None
        from_call = None
        from_var = None

        kind = assign.source_type
        if kind == "call" and assign.source_call:
            # "x = int(y)": the call name doubles as the type, and the
            # first source variable is kept so x is known to derive from y.
            from_call = assigned_type = assign.source_call
            from_var = first_source
        elif kind == "variable" and assign.source_value:
            from_var = assign.source_value
        else:
            if kind == "expression":
                # f-strings, binary ops, ...: no single source, keep the first.
                from_var = first_source
            if assign.inferred_type:
                assigned_type = assign.inferred_type

        if "." in assign.target:
            # Attribute assignment such as "self.x = ...".
            receiver, _, attribute = assign.target.partition(".")
            node = cfg.add_attribute_assignment(
                line=assign.location.line,
                receiver=receiver,
                attribute=attribute,
                assigned_type=assigned_type,
                assigned_from_call=from_call,
                context_flags=context_flags,
            )
        else:
            node = cfg.add_assignment(
                line=assign.location.line,
                variable=assign.target,
                assigned_type=assigned_type,
                assigned_from_call=from_call,
                assigned_from_var=from_var,
                context_flags=context_flags,
                source_variables=src_vars,
                is_method_call=is_method,
                is_string_interpolation=is_fstr,
            )

        cfg.add_edge(last_node, node)
        return node

    def add_return_info(
        self,
        function_qname: str,
        return_type: str | None = None,
        returns_callable: str | None = None,
    ) -> None:
        """
        Record a return type and/or a returned callable for a function.

        An entry for ``function_qname`` is always created in the return
        type table, even when ``return_type`` is empty.
        """
        known_types = self._return_types.setdefault(function_qname, set())
        if return_type:
            known_types.add(return_type)

        if returns_callable:
            self._returned_callables.setdefault(function_qname, set()).add(returns_callable)

    def analyze_function(self, function_qname: str) -> None:
        """Run and store reaching definitions for one function, if it has a CFG."""
        if function_qname not in self._cfgs:
            return

        analysis = ReachingDefinitions(self._cfgs[function_qname])
        analysis.analyze()
        self._analyses[function_qname] = analysis

    def analyze_all(self) -> None:
        """Run reaching definitions for every function that has a CFG."""
        for qname in self._cfgs:
            self.analyze_function(qname)

    def get_types_at_point(
        self,
        variable: str,
        function_qname: str,
        at_line: int,
    ) -> set[str]:
        """
        Return the types ``variable`` may have at ``at_line``.

        With an analysis available only definitions reaching that line
        count. Without one, every typed definition of the variable in the
        function's CFG is used; with no CFG either, the result is empty.
        """
        if function_qname in self._analyses:
            return self._analyses[function_qname].get_types_at_point(variable, at_line)

        if function_qname not in self._cfgs:
            return set()

        all_defs = self._cfgs[function_qname].get_definitions_for_var(variable)
        return {d.assigned_type for d in all_defs if d.assigned_type}

    def get_attr_types_at_point(
        self,
        receiver: str,
        attribute: str,
        function_qname: str,
        at_line: int,
    ) -> set[str]:
        """
        Return the types ``receiver.attribute`` may have at ``at_line``.

        Uses the same fallback order as ``get_types_at_point``: analysis,
        then all typed definitions in the CFG, then an empty set.
        """
        if function_qname in self._analyses:
            return self._analyses[function_qname].get_attr_types_at_point(
                receiver, attribute, at_line
            )

        if function_qname not in self._cfgs:
            return set()

        all_defs = self._cfgs[function_qname].get_definitions_for_attr(receiver, attribute)
        return {d.assigned_type for d in all_defs if d.assigned_type}

    def get_call_context(
        self,
        function_qname: str,
        at_line: int,
    ) -> CallSiteContext | None:
        """Return the CFG's call context at ``at_line``, or None without a CFG."""
        if function_qname not in self._cfgs:
            return None
        return self._cfgs[function_qname].get_call_context(at_line)

    def add_module_binding(
        self,
        file_path: Path,
        variable: str,
        assigned_type: str,
    ) -> None:
        """Record ``assigned_type`` as a type of a module-level variable."""
        self._module_types.setdefault((file_path, variable), set()).add(assigned_type)

    def add_module_attr_binding(
        self,
        file_path: Path,
        receiver: str,
        attribute: str,
        assigned_type: str,
    ) -> None:
        """Record ``assigned_type`` as a type of ``receiver.attribute`` in a file."""
        self._module_attrs.setdefault((file_path, receiver, attribute), set()).add(assigned_type)

    def add_cross_scope_ref(
        self,
        function_qname: str,
        local_var: str,
        file_path: Path,
        module_var: str,
    ) -> None:
        """Remember that ``local_var`` inside a function names a module-level variable."""
        self._module_var_refs[(function_qname, local_var)] = (file_path, module_var)

    def get_module_types(
        self,
        file_path: Path,
        variable: str,
    ) -> set[str]:
        """Return the recorded types of a module-level variable (empty if none)."""
        return self._module_types.get((file_path, variable), set())

    def get_module_attr_types(
        self,
        file_path: Path,
        receiver: str,
        attribute: str,
    ) -> set[str]:
        """Return the recorded types of a module-level attribute (empty if none)."""
        return self._module_attrs.get((file_path, receiver, attribute), set())

    def get_return_types(self, function_qname: str) -> set[str]:
        """Return the recorded return types of a function (empty if none)."""
        return self._return_types.get(function_qname, set())

    def get_returned_callables(self, function_qname: str) -> set[str]:
        """Return the callables a function is recorded as returning (empty if none)."""
        return self._returned_callables.get(function_qname, set())

    def function_returns_parameter(
        self,
        function_qname: str,
        param_name: str,
    ) -> bool:
        """
        Tell whether the function returns ``param_name`` (return taint propagation).

        True when a registered return statement returns:
        - the parameter itself (exact name), or
        - an attribute path rooted at it (``param.attr``, ``param.x.y``).

        ``return f(param)`` is not detected (would need call-arg analysis).
        False when the function has no CFG.
        """
        if function_qname not in self._cfgs:
            return False

        return any(
            ret.returns_variable
            and ret.returned_variable
            and (
                ret.returned_variable == param_name
                or ret.returned_variable.startswith(param_name + ".")
            )
            for ret in self._cfgs[function_qname].get_returns()
        )

    def get_variables_deriving_from(
        self,
        function_qname: str,
        source_var: str,
        at_line: int,
    ) -> set[str]:
        """
        Return the variables whose value may derive from ``source_var``.

        Delegates to the function's analysis (which handles renaming for
        taint propagation); without one, only ``source_var`` is returned.
        """
        if function_qname in self._analyses:
            return self._analyses[function_qname].get_variables_deriving_from(source_var, at_line)
        return {source_var}

    def variable_derives_from(
        self,
        function_qname: str,
        variable: str,
        source_var: str,
        at_line: int,
    ) -> bool:
        """
        Tell whether ``variable`` derives from ``source_var`` at ``at_line``.

        Without an analysis for the function this is plain name equality.
        """
        if function_qname in self._analyses:
            return self._analyses[function_qname].variable_derives_from(
                variable, source_var, at_line
            )
        return variable == source_var

    def get_derivation_chain(
        self,
        function_qname: str,
        target_var: str,
        source_var: str,
        at_line: int,
    ) -> list[Definition]:
        """
        Return the ordered definitions leading from ``source_var`` to ``target_var``.

        Each Definition in the chain carries ``assigned_from_call`` (the
        transformation function) and transformation metadata.
        The list is empty when no analysis is available or no chain exists.
        """
        if function_qname in self._analyses:
            return self._analyses[function_qname].get_derivation_chain(
                target_var, source_var, at_line
            )
        return []

    def resolve_cross_scope(
        self,
        function_qname: str,
        variable: str,
        file_path: Path,
    ) -> set[str]:
        """
        Resolve the types of a name that may refer to a module-level variable.

        A recorded cross-scope reference wins; otherwise the name is looked
        up directly among the module-level bindings of ``file_path``.
        """
        ref_file, module_var = self._module_var_refs.get(
            (function_qname, variable), (file_path, variable)
        )
        return self.get_module_types(ref_file, module_var)
|
|
1252
|
+
|
|
1253
|
+
|
|
1254
|
+
# =============================================================================
|
|
1255
|
+
# Integration with Call Graph
|
|
1256
|
+
# =============================================================================
|
|
1257
|
+
|
|
1258
|
+
|
|
1259
|
+
def build_flow_sensitive_bindings(
    parsed_files: list[ParsedFile],
) -> FlowSensitiveBindings:
    """
    Build flow-sensitive bindings for a set of parsed files.

    For every successfully parsed file this records module-level bindings,
    builds a CFG for each function and each class method, and finally runs
    reaching definitions on all of them so types can be resolved at
    specific program points. Files whose ``success`` flag is falsy are
    skipped.
    """
    bindings = FlowSensitiveBindings()

    for parsed in parsed_files:
        if not parsed.success:
            continue

        file_path = parsed.path

        for assign in parsed.assignments:
            if not assign.target_qualified_name:
                continue

            # A two-part qualified name ("module.var") is treated as a
            # module-level variable.
            if len(assign.target_qualified_name.full.split(".")) == 2:
                if assign.source_type == "call" and assign.source_call:
                    bindings.add_module_binding(file_path, assign.target, assign.source_call)
                elif assign.inferred_type:
                    bindings.add_module_binding(file_path, assign.target, assign.inferred_type)

            # Dotted targets ("obj.attr = Call()") become attribute bindings.
            # NOTE(review): this check does not depend on the two-part test
            # above, so it applies to any qualified assignment with a dotted
            # target — confirm that is intended.
            if "." in assign.target:
                receiver, _, attr = assign.target.partition(".")
                if assign.source_type == "call" and assign.source_call:
                    bindings.add_module_attr_binding(
                        file_path, receiver, attr, assign.source_call
                    )

        # Free functions
        for func in parsed.functions:
            _process_function_for_flow(bindings, func, parsed, file_path)

        # Methods, with their class recorded as the enclosing class
        for cls in parsed.classes:
            class_qname = cls.qualified_name.full
            for method in cls.methods:
                _process_function_for_flow(
                    bindings, method, parsed, file_path, enclosing_class=class_qname
                )

    # All CFGs exist now; run the analysis over every function.
    bindings.analyze_all()

    return bindings
|
|
1314
|
+
|
|
1315
|
+
|
|
1316
|
+
def _process_function_for_flow(
    bindings: FlowSensitiveBindings,
    func: ParsedFunction,
    parsed: ParsedFile,
    file_path: Path,
    enclosing_class: str | None = None,
) -> None:
    """
    Build the CFG for one function and record what it returns.

    Assignments of the file whose qualified target name equals the
    function's qualified name, or starts with it followed by a dot, are
    handed to ``bindings.build_cfg_for_function``. Return statements are
    registered in the CFG, and the annotated return type plus one entry
    per return statement are added through ``bindings.add_return_info``.
    ``file_path`` is accepted but not used here.
    """
    qname = func.qualified_name.full
    nested_prefix = qname + "."

    # Assignments that belong to this function.
    func_assignments = [
        assign
        for assign in parsed.assignments
        if assign.target_qualified_name
        and (
            assign.target_qualified_name.full.startswith(nested_prefix)
            or assign.target_qualified_name.full == qname
        )
    ]

    parameters = [(param.name, param.type_annotation) for param in func.parameters]

    # Optional metadata: absent or empty both mean "not available".
    control_flow_info = getattr(func, "control_flow_info", None) or None
    return_statements = getattr(func, "return_statements", None)

    bindings.build_cfg_for_function(
        qname,
        func_assignments,
        parameters,
        enclosing_class,
        control_flow_info,
    )

    # Needed for function_returns_parameter.
    if return_statements:
        bindings.register_return_statements(qname, func.return_statements)

    if func.return_type:
        bindings.add_return_info(qname, return_type=func.return_type)

    for ret in return_statements or ():
        if ret.returns_call and ret.call_name:
            # The real type is whatever the callee returns; for now the call
            # name is stored as a placeholder. Callees whose name ends in
            # "_factory" are also recorded as returned callables.
            bindings.add_return_info(
                qname,
                return_type=f"<return:{ret.call_name}>",
                returns_callable=ret.call_name if ret.call_name.endswith("_factory") else None,
            )
            continue

        if ret.returns_variable and ret.variable_name:
            # Placeholder to be resolved once variable types are known.
            bindings.add_return_info(
                qname,
                return_type=f"<var:{ret.variable_name}>",
            )
            continue

        if ret.returns_literal and ret.literal_type:
            bindings.add_return_info(qname, return_type=ret.literal_type)
            continue

        if ret.returns_lambda:
            bindings.add_return_info(
                qname,
                returns_callable=f"{qname}.<lambda>",
            )
            continue

        if ret.returns_comprehension:
            # Guess the container from the expression text: "{...:...}" is
            # a dict, other "{...}" a set, anything else a list.
            text = ret.expression_text
            if text:
                if not text.startswith("{"):
                    container = "list"
                elif ":" in text:
                    container = "dict"
                else:
                    container = "set"
                bindings.add_return_info(qname, return_type=container)
            continue

        if ret.returns_none:
            bindings.add_return_info(qname, return_type="None")
|