apisec-code-bolt 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apisec_code_bolt/__init__.py +42 -0
- apisec_code_bolt/__main__.py +11 -0
- apisec_code_bolt/analysis/__init__.py +96 -0
- apisec_code_bolt/analysis/analyzer.py +2309 -0
- apisec_code_bolt/analysis/binding_tracker.py +341 -0
- apisec_code_bolt/analysis/call_graph.py +1197 -0
- apisec_code_bolt/analysis/call_graph_types.py +332 -0
- apisec_code_bolt/analysis/call_resolver.py +988 -0
- apisec_code_bolt/analysis/capability_tagger.py +322 -0
- apisec_code_bolt/analysis/config_scanner.py +197 -0
- apisec_code_bolt/analysis/data_flow.py +1883 -0
- apisec_code_bolt/analysis/dependency_extractor.py +959 -0
- apisec_code_bolt/analysis/flow_analysis.py +1406 -0
- apisec_code_bolt/analysis/hof_catalog.py +61 -0
- apisec_code_bolt/analysis/integration_detector.py +1399 -0
- apisec_code_bolt/analysis/literal_scanner.py +300 -0
- apisec_code_bolt/analysis/path_normalizer.py +55 -0
- apisec_code_bolt/analysis/read_site_detector.py +310 -0
- apisec_code_bolt/analysis/request_patterns.py +162 -0
- apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
- apisec_code_bolt/analysis/sink_evidence.py +333 -0
- apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
- apisec_code_bolt/cli/__init__.py +5 -0
- apisec_code_bolt/cli/exit_codes.py +17 -0
- apisec_code_bolt/cli/main.py +1069 -0
- apisec_code_bolt/cloud/__init__.py +1 -0
- apisec_code_bolt/cloud/apisec_client.py +118 -0
- apisec_code_bolt/cloud/client.py +255 -0
- apisec_code_bolt/core/__init__.py +75 -0
- apisec_code_bolt/core/config.py +528 -0
- apisec_code_bolt/core/credentials.py +65 -0
- apisec_code_bolt/core/discovery.py +433 -0
- apisec_code_bolt/core/log_format.py +115 -0
- apisec_code_bolt/core/manifest.py +1009 -0
- apisec_code_bolt/core/repo.py +280 -0
- apisec_code_bolt/core/state.py +59 -0
- apisec_code_bolt/core/telemetry.py +451 -0
- apisec_code_bolt/core/types.py +587 -0
- apisec_code_bolt/fingerprinting/__init__.py +1 -0
- apisec_code_bolt/frameworks/__init__.py +29 -0
- apisec_code_bolt/frameworks/_jwt_common.py +50 -0
- apisec_code_bolt/frameworks/auth_helpers.py +437 -0
- apisec_code_bolt/frameworks/base.py +608 -0
- apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
- apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
- apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
- apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
- apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
- apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
- apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
- apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
- apisec_code_bolt/frameworks/java/__init__.py +6 -0
- apisec_code_bolt/frameworks/java/_annotations.py +167 -0
- apisec_code_bolt/frameworks/java/_constraints.py +128 -0
- apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
- apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
- apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
- apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
- apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
- apisec_code_bolt/frameworks/js/__init__.py +8 -0
- apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
- apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
- apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
- apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
- apisec_code_bolt/frameworks/python/__init__.py +19 -0
- apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
- apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
- apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
- apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
- apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
- apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
- apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
- apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
- apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
- apisec_code_bolt/parsing/__init__.py +62 -0
- apisec_code_bolt/parsing/base.py +554 -0
- apisec_code_bolt/parsing/csharp/__init__.py +5 -0
- apisec_code_bolt/parsing/csharp/language_services.py +203 -0
- apisec_code_bolt/parsing/csharp/literals.py +72 -0
- apisec_code_bolt/parsing/csharp/parser.py +1158 -0
- apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
- apisec_code_bolt/parsing/js/__init__.py +5 -0
- apisec_code_bolt/parsing/js/language_services.py +118 -0
- apisec_code_bolt/parsing/js/parser.py +622 -0
- apisec_code_bolt/parsing/jvm/__init__.py +7 -0
- apisec_code_bolt/parsing/jvm/language_services.py +270 -0
- apisec_code_bolt/parsing/jvm/parser.py +774 -0
- apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
- apisec_code_bolt/parsing/python/__init__.py +150 -0
- apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
- apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
- apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
- apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
- apisec_code_bolt/parsing/python/expression_utils.py +221 -0
- apisec_code_bolt/parsing/python/extraction_types.py +271 -0
- apisec_code_bolt/parsing/python/language_services.py +487 -0
- apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
- apisec_code_bolt/parsing/python/parser.py +719 -0
- apisec_code_bolt/parsing/python/path_resolver.py +576 -0
- apisec_code_bolt/parsing/python/router_registry.py +806 -0
- apisec_code_bolt/parsing/python/type_resolver.py +730 -0
- apisec_code_bolt/parsing/python/visitors.py +1544 -0
- apisec_code_bolt/parsing/services.py +544 -0
- apisec_code_bolt/query/__init__.py +1 -0
- apisec_code_bolt/query/ast_cache.py +182 -0
- apisec_code_bolt/query/executor.py +283 -0
- apisec_code_bolt/query/handlers.py +832 -0
- apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
- apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
- apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
- apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,1197 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Call graph construction for inter-procedural analysis.
|
|
3
|
+
|
|
4
|
+
This module builds a directed graph of function calls across an entire project.
|
|
5
|
+
It uses the PyCG algorithm approach: track variable assignments to resolve
|
|
6
|
+
dynamic dispatch (method calls on objects whose type isn't statically known).
|
|
7
|
+
|
|
8
|
+
DESIGN PRINCIPLES:
|
|
9
|
+
1. Language-agnostic graph structure (nodes and edges)
|
|
10
|
+
2. Language-specific resolvers handle symbol resolution
|
|
11
|
+
3. Graph stored in NetworkX for rich algorithm support
|
|
12
|
+
4. Full provenance: every edge knows WHY it exists
|
|
13
|
+
|
|
14
|
+
ALGORITHM OVERVIEW (based on PyCG - ICSE 2021):
|
|
15
|
+
1. Build symbol table: all function/method/class definitions
|
|
16
|
+
2. Collect all call sites from parsed files
|
|
17
|
+
3. Track variable assignments to build type bindings
|
|
18
|
+
4. Use fixed-point iteration for transitive propagation
|
|
19
|
+
5. Resolve each call to zero, one, or multiple targets
|
|
20
|
+
6. Build edges with resolution confidence
|
|
21
|
+
|
|
22
|
+
HANDLES:
|
|
23
|
+
- Direct function calls: foo()
|
|
24
|
+
- Method calls: obj.method()
|
|
25
|
+
- Qualified calls: module.func()
|
|
26
|
+
- Constructor calls: MyClass()
|
|
27
|
+
- Calls through aliases: from x import y as z; z()
|
|
28
|
+
- Higher-order functions: functions passed as arguments
|
|
29
|
+
- Closures and lambdas with return tracking
|
|
30
|
+
- Dynamic dispatch (multiple possible targets)
|
|
31
|
+
- Decorators as calls
|
|
32
|
+
- self/cls implicit binding
|
|
33
|
+
- Chained attribute access
|
|
34
|
+
- Comprehension and generator calls
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
from __future__ import annotations
|
|
38
|
+
|
|
39
|
+
import logging
|
|
40
|
+
from collections.abc import Iterator
|
|
41
|
+
from pathlib import Path
|
|
42
|
+
from typing import TYPE_CHECKING, Any
|
|
43
|
+
|
|
44
|
+
from ..core.manifest import stable_id
|
|
45
|
+
from .binding_tracker import BindingTracker # noqa: F401
|
|
46
|
+
|
|
47
|
+
# Re-export everything so existing importers don't break
|
|
48
|
+
from .call_graph_types import ( # noqa: F401
|
|
49
|
+
PYTHON_BUILTINS,
|
|
50
|
+
CallContext,
|
|
51
|
+
CallGraphEdge,
|
|
52
|
+
CallGraphNode,
|
|
53
|
+
EdgeType,
|
|
54
|
+
NodeType,
|
|
55
|
+
ResolutionConfidence,
|
|
56
|
+
TypeBinding,
|
|
57
|
+
)
|
|
58
|
+
from .call_resolver import ( # noqa: F401
|
|
59
|
+
CallContextAnalyzer,
|
|
60
|
+
CallResolver,
|
|
61
|
+
DecoratorAnalyzer,
|
|
62
|
+
LambdaClosureTracker,
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
try:
|
|
66
|
+
import networkx as nx
|
|
67
|
+
except ImportError:
|
|
68
|
+
nx = None # type: ignore
|
|
69
|
+
|
|
70
|
+
if TYPE_CHECKING:
|
|
71
|
+
from ..parsing.base import (
|
|
72
|
+
ParsedCallSite,
|
|
73
|
+
ParsedClass,
|
|
74
|
+
ParsedFile,
|
|
75
|
+
ParsedFunction,
|
|
76
|
+
)
|
|
77
|
+
from ..parsing.services import AnalysisContext, TypeResolver
|
|
78
|
+
from .flow_analysis import FlowSensitiveBindings
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
logger = logging.getLogger(__name__)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# =============================================================================
|
|
85
|
+
# Call Graph Builder
|
|
86
|
+
# =============================================================================
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class CallGraphBuilder:
|
|
90
|
+
"""
|
|
91
|
+
Builds a complete call graph from parsed files.
|
|
92
|
+
|
|
93
|
+
Usage:
|
|
94
|
+
builder = CallGraphBuilder()
|
|
95
|
+
|
|
96
|
+
for parsed_file in parsed_files:
|
|
97
|
+
builder.add_file(parsed_file)
|
|
98
|
+
|
|
99
|
+
builder.resolve_calls(type_resolver)
|
|
100
|
+
graph = builder.build()
|
|
101
|
+
"""
|
|
102
|
+
|
|
103
|
+
def __init__(self, project_root: Path | None = None):
    """Create an empty builder with no files, symbols, or edges.

    Args:
        project_root: Optional project root; stored on the instance unchanged.

    Raises:
        ImportError: If the module-level ``networkx`` import did not succeed.
    """
    if nx is None:
        message = (
            "networkx is required for call graph construction. "
            "Install it with: pip install networkx"
        )
        raise ImportError(message)

    self._project_root = project_root

    # ---- Definitions ----------------------------------------------------
    # qualified name -> node for every function, method, and class seen.
    self._symbols: dict[str, CallGraphNode] = {}
    # (file_path, simple_name) -> qualified names, for O(1) lookup by name.
    self._file_name_index: dict[tuple[Path, str], list[str]] = {}

    # ---- Unresolved input -----------------------------------------------
    # (call site, caller, file path) triples queued until resolve_calls().
    self._call_sites: list[tuple[ParsedCallSite, str | None, Path]] = []

    # ---- Type bindings --------------------------------------------------
    # Flow-insensitive tracker used for dynamic dispatch.
    self._bindings = BindingTracker()
    # Flow-sensitive bindings; filled in by resolve_calls().
    self._flow_bindings: FlowSensitiveBindings | None = None

    # ---- Imports and hierarchy ------------------------------------------
    # (file_path, local_name) -> qualified name.
    self._imports: dict[tuple[Path, str], str] = {}
    # file_path -> {local_name: qualified_name} for star imports.
    self._star_imports: dict[Path, dict[str, str]] = {}
    # class qualified name -> its base class names.
    self._class_bases: dict[str, list[str]] = {}

    # ---- Helper analyzers -----------------------------------------------
    self._context_analyzer = CallContextAnalyzer()
    self._decorator_analyzer = DecoratorAnalyzer()
    self._lambda_tracker = LambdaClosureTracker()

    # (decorated name, decorator name, file path, line); kept apart from
    # the regular call sites.
    self._decorator_calls: list[tuple[str, str, Path, int]] = []

    # ---- Output ---------------------------------------------------------
    self._edges: list[CallGraphEdge] = []
    self._graph: nx.DiGraph = nx.DiGraph()

    # Parsed files by path, retained for context analysis.
    self._parsed_files: dict[Path, ParsedFile] = {}

    # Base-class names treated as protocol/ABC markers.
    self._known_protocols: set[str] = {
        "typing.Protocol",
        "abc.ABC",
        "abc.ABCMeta",
        "collections.abc.Iterable",
        "collections.abc.Iterator",
        "collections.abc.Callable",
        "collections.abc.Mapping",
        "collections.abc.Sequence",
        "collections.abc.Set",
    }
|
|
170
|
+
|
|
171
|
+
def add_file(self, parsed_file: ParsedFile) -> None:
    """Ingest one parsed file: imports, definitions, call sites, assignments.

    Files whose ``success`` flag is falsy are ignored entirely.

    Args:
        parsed_file: The parse result to record in the builder.
    """
    if not parsed_file.success:
        return

    path = parsed_file.path
    self._parsed_files[path] = parsed_file

    # Imports first, so later steps can consult the import map.
    self._process_imports(parsed_file)

    # Module-level functions carry no enclosing class.
    for function in parsed_file.functions:
        self._add_function(function, path, None)

    # Classes register themselves and then their methods.
    for parsed_class in parsed_file.classes:
        self._add_class(parsed_class, path)

    # Queue every call site together with its caller for later resolution.
    self._call_sites.extend(
        (site, self._get_caller(site, path), path)
        for site in parsed_file.call_sites
    )

    # Assignments feed the binding tracker.
    self._process_assignments(parsed_file)
|
|
199
|
+
|
|
200
|
+
def _process_imports(self, parsed_file: ParsedFile) -> None:
|
|
201
|
+
"""Process imports to build import map."""
|
|
202
|
+
file_path = parsed_file.path
|
|
203
|
+
|
|
204
|
+
for imp in parsed_file.imports:
|
|
205
|
+
line = imp.location.line if imp.location else 0
|
|
206
|
+
|
|
207
|
+
# Handle star imports
|
|
208
|
+
if imp.is_from_import and "*" in imp.names:
|
|
209
|
+
# Star import - we need to know what's exported
|
|
210
|
+
# For now, mark it for later resolution
|
|
211
|
+
if file_path not in self._star_imports:
|
|
212
|
+
self._star_imports[file_path] = {}
|
|
213
|
+
# We'll try to resolve exported names from the module
|
|
214
|
+
self._bindings.add_import(
|
|
215
|
+
"*",
|
|
216
|
+
f"{imp.module}.*",
|
|
217
|
+
file_path,
|
|
218
|
+
line,
|
|
219
|
+
is_star=True,
|
|
220
|
+
)
|
|
221
|
+
continue
|
|
222
|
+
|
|
223
|
+
if imp.is_from_import:
|
|
224
|
+
# from module import name [as alias]
|
|
225
|
+
for name in imp.names:
|
|
226
|
+
local_name = imp.alias if len(imp.names) == 1 and imp.alias else name
|
|
227
|
+
qualified = f"{imp.module}.{name}"
|
|
228
|
+
self._imports[(file_path, local_name)] = qualified
|
|
229
|
+
self._bindings.add_import(local_name, qualified, file_path, line)
|
|
230
|
+
else:
|
|
231
|
+
# import module [as alias]
|
|
232
|
+
local_name = imp.alias or imp.module.split(".")[-1]
|
|
233
|
+
self._imports[(file_path, local_name)] = imp.module
|
|
234
|
+
self._bindings.add_import(local_name, imp.module, file_path, line)
|
|
235
|
+
|
|
236
|
+
def _add_function(
    self,
    func: ParsedFunction,
    file_path: Path,
    enclosing_class: str | None,
) -> None:
    """Register *func* as a symbol and feed its signature to the trackers.

    Args:
        func: Parsed function or method to register.
        file_path: File in which the function was found.
        enclosing_class: Qualified name of the owning class, or ``None``
            when the function is not being added on behalf of a class.
    """
    qname = func.qualified_name.full
    decorator_names = [d.name for d in func.decorators]
    is_bound = func.binding != "free"

    # ---- Classify the node ----------------------------------------------
    if not is_bound:
        # A free function whose dotted parent is an already-registered
        # function, method, or closure is a closure; otherwise it is a
        # plain function.
        node_type = NodeType.FUNCTION
        parent_name, has_dot, _ = qname.rpartition(".")
        if has_dot:
            parent_node = self._symbols.get(parent_name)
            if parent_node is not None and parent_node.node_type in (
                NodeType.FUNCTION,
                NodeType.METHOD,
                NodeType.CLOSURE,
            ):
                node_type = NodeType.CLOSURE
    elif func.binding == "static":
        # "static" binding covers both @classmethod and @staticmethod.
        if "classmethod" in decorator_names:
            node_type = NodeType.CLASS_METHOD
        else:
            node_type = NodeType.STATIC_METHOD
    elif "property" in decorator_names:
        node_type = NodeType.PROPERTY
    elif func.name == "__init__":
        node_type = NodeType.CONSTRUCTOR
    else:
        node_type = NodeType.METHOD

    class_qname = func.class_qualified_name.full if func.class_qualified_name else None

    self._symbols[qname] = CallGraphNode(
        qualified_name=qname,
        name=func.name,
        node_type=node_type,
        file_path=file_path,
        line=func.location.line,
        # Fall back to the start line when no end line is available.
        end_line=func.location.end_line or func.location.line,
        class_name=func.owner_type,
        class_qualified_name=class_qname,
        is_async=func.is_async,
        is_generator=func.has_yield,
        is_abstract=func.is_abstract,
        parameters=[p.name for p in func.parameters],
        return_type=func.return_type,
        docstring=func.docstring,
        decorators=decorator_names,
    )

    # Index by (file, simple name) for lookups by unqualified name.
    if file_path:
        self._file_name_index.setdefault((file_path, func.name), []).append(qname)

    # ---- self/cls receiver binding --------------------------------------
    if is_bound and func.parameters and enclosing_class:
        receiver = func.parameters[0]
        if receiver.name in ("self", "cls"):
            self._bindings.add_self_binding(qname, enclosing_class, receiver.name)

    # ---- Remaining parameters -------------------------------------------
    for param in func.parameters:
        # On bound functions, self/cls is left to the receiver binding above.
        if is_bound and param.name in ("self", "cls"):
            continue
        self._bindings.add_parameter(param.name, qname, param.type_annotation)

    # ---- Return type, used for return-value propagation -----------------
    if func.return_type:
        self._bindings.add_return_binding(qname, return_type=func.return_type)

    # ---- Decorator applications -----------------------------------------
    if func.decorators:
        extracted = self._decorator_analyzer.extract_decorator_calls(
            qname, func.decorators, file_path
        )
        self._decorator_calls.extend(
            (qname, dec_name, file_path, line) for dec_name, line, _kind in extracted
        )
|
|
333
|
+
|
|
334
|
+
def _add_class(
    self,
    cls: ParsedClass,
    file_path: Path,
) -> None:
    """Register a class, its decorators, methods, and typed attributes.

    Args:
        cls: Parsed class to register.
        file_path: File in which the class was found.
    """
    class_qname = cls.qualified_name.full
    bases = cls.base_classes

    # Record the hierarchy exactly as parsed.
    self._class_bases[class_qname] = bases

    # A base counts as a protocol/ABC when it is one of the known names
    # or merely ends with "Protocol".
    for base in bases:
        if base in self._known_protocols or base.endswith("Protocol"):
            self._bindings.add_protocol_implementation(base, class_qname)

    # The class itself is stored as a CONSTRUCTOR node so that Class()
    # calls have a target.
    self._symbols[class_qname] = CallGraphNode(
        qualified_name=class_qname,
        name=cls.name,
        node_type=NodeType.CONSTRUCTOR,
        file_path=file_path,
        line=cls.location.line,
        end_line=cls.location.end_line or cls.location.line,
        docstring=cls.docstring,
        decorators=[d.name for d in cls.decorators],
    )
    self._file_name_index.setdefault((file_path, cls.name), []).append(class_qname)

    # Class decorators are queued like function decorators.
    if cls.decorators:
        extracted = self._decorator_analyzer.extract_decorator_calls(
            class_qname, cls.decorators, file_path
        )
        self._decorator_calls.extend(
            (class_qname, dec_name, file_path, line) for dec_name, line, _kind in extracted
        )

    # Methods receive the class name so self/cls can be bound.
    for method in cls.methods:
        self._add_function(method, file_path, class_qname)

    # Attributes that carry a type annotation become class-scoped bindings,
    # all recorded at the class's own line.
    for attr_name, attr_info in getattr(cls, "attributes", {}).items():
        annotation = getattr(attr_info, "type_annotation", None)
        if not annotation:
            continue
        self._bindings.add_assignment(
            attr_name,
            annotation,
            file_path,
            cls.location.line,
            scope_class=class_qname,
        )
|
|
390
|
+
|
|
391
|
+
def _process_assignments(self, parsed_file: ParsedFile) -> None:
|
|
392
|
+
"""Process assignments to build type bindings."""
|
|
393
|
+
file_path = parsed_file.path
|
|
394
|
+
|
|
395
|
+
for assign in parsed_file.assignments:
|
|
396
|
+
scope_function = None
|
|
397
|
+
if assign.target_qualified_name:
|
|
398
|
+
# Determine scope
|
|
399
|
+
parts = assign.target_qualified_name.full.split(".")
|
|
400
|
+
if len(parts) > 1:
|
|
401
|
+
scope_function = ".".join(parts[:-1])
|
|
402
|
+
|
|
403
|
+
if assign.source_type == "call" and assign.source_call:
|
|
404
|
+
# x = SomeClass() -> x is bound to SomeClass
|
|
405
|
+
called = assign.source_call
|
|
406
|
+
|
|
407
|
+
# Try to resolve the called class
|
|
408
|
+
import_key = (file_path, called)
|
|
409
|
+
if import_key in self._imports:
|
|
410
|
+
called = self._imports[import_key]
|
|
411
|
+
elif called in self._symbols:
|
|
412
|
+
pass # Already qualified
|
|
413
|
+
|
|
414
|
+
self._bindings.add_assignment(
|
|
415
|
+
assign.target,
|
|
416
|
+
called,
|
|
417
|
+
file_path,
|
|
418
|
+
assign.location.line,
|
|
419
|
+
scope_function=scope_function,
|
|
420
|
+
)
|
|
421
|
+
|
|
422
|
+
# Also track that the variable holds the return value
|
|
423
|
+
self._bindings.add_assignment(
|
|
424
|
+
assign.target,
|
|
425
|
+
None,
|
|
426
|
+
file_path,
|
|
427
|
+
assign.location.line,
|
|
428
|
+
scope_function=scope_function,
|
|
429
|
+
source_variable=called, # For return type propagation
|
|
430
|
+
)
|
|
431
|
+
|
|
432
|
+
elif assign.source_type == "variable" and assign.source_value:
|
|
433
|
+
# x = y -> x has same types as y (transitive)
|
|
434
|
+
self._bindings.add_assignment(
|
|
435
|
+
assign.target,
|
|
436
|
+
None,
|
|
437
|
+
file_path,
|
|
438
|
+
assign.location.line,
|
|
439
|
+
scope_function=scope_function,
|
|
440
|
+
source_variable=assign.source_value,
|
|
441
|
+
)
|
|
442
|
+
|
|
443
|
+
elif assign.source_type == "literal":
|
|
444
|
+
# x = "string" or x = 123
|
|
445
|
+
if assign.inferred_type:
|
|
446
|
+
self._bindings.add_assignment(
|
|
447
|
+
assign.target,
|
|
448
|
+
assign.inferred_type,
|
|
449
|
+
file_path,
|
|
450
|
+
assign.location.line,
|
|
451
|
+
scope_function=scope_function,
|
|
452
|
+
)
|
|
453
|
+
|
|
454
|
+
# Check if assigning a callable (function reference)
|
|
455
|
+
if assign.source_type == "variable":
|
|
456
|
+
source = assign.source_value
|
|
457
|
+
if source and source in self._symbols:
|
|
458
|
+
# Assigning a function to a variable
|
|
459
|
+
self._bindings.add_assignment(
|
|
460
|
+
assign.target,
|
|
461
|
+
None,
|
|
462
|
+
file_path,
|
|
463
|
+
assign.location.line,
|
|
464
|
+
scope_function=scope_function,
|
|
465
|
+
is_callable=True,
|
|
466
|
+
callable_target=source,
|
|
467
|
+
)
|
|
468
|
+
|
|
469
|
+
def _get_caller(self, call: ParsedCallSite, file_path: Path | str) -> str:
|
|
470
|
+
"""Get the qualified name of the calling function."""
|
|
471
|
+
if call.caller_function:
|
|
472
|
+
return call.caller_function.full
|
|
473
|
+
# Module-level call - use file-qualified module name
|
|
474
|
+
p = file_path if isinstance(file_path, Path) else Path(str(file_path))
|
|
475
|
+
return f"<module:{p.stem}>"
|
|
476
|
+
|
|
477
|
+
def resolve_calls(
    self,
    type_resolver: TypeResolver | None = None,
    flow_bindings: FlowSensitiveBindings | None = None,
) -> None:
    """Resolve every queued call site and decorator into call graph edges.

    Intended to run once all files have been added.

    Args:
        type_resolver: Optional resolver handed to ``CallResolver``.
        flow_bindings: Pre-built flow-sensitive bindings. When omitted they
            are built from the files added so far; if that build raises, a
            warning is logged and resolution proceeds without them.
    """
    # Step 1: close the flow-insensitive bindings transitively.
    rounds = self._bindings.propagate()
    logger.debug("Binding propagation completed in %d iterations", rounds)

    # Step 2: settle on flow-sensitive bindings (supplied or built here).
    if flow_bindings is None:
        try:
            from .flow_analysis import build_flow_sensitive_bindings

            all_files = list(self._parsed_files.values())
            self._flow_bindings = build_flow_sensitive_bindings(all_files)
            logger.debug("Built flow-sensitive bindings for %d files", len(all_files))
        except Exception as e:
            # Best effort: a failed build must not abort resolution.
            logger.warning("Could not build flow-sensitive bindings: %s", e)
            self._flow_bindings = None
    else:
        self._flow_bindings = flow_bindings

    # Step 3: gather control-flow information for context analysis.
    for parsed in self._parsed_files.values():
        self._context_analyzer.extract_control_flow_from_file(parsed)

    # Step 4: configure the resolver with everything collected so far.
    resolver = CallResolver(
        self._symbols,
        self._bindings,
        type_resolver,
        self._flow_bindings,
        file_name_index=self._file_name_index,
    )

    for (import_file, local_name), qualified in self._imports.items():
        resolver.add_import_mapping(import_file, local_name, qualified)

    for star_file, exported in self._star_imports.items():
        for local_name, qualified in exported.items():
            # The module is the qualified name minus its final segment.
            resolver.add_star_import(
                star_file, qualified.rsplit(".", 1)[0], {local_name: qualified}
            )

    for class_name, base_names in self._class_bases.items():
        resolver.add_class_hierarchy(class_name, base_names)

    # Step 5: one edge per resolved target of each regular call site.
    for site, caller, site_file in self._call_sites:
        targets, confidence, reason = resolver.resolve(site, caller, site_file)

        owner = self._parsed_files.get(site_file)
        if owner:
            context_info = self._context_analyzer.analyze_call_context(
                site, owner, self._flow_bindings
            )
        else:
            # No parsed file on record: fall back to a plain context.
            context_info = {"context": CallContext.NORMAL}

        self._edges.extend(
            self._create_edge(
                caller=caller,
                callee=target,
                edge_type=self._determine_edge_type(site),
                file_path=site_file,
                line=site.location.line,
                column=site.location.column,
                confidence=confidence,
                targets=targets,
                call_site=site,
                context_info=context_info,
                reason=reason,
            )
            for target in targets
        )

    # Step 6: decorator applications become DECORATOR edges.
    for decorated, decorator_name, dec_file, dec_line in self._decorator_calls:
        # Only the first dotted segment is looked up in the import map;
        # any remainder is re-attached to the imported prefix.
        head, _, remainder = decorator_name.partition(".")
        resolved = decorator_name
        prefix_key = (dec_file, head)
        if prefix_key in self._imports:
            prefix = self._imports[prefix_key]
            resolved = f"{prefix}.{remainder}" if remainder else prefix

        if resolved in self._symbols:
            dec_confidence = ResolutionConfidence.HIGH
        else:
            dec_confidence = ResolutionConfidence.MEDIUM

        self._edges.append(
            CallGraphEdge(
                caller=decorated,
                callee=resolved,
                edge_type=EdgeType.DECORATOR,
                file_path=dec_file,
                line=dec_line,
                confidence=dec_confidence,
                resolution_reason="decorator application",
            )
        )

    # Step 7: post-resolution passes.
    # Carry call edges through decorators so taint analysis can follow
    # data across decorator boundaries.
    self._propagate_through_decorators()

    # Add synthetic __enter__/__exit__ edges for with-statements.
    self._emit_context_manager_edges()
|
|
590
|
+
|
|
591
|
+
def _create_edge(
    self,
    caller: str,
    callee: str,
    edge_type: EdgeType,
    file_path: Path,
    line: int,
    column: int,
    confidence: ResolutionConfidence,
    targets: list[str],
    call_site: ParsedCallSite,
    context_info: dict[str, Any],
    reason: str,
) -> CallGraphEdge:
    """Assemble a ``CallGraphEdge`` from a resolved call and its context.

    Args:
        caller: Name of the calling function.
        callee: The single target this edge points at.
        edge_type: Kind of call the edge represents.
        file_path: File containing the call.
        line: Line of the call site.
        column: Column of the call site.
        confidence: Confidence reported for the resolution.
        targets: Every target the call resolved to; listed on the edge
            only when there is more than one.
        call_site: Parsed call site supplying argument details.
        context_info: Context flags; absent keys default to a normal
            context and ``False``.
        reason: Text explaining why the edge exists.

    Returns:
        The populated edge.
    """
    arguments = call_site.arguments

    # Alternatives are only interesting when dispatch is ambiguous.
    if len(targets) > 1:
        alternatives = targets
    else:
        alternatives = []

    keyword_names = [arg.name for arg in arguments if arg.name is not None]

    # Every boolean context flag defaults to False when not reported.
    flags = {
        flag: context_info.get(flag, False)
        for flag in (
            "in_loop",
            "in_conditional",
            "in_try_block",
            "in_except_handler",
            "in_comprehension",
        )
    }

    return CallGraphEdge(
        caller=caller,
        callee=callee,
        edge_type=edge_type,
        file_path=file_path,
        line=line,
        column=column,
        confidence=confidence,
        possible_callees=alternatives,
        argument_count=len(arguments),
        keyword_arguments=keyword_names,
        has_spread=any(arg.is_spread for arg in arguments),
        has_keyword_spread=any(arg.is_keyword_spread for arg in arguments),
        context=context_info.get("context", CallContext.NORMAL),
        resolution_reason=reason,
        **flags,
    )
|
|
627
|
+
|
|
628
|
+
def _determine_edge_type(self, call_site: ParsedCallSite) -> EdgeType:
    """Classify a call site into an ``EdgeType``.

    The classification is purely lexical:

    * method calls are ``SUPER_CALL`` when the receiver is exactly
      ``super()``, otherwise ``METHOD_CALL``;
    * an undotted name starting with an uppercase letter is a
      ``CONSTRUCTOR_CALL``;
    * a dotted name whose first segment starts with an uppercase letter
      is a ``CONSTRUCTOR_CALL`` when its last segment is also capitalised
      (e.g. ``Outer.Inner``) and a ``STATIC_CALL`` otherwise
      (e.g. ``Config.load``);
    * everything else, including a missing callee name, is a
      ``DIRECT_CALL``.

    Args:
        call_site: The parsed call site to classify.

    Returns:
        The edge type for the call.
    """
    if call_site.is_method_call:
        receiver = call_site.receiver_expression or ""
        if receiver == "super()":
            return EdgeType.SUPER_CALL
        return EdgeType.METHOD_CALL

    # Treat a missing name like an empty one instead of failing on ``in``.
    callee = call_site.callee_name or ""
    if not callee:
        return EdgeType.DIRECT_CALL

    head, dot, _ = callee.partition(".")
    if not dot:
        # Plain name: capitalised names are taken to be class instantiations.
        if callee[0].isupper():
            return EdgeType.CONSTRUCTOR_CALL
        return EdgeType.DIRECT_CALL

    if head[:1].isupper():
        # The dotted case must be decided here: testing only the first
        # character of the whole name would label ``Class.method`` as a
        # constructor and leave the static-call branch unreachable.
        tail = callee.rpartition(".")[2]
        if tail[:1].isupper():
            return EdgeType.CONSTRUCTOR_CALL
        return EdgeType.STATIC_CALL

    return EdgeType.DIRECT_CALL
|
|
649
|
+
|
|
650
|
+
# -----------------------------------------------------------------
|
|
651
|
+
# Decorator flow-through
|
|
652
|
+
# -----------------------------------------------------------------
|
|
653
|
+
|
|
654
|
+
# Decorator names for which no synthetic decorator -> function edge is
# emitted. _propagate_through_decorators skips a DECORATOR edge when either
# the full decorator name or its last dotted component appears in this set.
_NON_PASSTHROUGH_DECORATORS: set[str] = {
    "property",
    "staticmethod",
    "classmethod",
    "builtins.property",
    "builtins.staticmethod",
    "builtins.classmethod",
    "abc.abstractmethod",
    "functools.cached_property",
    "typing.overload",
}
|
|
665
|
+
|
|
666
|
+
def _propagate_through_decorators(self) -> None:
    """Emit synthetic call edges from decorators to the functions they wrap.

    For every DECORATOR edge F -> D (function F decorated by D) a
    synthetic DIRECT_CALL edge D -> F is appended to ``self._edges`` so
    that a path through the decorator reaches F's body.

    Decorators listed in ``_NON_PASSTHROUGH_DECORATORS`` (matched by full
    name or by last dotted component) are skipped, as are pairs for which
    an edge D -> F already exists.
    """
    decorator_edges = [e for e in self._edges if e.edge_type == EdgeType.DECORATOR]
    if not decorator_edges:
        return

    existing_pairs: set[tuple[str, str]] = {(e.caller, e.callee) for e in self._edges}
    non_passthrough = self._NON_PASSTHROUGH_DECORATORS

    # Counted explicitly: the difference between the edge list and the
    # pair set does not change when a synthetic edge is added to both.
    emitted = 0

    for dec_edge in decorator_edges:
        decorated_func = dec_edge.caller  # the function being decorated
        decorator_name = dec_edge.callee  # the decorator

        # Skip non-pass-through decorators
        simple_name = decorator_name.rsplit(".", 1)[-1]
        if decorator_name in non_passthrough or simple_name in non_passthrough:
            continue

        # Emit: decorator_wrapper -> decorated_function body
        pair = (decorator_name, decorated_func)
        if pair in existing_pairs:
            continue
        existing_pairs.add(pair)

        self._edges.append(
            CallGraphEdge(
                caller=decorator_name,
                callee=decorated_func,
                edge_type=EdgeType.DIRECT_CALL,
                file_path=dec_edge.file_path,
                line=dec_edge.line,
                confidence=ResolutionConfidence.MEDIUM,
                resolution_reason="decorator flow-through (synthetic)",
            )
        )
        emitted += 1

    logger.debug(
        "Decorator flow-through: emitted %d synthetic edges",
        emitted,
    )
|
|
717
|
+
|
|
718
|
+
# -----------------------------------------------------------------
|
|
719
|
+
# Context manager __enter__ / __exit__ modeling
|
|
720
|
+
# -----------------------------------------------------------------
|
|
721
|
+
|
|
722
|
+
def _emit_context_manager_edges(self) -> None:
|
|
723
|
+
"""Emit synthetic call edges for ``with`` statements.
|
|
724
|
+
|
|
725
|
+
For each ``with expr as x:`` inside a function, if we can resolve
|
|
726
|
+
*expr*'s type T (via bindings or imports), we emit two synthetic
|
|
727
|
+
CALLS edges: caller → T.__enter__ and caller → T.__exit__.
|
|
728
|
+
"""
|
|
729
|
+
for file_path, parsed_file in self._parsed_files.items():
|
|
730
|
+
for func in parsed_file.functions:
|
|
731
|
+
self._emit_cm_edges_for_function(func, file_path)
|
|
732
|
+
for cls in parsed_file.classes:
|
|
733
|
+
for method in cls.methods:
|
|
734
|
+
self._emit_cm_edges_for_function(method, file_path)
|
|
735
|
+
|
|
736
|
+
def _emit_cm_edges_for_function(
    self,
    func: ParsedFunction,
    file_path: Path,
) -> None:
    """Append ``__enter__``/``__exit__`` edges for one function's ``with`` blocks.

    Reads the ``with_blocks`` entry of ``func.control_flow_info``. Each
    block contributes its ``with_items`` (or, when that is empty, its
    single ``context_expr``). Every type resolved for such an expression
    yields two METHOD_CALL edges from the function to ``T.__enter__`` and
    ``T.__exit__``, unless an edge with that caller and callee already
    exists.

    Args:
        func: The function or method to inspect.
        file_path: File in which ``func`` is defined.
    """
    flow_info = getattr(func, "control_flow_info", None)
    if not flow_info:
        return

    caller = func.qualified_name.full
    blocks = flow_info.get("with_blocks", [])
    if not blocks:
        return

    # Only pairs starting at ``caller`` are ever tested, so tracking the
    # callees already reached from it is sufficient.
    known_callees: set[str] = {e.callee for e in self._edges if e.caller == caller}

    for block in blocks:
        line: int = block.get("start_line", 0)

        # Prefer the full item list; fall back to the single expression.
        expressions: list[str] = block.get("with_items", [])
        if not expressions:
            single = block.get("context_expr")
            expressions = [single] if single else []

        for expression in expressions:
            resolved = self._resolve_context_expr_type(expression, file_path, caller)

            # Sorted so edges are appended in a deterministic order.
            for type_name in sorted(resolved or ()):
                for dunder in ("__enter__", "__exit__"):
                    target = f"{type_name}.{dunder}"
                    if target in known_callees:
                        continue
                    known_callees.add(target)

                    if target in self._symbols:
                        confidence = ResolutionConfidence.HIGH
                    else:
                        confidence = ResolutionConfidence.MEDIUM

                    self._edges.append(
                        CallGraphEdge(
                            caller=caller,
                            callee=target,
                            edge_type=EdgeType.METHOD_CALL,
                            file_path=file_path,
                            line=line,
                            confidence=confidence,
                            resolution_reason=f"context manager {dunder} (synthetic)",
                        )
                    )
|
|
789
|
+
|
|
790
|
+
def _resolve_context_expr_type(
|
|
791
|
+
self,
|
|
792
|
+
ctx_expr: str,
|
|
793
|
+
file_path: Path,
|
|
794
|
+
caller: str,
|
|
795
|
+
) -> set[str]:
|
|
796
|
+
"""Best-effort type resolution for a context-manager expression."""
|
|
797
|
+
types: set[str] = set()
|
|
798
|
+
|
|
799
|
+
# Direct symbol or import lookup (e.g. ``open(...)`` → skip,
|
|
800
|
+
# but ``SomeClass()`` or ``some_var`` may resolve).
|
|
801
|
+
base = ctx_expr.split("(")[0].split(".")[0].strip()
|
|
802
|
+
|
|
803
|
+
# Check imports
|
|
804
|
+
import_key = (file_path, base)
|
|
805
|
+
if import_key in self._imports:
|
|
806
|
+
types.add(self._imports[import_key])
|
|
807
|
+
|
|
808
|
+
# Check symbols table
|
|
809
|
+
if base in self._symbols:
|
|
810
|
+
types.add(base)
|
|
811
|
+
|
|
812
|
+
# Check bindings
|
|
813
|
+
bound = self._bindings.get_possible_types(base, file_path, caller)
|
|
814
|
+
types.update(bound)
|
|
815
|
+
|
|
816
|
+
return types
|
|
817
|
+
|
|
818
|
+
def build(self) -> CallGraph:
    """
    Materialise the collected symbols and edges into a ``CallGraph``.

    Every known symbol becomes a graph node. Edge endpoints that are not
    yet in the graph get a placeholder node before the edge is added.

    Should be called after resolve_calls().
    """
    graph = self._graph

    # Nodes for every known symbol
    for qualified, symbol in self._symbols.items():
        graph.add_node(
            qualified,
            name=symbol.name,
            node_type=symbol.node_type.name,
            file_path=str(symbol.file_path) if symbol.file_path else None,
            line=symbol.line,
            is_async=symbol.is_async,
            is_generator=symbol.is_generator,
            is_external=symbol.is_external,
            class_name=symbol.class_name,
            parameters=symbol.parameters,
            return_type=symbol.return_type,
        )

    # Edges, with placeholder nodes for endpoints that are not symbols
    for item in self._edges:
        source, target = item.caller, item.callee

        if source not in graph:
            graph.add_node(
                source,
                name=source.rsplit(".", 1)[-1],
                node_type=NodeType.UNKNOWN.name,
                is_external="<module:" in source,
            )

        if target not in graph:
            if "<unresolved>" in target:
                placeholder_type = NodeType.UNKNOWN.name
            else:
                placeholder_type = NodeType.EXTERNAL.name
            graph.add_node(
                target,
                name=target.rsplit(".", 1)[-1],
                node_type=placeholder_type,
                is_external=True,
            )

        graph.add_edge(
            source,
            target,
            edge_type=item.edge_type.name,
            file_path=str(item.file_path),
            line=item.line,
            confidence=item.confidence.name,
            resolution_reason=item.resolution_reason,
            argument_count=item.argument_count,
            context=item.context.name,
            in_loop=item.in_loop,
            in_conditional=item.in_conditional,
        )

    return CallGraph(
        graph=graph,
        symbols=self._symbols,
        edges=self._edges,
        project_root=self._project_root,
        bindings=self._bindings,
    )
|
|
881
|
+
|
|
882
|
+
|
|
883
|
+
# =============================================================================
|
|
884
|
+
# Call Graph
|
|
885
|
+
# =============================================================================
|
|
886
|
+
|
|
887
|
+
|
|
888
|
+
class CallGraph:
    """
    Queryable call graph of a project.

    Wraps the graph, symbol table and edge list passed to the constructor
    and offers lookup, traversal, statistics and manifest-export helpers
    on top of them.
    """

    def __init__(
        self,
        graph: nx.DiGraph,
        symbols: dict[str, CallGraphNode],
        edges: list[CallGraphEdge],
        project_root: Path | None = None,
        bindings: BindingTracker | None = None,
    ):
        self._graph = graph
        self._symbols = symbols
        self._edges = edges
        self._project_root = project_root
        self._bindings = bindings

        # Group edges by endpoint once so per-name lookups are dict hits.
        outgoing: dict[str, list[CallGraphEdge]] = {}
        incoming: dict[str, list[CallGraphEdge]] = {}
        for item in edges:
            outgoing.setdefault(item.caller, []).append(item)
            incoming.setdefault(item.callee, []).append(item)
        self._edges_by_caller: dict[str, list[CallGraphEdge]] = outgoing
        self._edges_by_callee: dict[str, list[CallGraphEdge]] = incoming

    @property
    def graph(self) -> nx.DiGraph:
        """The wrapped NetworkX graph object."""
        return self._graph

    @property
    def node_count(self) -> int:
        """Node count as reported by the underlying graph."""
        return self._graph.number_of_nodes()

    @property
    def edge_count(self) -> int:
        """Edge count as reported by the underlying graph."""
        return self._graph.number_of_edges()

    def get_node(self, qualified_name: str) -> CallGraphNode | None:
        """Return the symbol registered under ``qualified_name``, or ``None``."""
        return self._symbols.get(qualified_name)

    def get_edges_from(self, qualified_name: str) -> list[CallGraphEdge]:
        """Return the edges whose caller is ``qualified_name``."""
        return self._edges_by_caller.get(qualified_name, [])

    def get_edges_to(self, qualified_name: str) -> list[CallGraphEdge]:
        """Return the edges whose callee is ``qualified_name``."""
        return self._edges_by_callee.get(qualified_name, [])

    def get_callers(self, qualified_name: str) -> list[str]:
        """Return the direct predecessors of ``qualified_name`` in the graph."""
        if qualified_name in self._graph:
            return list(self._graph.predecessors(qualified_name))
        return []

    def get_callees(self, qualified_name: str) -> list[str]:
        """Return the direct successors of ``qualified_name`` in the graph."""
        if qualified_name in self._graph:
            return list(self._graph.successors(qualified_name))
        return []

    def _reachable(
        self,
        start: str,
        neighbours: Any,
        max_depth: int,
    ) -> set[str]:
        """Breadth-first walk from ``start`` following ``neighbours``.

        Nodes are expanded only while their depth is below ``max_depth``.
        ``start`` itself is never part of the result.
        """
        from collections import deque

        if start not in self._graph:
            return set()

        seen = {start}
        queue: deque[tuple[str, int]] = deque([(start, 0)])
        while queue:
            node, level = queue.popleft()
            if level >= max_depth:
                continue
            for adjacent in neighbours(node):
                if adjacent in seen:
                    continue
                seen.add(adjacent)
                queue.append((adjacent, level + 1))

        seen.discard(start)
        return seen

    def get_all_callers(
        self,
        qualified_name: str,
        max_depth: int = 10,
    ) -> set[str]:
        """Return every function reaching ``qualified_name`` within ``max_depth`` calls."""
        return self._reachable(qualified_name, self._graph.predecessors, max_depth)

    def get_all_callees(
        self,
        qualified_name: str,
        max_depth: int = 10,
    ) -> set[str]:
        """Return every function reachable from ``qualified_name`` within ``max_depth`` calls."""
        return self._reachable(qualified_name, self._graph.successors, max_depth)

    def get_path(
        self,
        source: str,
        target: str,
    ) -> list[str] | None:
        """Return a shortest call path from ``source`` to ``target``, or ``None``."""
        if source in self._graph and target in self._graph:
            try:
                return nx.shortest_path(self._graph, source, target)
            except nx.NetworkXNoPath:
                pass
        return None

    def get_all_paths(
        self,
        source: str,
        target: str,
        max_depth: int = 10,
    ) -> Iterator[list[str]]:
        """Iterate over simple paths from ``source`` to ``target`` up to ``max_depth``."""
        both_known = source in self._graph and target in self._graph
        if not both_known:
            return iter([])
        return nx.all_simple_paths(self._graph, source, target, cutoff=max_depth)

    def get_strongly_connected_components(self) -> list[set[str]]:
        """Return the strongly connected components of the graph as sets."""
        return list(map(set, nx.strongly_connected_components(self._graph)))

    def get_entry_points(self) -> list[str]:
        """Return non-external, non-module nodes that have no incoming edge."""
        roots: list[str] = []
        for node in self._graph.nodes():
            if self._graph.in_degree(node) != 0:
                continue
            # Nodes without a symbol entry fall back to a blank placeholder.
            symbol = self._symbols.get(node, CallGraphNode("", "", NodeType.UNKNOWN))
            if symbol.is_external:
                continue
            if node.startswith("<module:"):
                continue
            roots.append(node)
        return roots

    def get_leaf_functions(self) -> list[str]:
        """Return nodes that have no outgoing edge."""
        out_degree = self._graph.out_degree
        return [node for node in self._graph.nodes() if out_degree(node) == 0]

    def get_edges_by_type(self, edge_type: EdgeType) -> list[CallGraphEdge]:
        """Return the edges whose type equals ``edge_type``."""
        return [item for item in self._edges if item.edge_type == edge_type]

    def get_edges_by_confidence(
        self,
        min_confidence: ResolutionConfidence,
    ) -> list[CallGraphEdge]:
        """Return edges whose confidence is ``min_confidence`` or better."""
        # Ordered from most to least confident.
        ranking = (
            ResolutionConfidence.EXACT,
            ResolutionConfidence.HIGH,
            ResolutionConfidence.MEDIUM,
            ResolutionConfidence.LOW,
            ResolutionConfidence.UNRESOLVED,
        )
        threshold = ranking.index(min_confidence)

        selected: list[CallGraphEdge] = []
        for item in self._edges:
            if ranking.index(item.confidence) <= threshold:
                selected.append(item)
        return selected

    def get_unresolved_calls(self) -> list[CallGraphEdge]:
        """Return the edges whose confidence is ``UNRESOLVED``."""
        unresolved = ResolutionConfidence.UNRESOLVED
        return [item for item in self._edges if item.confidence == unresolved]

    def get_statistics(self) -> dict[str, Any]:
        """Return summary counts describing the graph and its edges."""
        internal_total = 0
        external_total = 0
        for _, attrs in self._graph.nodes(data=True):
            if attrs.get("is_external", False):
                external_total += 1
            else:
                internal_total += 1

        by_confidence: dict[str, int] = {}
        by_edge_type: dict[str, int] = {}
        by_context: dict[str, int] = {}
        decorator_total = 0

        for item in self._edges:
            confidence_name = item.confidence.name
            by_confidence[confidence_name] = by_confidence.get(confidence_name, 0) + 1

            type_name = item.edge_type.name
            by_edge_type[type_name] = by_edge_type.get(type_name, 0) + 1

            context_name = item.context.name
            by_context[context_name] = by_context.get(context_name, 0) + 1

            if item.edge_type == EdgeType.DECORATOR:
                decorator_total += 1

        return {
            "total_nodes": self.node_count,
            "internal_nodes": internal_total,
            "external_nodes": external_total,
            "total_edges": self.edge_count,
            "entry_points": len(self.get_entry_points()),
            "leaf_functions": len(self.get_leaf_functions()),
            "resolution_confidence": by_confidence,
            "edge_types": by_edge_type,
            "call_contexts": by_context,
            "strongly_connected_components": len(self.get_strongly_connected_components()),
            "decorator_calls": decorator_total,
        }

    def to_manifest_calls(self) -> list[dict[str, Any]]:
        """
        Export every edge as a manifest FunctionCallModel dictionary.

        Returns one dictionary per edge, in edge-list order.
        """
        # NOTE(review): the id is derived from caller, file, line and column
        # only; edges sharing those four values but differing in callee get
        # the same id. Confirm whether that is intended.
        return [
            {
                "id": stable_id(
                    "call", item.caller, str(item.file_path), str(item.line), str(item.column)
                ),
                "caller": item.caller,
                "callee": item.callee,
                "callee_resolved": item.confidence != ResolutionConfidence.UNRESOLVED,
                "location": {
                    "file": str(item.file_path),
                    "line": item.line,
                    "column": item.column,
                },
                "arguments": [],  # Would need argument details
                "context": {
                    "in_try_block": item.in_try_block,
                    "in_conditional": item.in_conditional,
                    "in_loop": item.in_loop,
                },
                "possible_callees": item.possible_callees,
                "resolution_reason": item.resolution_reason,
            }
            for item in self._edges
        ]
|
|
1147
|
+
|
|
1148
|
+
|
|
1149
|
+
# =============================================================================
|
|
1150
|
+
# Convenience Functions
|
|
1151
|
+
# =============================================================================
|
|
1152
|
+
|
|
1153
|
+
|
|
1154
|
+
def build_call_graph(
    parsed_files: list[ParsedFile],
    project_root: Path | None = None,
    type_resolver: TypeResolver | None = None,
    flow_bindings: FlowSensitiveBindings | None = None,
) -> CallGraph:
    """
    Run the full builder pipeline over a set of parsed files.

    Args:
        parsed_files: Parsed files to register with the builder
        project_root: Project root directory passed to the builder
        type_resolver: Optional type resolver forwarded to call resolution
        flow_bindings: Optional pre-built flow-sensitive bindings forwarded
            to call resolution

    Returns:
        The CallGraph produced by the builder
    """
    graph_builder = CallGraphBuilder(project_root)

    # Register every file before resolving any call.
    for parsed_file in parsed_files:
        graph_builder.add_file(parsed_file)

    graph_builder.resolve_calls(type_resolver, flow_bindings=flow_bindings)

    result = graph_builder.build()
    return result
|
|
1179
|
+
|
|
1180
|
+
|
|
1181
|
+
def build_call_graph_with_context(
    parsed_files: list[ParsedFile],
    context: AnalysisContext,
    flow_bindings: FlowSensitiveBindings | None = None,
) -> CallGraph:
    """
    Build a call graph, taking settings from an analysis context.

    The type resolver and project root are read from ``context``; a falsy
    ``context`` results in both being passed as ``None``.
    """
    if context:
        resolver = context.type_resolver
        root = context.project_root
    else:
        resolver = None
        root = None

    return build_call_graph(
        parsed_files,
        project_root=root,
        type_resolver=resolver,
        flow_bindings=flow_bindings,
    )
|