apisec-code-bolt 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apisec_code_bolt/__init__.py +42 -0
- apisec_code_bolt/__main__.py +11 -0
- apisec_code_bolt/analysis/__init__.py +96 -0
- apisec_code_bolt/analysis/analyzer.py +2309 -0
- apisec_code_bolt/analysis/binding_tracker.py +341 -0
- apisec_code_bolt/analysis/call_graph.py +1197 -0
- apisec_code_bolt/analysis/call_graph_types.py +332 -0
- apisec_code_bolt/analysis/call_resolver.py +988 -0
- apisec_code_bolt/analysis/capability_tagger.py +322 -0
- apisec_code_bolt/analysis/config_scanner.py +197 -0
- apisec_code_bolt/analysis/data_flow.py +1883 -0
- apisec_code_bolt/analysis/dependency_extractor.py +959 -0
- apisec_code_bolt/analysis/flow_analysis.py +1406 -0
- apisec_code_bolt/analysis/hof_catalog.py +61 -0
- apisec_code_bolt/analysis/integration_detector.py +1399 -0
- apisec_code_bolt/analysis/literal_scanner.py +300 -0
- apisec_code_bolt/analysis/path_normalizer.py +55 -0
- apisec_code_bolt/analysis/read_site_detector.py +310 -0
- apisec_code_bolt/analysis/request_patterns.py +162 -0
- apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
- apisec_code_bolt/analysis/sink_evidence.py +333 -0
- apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
- apisec_code_bolt/cli/__init__.py +5 -0
- apisec_code_bolt/cli/exit_codes.py +17 -0
- apisec_code_bolt/cli/main.py +1069 -0
- apisec_code_bolt/cloud/__init__.py +1 -0
- apisec_code_bolt/cloud/apisec_client.py +118 -0
- apisec_code_bolt/cloud/client.py +255 -0
- apisec_code_bolt/core/__init__.py +75 -0
- apisec_code_bolt/core/config.py +528 -0
- apisec_code_bolt/core/credentials.py +65 -0
- apisec_code_bolt/core/discovery.py +433 -0
- apisec_code_bolt/core/log_format.py +115 -0
- apisec_code_bolt/core/manifest.py +1009 -0
- apisec_code_bolt/core/repo.py +280 -0
- apisec_code_bolt/core/state.py +59 -0
- apisec_code_bolt/core/telemetry.py +451 -0
- apisec_code_bolt/core/types.py +587 -0
- apisec_code_bolt/fingerprinting/__init__.py +1 -0
- apisec_code_bolt/frameworks/__init__.py +29 -0
- apisec_code_bolt/frameworks/_jwt_common.py +50 -0
- apisec_code_bolt/frameworks/auth_helpers.py +437 -0
- apisec_code_bolt/frameworks/base.py +608 -0
- apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
- apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
- apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
- apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
- apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
- apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
- apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
- apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
- apisec_code_bolt/frameworks/java/__init__.py +6 -0
- apisec_code_bolt/frameworks/java/_annotations.py +167 -0
- apisec_code_bolt/frameworks/java/_constraints.py +128 -0
- apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
- apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
- apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
- apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
- apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
- apisec_code_bolt/frameworks/js/__init__.py +8 -0
- apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
- apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
- apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
- apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
- apisec_code_bolt/frameworks/python/__init__.py +19 -0
- apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
- apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
- apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
- apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
- apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
- apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
- apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
- apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
- apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
- apisec_code_bolt/parsing/__init__.py +62 -0
- apisec_code_bolt/parsing/base.py +554 -0
- apisec_code_bolt/parsing/csharp/__init__.py +5 -0
- apisec_code_bolt/parsing/csharp/language_services.py +203 -0
- apisec_code_bolt/parsing/csharp/literals.py +72 -0
- apisec_code_bolt/parsing/csharp/parser.py +1158 -0
- apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
- apisec_code_bolt/parsing/js/__init__.py +5 -0
- apisec_code_bolt/parsing/js/language_services.py +118 -0
- apisec_code_bolt/parsing/js/parser.py +622 -0
- apisec_code_bolt/parsing/jvm/__init__.py +7 -0
- apisec_code_bolt/parsing/jvm/language_services.py +270 -0
- apisec_code_bolt/parsing/jvm/parser.py +774 -0
- apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
- apisec_code_bolt/parsing/python/__init__.py +150 -0
- apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
- apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
- apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
- apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
- apisec_code_bolt/parsing/python/expression_utils.py +221 -0
- apisec_code_bolt/parsing/python/extraction_types.py +271 -0
- apisec_code_bolt/parsing/python/language_services.py +487 -0
- apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
- apisec_code_bolt/parsing/python/parser.py +719 -0
- apisec_code_bolt/parsing/python/path_resolver.py +576 -0
- apisec_code_bolt/parsing/python/router_registry.py +806 -0
- apisec_code_bolt/parsing/python/type_resolver.py +730 -0
- apisec_code_bolt/parsing/python/visitors.py +1544 -0
- apisec_code_bolt/parsing/services.py +544 -0
- apisec_code_bolt/query/__init__.py +1 -0
- apisec_code_bolt/query/ast_cache.py +182 -0
- apisec_code_bolt/query/executor.py +283 -0
- apisec_code_bolt/query/handlers.py +832 -0
- apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
- apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
- apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
- apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,1544 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LibCST visitors for Python code analysis.
|
|
3
|
+
|
|
4
|
+
This module contains visitors that traverse Python CST and extract:
|
|
5
|
+
- Function/method definitions
|
|
6
|
+
- Class definitions
|
|
7
|
+
- Import statements
|
|
8
|
+
- Function calls
|
|
9
|
+
- Assignments
|
|
10
|
+
- Decorators
|
|
11
|
+
|
|
12
|
+
These are low-level extraction visitors. Higher-level semantic analysis
|
|
13
|
+
is done in the parser module.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from collections.abc import Sequence
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
import libcst as cst
|
|
23
|
+
|
|
24
|
+
from ...core.types import CodeLocation
|
|
25
|
+
from .expression_utils import (
|
|
26
|
+
collect_name_nodes,
|
|
27
|
+
detect_concatenation,
|
|
28
|
+
detect_container_type,
|
|
29
|
+
detect_format_call,
|
|
30
|
+
detect_fstring,
|
|
31
|
+
extract_value_metadata,
|
|
32
|
+
is_classvar_annotation,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
# Re-export dataclasses so existing ``from .visitors import ...`` still works.
|
|
36
|
+
from .extraction_types import ( # noqa: F401 – re-exports
|
|
37
|
+
ExtractedArgument,
|
|
38
|
+
ExtractedAssignment,
|
|
39
|
+
ExtractedCall,
|
|
40
|
+
ExtractedClass,
|
|
41
|
+
ExtractedControlFlowBlock,
|
|
42
|
+
ExtractedDecorator,
|
|
43
|
+
ExtractedField,
|
|
44
|
+
ExtractedFunction,
|
|
45
|
+
ExtractedImport,
|
|
46
|
+
ExtractedParameter,
|
|
47
|
+
ExtractedReturn,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
# =============================================================================
|
|
51
|
+
# Position Tracking Mixin
|
|
52
|
+
# =============================================================================
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class PositionProvider:
    """Mixin that resolves source positions for CST nodes.

    The constructor keeps the raw source text together with a table of the
    index at which every line starts.  Node lookups go through
    ``self._wrapper``, which this class itself never assigns (presumably the
    class it is mixed into supplies it); without it every lookup yields zeros.
    """

    def __init__(self, source: str) -> None:
        self._source = source
        self._line_offsets = self._compute_line_offsets(source)

    @staticmethod
    def _compute_line_offsets(source: str) -> list[int]:
        """Return the index in *source* at which each line begins.

        The first entry is always ``0``; one more entry follows every newline
        character.  Indices count characters of the ``str``.
        """
        starts = [0]
        starts.extend(index + 1 for index, ch in enumerate(source) if ch == "\n")
        return starts

    def _get_position(self, node: cst.CSTNode) -> tuple[int, int, int, int]:
        """Return ``(line, column, end_line, end_column)`` for *node*.

        Falls back to ``(0, 0, 0, 0)`` when the wrapper attribute is missing
        or the node has no recorded position.
        """
        try:
            # Touch the wrapper first so a missing attribute is reported
            # through the AttributeError fallback below.
            resolve = self._wrapper.resolve
            span = resolve(cst.metadata.PositionProvider)[node]
            return (
                span.start.line,
                span.start.column,
                span.end.line,
                span.end.column,
            )
        except (KeyError, AttributeError):
            return (0, 0, 0, 0)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# =============================================================================
|
|
87
|
+
# Main Extraction Visitor
|
|
88
|
+
# =============================================================================
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class PythonExtractor(cst.CSTVisitor):
|
|
92
|
+
"""
|
|
93
|
+
Visitor that extracts structural information from Python CST.
|
|
94
|
+
|
|
95
|
+
Extracts:
|
|
96
|
+
- Functions and methods
|
|
97
|
+
- Classes
|
|
98
|
+
- Imports
|
|
99
|
+
- Function calls
|
|
100
|
+
- Assignments
|
|
101
|
+
"""
|
|
102
|
+
|
|
103
|
+
def __init__(
|
|
104
|
+
self,
|
|
105
|
+
source: str,
|
|
106
|
+
file_path: Path | None = None,
|
|
107
|
+
module_name: str | None = None,
|
|
108
|
+
) -> None:
|
|
109
|
+
self._source = source
|
|
110
|
+
self._file_path = file_path
|
|
111
|
+
self._module_name = module_name or ""
|
|
112
|
+
self._wrapper: cst.MetadataWrapper | None = None
|
|
113
|
+
|
|
114
|
+
# Extraction results
|
|
115
|
+
self.functions: list[ExtractedFunction] = []
|
|
116
|
+
self.classes: list[ExtractedClass] = []
|
|
117
|
+
self.imports: list[ExtractedImport] = []
|
|
118
|
+
self.calls: list[ExtractedCall] = []
|
|
119
|
+
self.assignments: list[ExtractedAssignment] = []
|
|
120
|
+
self.module_docstring: str | None = None
|
|
121
|
+
self.module_variables: list[str] = []
|
|
122
|
+
|
|
123
|
+
# Context tracking
|
|
124
|
+
self._current_class: ExtractedClass | None = None
|
|
125
|
+
self._current_function: ExtractedFunction | None = None
|
|
126
|
+
self._class_stack: list[ExtractedClass] = []
|
|
127
|
+
self._function_stack: list[ExtractedFunction] = []
|
|
128
|
+
|
|
129
|
+
# Control flow context tracking
|
|
130
|
+
self._in_loop: int = 0 # Nesting depth
|
|
131
|
+
self._in_conditional: int = 0
|
|
132
|
+
self._in_try: bool = False
|
|
133
|
+
self._in_except: bool = False
|
|
134
|
+
self._in_finally: bool = False
|
|
135
|
+
self._in_with: bool = False
|
|
136
|
+
self._in_comprehension: bool = False
|
|
137
|
+
|
|
138
|
+
# Control flow blocks for current function
|
|
139
|
+
self._current_control_flow: list[ExtractedControlFlowBlock] = []
|
|
140
|
+
|
|
141
|
+
# Position tracking
|
|
142
|
+
self._positions: dict[int, tuple[int, int, int, int]] = {}
|
|
143
|
+
|
|
144
|
+
def set_metadata_wrapper(self, wrapper: cst.MetadataWrapper) -> None:
    """Remember *wrapper* so later position lookups can consult it."""
    self._wrapper = wrapper
|
|
147
|
+
|
|
148
|
+
# =========================================================================
|
|
149
|
+
# Import Extraction
|
|
150
|
+
# =========================================================================
|
|
151
|
+
|
|
152
|
+
def visit_Import(self, node: cst.Import) -> bool:
    """Record each module named by a plain ``import`` statement.

    One ``ExtractedImport`` is appended to ``self.imports`` per alias, with
    the dotted module name, its optional ``as`` binding and the statement
    line.  Always returns ``False`` so the children are not visited.
    """
    entries = node.names if isinstance(node.names, Sequence) else ()

    for entry in entries:
        if not isinstance(entry, cst.ImportAlias):
            continue

        dotted = self._get_dotted_name(entry.name)
        as_name = entry.asname
        if as_name and isinstance(as_name, cst.AsName):
            bound_as = self._node_to_code(as_name.name)
        else:
            bound_as = None

        record = ExtractedImport(
            module=dotted,
            names=[(dotted, bound_as)],
            is_from_import=False,
            line=self._get_line(node),
        )
        self.imports.append(record)

    return False
|
|
170
|
+
|
|
171
|
+
def visit_ImportFrom(self, node: cst.ImportFrom) -> bool:
    """Record a ``from x import y`` statement.

    Appends a single ``ExtractedImport`` carrying the source module (``""``
    when the statement names none), the imported ``(name, alias)`` pairs
    (``("*", None)`` for a star import) and the relative-import depth.
    Always returns ``False`` so the children are not visited.
    """
    # Each leading dot adds one level of relative import.
    level = sum(1 for dot in (node.relative or ()) if isinstance(dot, cst.Dot))

    source_module = self._get_dotted_name(node.module) if node.module else ""

    imported = node.names
    bindings: list[tuple[str, str | None]] = []
    if isinstance(imported, cst.ImportStar):
        bindings = [("*", None)]
    elif isinstance(imported, Sequence):
        for entry in imported:
            if not isinstance(entry, cst.ImportAlias):
                continue
            original = self._get_dotted_name(entry.name)
            as_name = entry.asname
            if as_name and isinstance(as_name, cst.AsName):
                bound_as = self._node_to_code(as_name.name)
            else:
                bound_as = None
            bindings.append((original, bound_as))

    record = ExtractedImport(
        module=source_module,
        names=bindings,
        is_from_import=True,
        is_relative=level > 0,
        relative_level=level,
        line=self._get_line(node),
    )
    self.imports.append(record)
    return False
|
|
211
|
+
|
|
212
|
+
# =========================================================================
|
|
213
|
+
# Function Extraction
|
|
214
|
+
# =========================================================================
|
|
215
|
+
|
|
216
|
+
def visit_FunctionDef(self, node: cst.FunctionDef) -> bool:
    """Extract a function definition and make it the current function.

    While a class is current, the function is attached to that class as a
    method: ``binding`` is ``"static"`` when it carries a ``staticmethod``
    or ``classmethod`` decorator and ``"instance"`` otherwise.  With no
    current class it is appended to ``self.functions``.

    Returns:
        ``True`` so the body is visited for calls and assignments.
    """
    func = self._extract_function(node)

    if self._current_class:
        func.owner_type = self._current_class.name
        is_static = any(d.name in ("staticmethod", "classmethod") for d in func.decorators)
        func.binding = "static" if is_static else "instance"
        self._current_class.methods.append(func)
    else:
        self.functions.append(func)

    # Push onto stack for nested context
    self._function_stack.append(func)
    self._current_function = func

    # Park the enclosing scope's control-flow blocks before starting a fresh
    # list for this function; leave_FunctionDef puts them back.  The stack is
    # created on first use so this block does not depend on __init__.
    saved = getattr(self, "_control_flow_stack", None)
    if saved is None:
        saved = self._control_flow_stack = []
    saved.append(self._current_control_flow)
    self._current_control_flow = []

    return True  # Continue visiting to extract calls/assignments in body


def leave_FunctionDef(self, node: cst.FunctionDef) -> None:
    """Finish a function definition and return to the enclosing scope.

    Stores the collected control-flow summary on the function being left
    (only when at least one block was collected), pops the function stack,
    and restores the control-flow blocks that belonged to the enclosing
    scope.  Previously those blocks were replaced by an empty list, so a
    nested ``def`` discarded everything gathered for its parent so far.
    """
    # Store control flow info before leaving
    if self._current_function and self._current_control_flow:
        self._current_function.control_flow_info = self._build_control_flow_info()

    if self._function_stack:
        self._function_stack.pop()
    self._current_function = self._function_stack[-1] if self._function_stack else None

    # Restore control flow tracking for enclosing function
    saved = getattr(self, "_control_flow_stack", None)
    self._current_control_flow = saved.pop() if saved else []
|
|
251
|
+
|
|
252
|
+
def _build_control_flow_info(self) -> dict[str, Any]:
|
|
253
|
+
"""Build control flow info dictionary from collected blocks."""
|
|
254
|
+
if_blocks: list[dict[str, Any]] = []
|
|
255
|
+
loops: list[dict[str, Any]] = []
|
|
256
|
+
try_blocks: list[dict[str, Any]] = []
|
|
257
|
+
with_blocks: list[dict[str, Any]] = []
|
|
258
|
+
comprehensions: list[dict[str, Any]] = []
|
|
259
|
+
|
|
260
|
+
for block in self._current_control_flow:
|
|
261
|
+
if block.block_type == "if":
|
|
262
|
+
if_blocks.append(
|
|
263
|
+
{
|
|
264
|
+
"start_line": block.start_line,
|
|
265
|
+
"end_line": block.end_line,
|
|
266
|
+
"has_elif": block.has_elif,
|
|
267
|
+
"has_else": block.has_else,
|
|
268
|
+
"elif_lines": block.elif_lines,
|
|
269
|
+
"else_line": block.else_line,
|
|
270
|
+
}
|
|
271
|
+
)
|
|
272
|
+
elif block.block_type in ("for", "while"):
|
|
273
|
+
loops.append(
|
|
274
|
+
{
|
|
275
|
+
"loop_type": block.block_type,
|
|
276
|
+
"start_line": block.start_line,
|
|
277
|
+
"end_line": block.end_line,
|
|
278
|
+
"has_break": block.has_break,
|
|
279
|
+
"has_continue": block.has_continue,
|
|
280
|
+
}
|
|
281
|
+
)
|
|
282
|
+
elif block.block_type == "try":
|
|
283
|
+
try_blocks.append(
|
|
284
|
+
{
|
|
285
|
+
"try_start": block.start_line,
|
|
286
|
+
"try_end": block.end_line,
|
|
287
|
+
"except_blocks": [
|
|
288
|
+
{"start_line": s, "end_line": e} for s, e in block.except_blocks
|
|
289
|
+
],
|
|
290
|
+
"finally_block": (
|
|
291
|
+
{
|
|
292
|
+
"start_line": block.finally_block[0],
|
|
293
|
+
"end_line": block.finally_block[1],
|
|
294
|
+
}
|
|
295
|
+
if block.finally_block
|
|
296
|
+
else None
|
|
297
|
+
),
|
|
298
|
+
}
|
|
299
|
+
)
|
|
300
|
+
elif block.block_type == "with":
|
|
301
|
+
with_blocks.append(
|
|
302
|
+
{
|
|
303
|
+
"start_line": block.start_line,
|
|
304
|
+
"end_line": block.end_line,
|
|
305
|
+
"context_expr": block.context_expr,
|
|
306
|
+
"with_items": block.with_items,
|
|
307
|
+
}
|
|
308
|
+
)
|
|
309
|
+
elif block.block_type == "comprehension":
|
|
310
|
+
comprehensions.append(
|
|
311
|
+
{
|
|
312
|
+
"line": block.start_line,
|
|
313
|
+
}
|
|
314
|
+
)
|
|
315
|
+
|
|
316
|
+
return {
|
|
317
|
+
"if_blocks": if_blocks,
|
|
318
|
+
"loops": loops,
|
|
319
|
+
"try_blocks": try_blocks,
|
|
320
|
+
"with_blocks": with_blocks,
|
|
321
|
+
"comprehensions": comprehensions,
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
def _extract_function(self, node: cst.FunctionDef) -> ExtractedFunction:
    """Build an ``ExtractedFunction`` describing *node*.

    The qualified name joins the module name (when set), the names of all
    classes on the class stack and the function name with dots.  The record
    also carries parameters, return-annotation source, decorators,
    docstring, position, local variable names and the body source (``None``
    when that source is empty).
    """
    func_name = node.name.value

    # Dotted path: module, every enclosing class, then the function itself.
    scope = [self._module_name] if self._module_name else []
    scope.extend(owner.name for owner in self._class_stack)
    scope.append(func_name)
    dotted = ".".join(part for part in scope if part)

    parameters = self._extract_parameters(node.params)
    returns = self._node_to_code(node.returns.annotation) if node.returns else None
    decorators = self._extract_decorators(node.decorators)
    doc = self._extract_docstring(node.body)
    start_line, start_col, last_line, _ = self._get_node_position(node)
    local_names = self._extract_local_variables(node.body)

    # Body text is kept so callers can hash the function's content.
    body_text = self._node_to_code(node.body)

    return ExtractedFunction(
        name=func_name,
        qualified_name=dotted,
        parameters=parameters,
        return_annotation=returns,
        decorators=decorators,
        is_async=isinstance(node.asynchronous, cst.Asynchronous),
        docstring=doc,
        body_source=body_text or None,
        line=start_line,
        end_line=last_line,
        column=start_col,
        local_variables=local_names,
    )
|
|
372
|
+
|
|
373
|
+
def _extract_parameters(self, params: cst.Parameters) -> list[ExtractedParameter]:
|
|
374
|
+
"""Extract function parameters."""
|
|
375
|
+
result: list[ExtractedParameter] = []
|
|
376
|
+
|
|
377
|
+
# Positional-only parameters (before /)
|
|
378
|
+
for param in params.posonly_params:
|
|
379
|
+
result.append(self._extract_single_param(param, is_positional_only=True))
|
|
380
|
+
|
|
381
|
+
# Regular parameters
|
|
382
|
+
for param in params.params:
|
|
383
|
+
result.append(self._extract_single_param(param))
|
|
384
|
+
|
|
385
|
+
# *args
|
|
386
|
+
if params.star_arg and isinstance(params.star_arg, cst.Param):
|
|
387
|
+
result.append(self._extract_single_param(params.star_arg, is_variadic=True))
|
|
388
|
+
|
|
389
|
+
# Keyword-only parameters (after *)
|
|
390
|
+
for param in params.kwonly_params:
|
|
391
|
+
result.append(self._extract_single_param(param, is_keyword_only=True))
|
|
392
|
+
|
|
393
|
+
# **kwargs
|
|
394
|
+
if params.star_kwarg:
|
|
395
|
+
result.append(self._extract_single_param(params.star_kwarg, is_keyword_variadic=True))
|
|
396
|
+
|
|
397
|
+
return result
|
|
398
|
+
|
|
399
|
+
def _extract_single_param(
    self,
    param: cst.Param,
    is_variadic: bool = False,
    is_keyword_variadic: bool = False,
    is_positional_only: bool = False,
    is_keyword_only: bool = False,
) -> ExtractedParameter:
    """Describe one parameter.

    The annotation and default are stored as source text, or ``None`` when
    the parameter has none.  The four flags are copied through unchanged.
    """
    annotation_text = (
        self._node_to_code(param.annotation.annotation) if param.annotation else None
    )
    default_text = self._node_to_code(param.default) if param.default else None

    return ExtractedParameter(
        name=param.name.value,
        annotation=annotation_text,
        default=default_text,
        is_variadic=is_variadic,
        is_keyword_variadic=is_keyword_variadic,
        is_positional_only=is_positional_only,
        is_keyword_only=is_keyword_only,
    )
|
|
427
|
+
|
|
428
|
+
def _extract_local_variables(self, body: cst.BaseSuite) -> list[str]:
    """Return the names assigned by top-level statements of a function body.

    Only simple statement lines directly inside an indented block are
    inspected; assignments nested in compound statements are not visited.

    Args:
        body: Suite of the function being analysed.

    Returns:
        Unique variable names in first-assignment order.  The order is now
        deterministic; going through ``set`` made it vary between runs
        because string hashing is randomised per process.
    """
    if not isinstance(body, cst.IndentedBlock):
        return []

    names: list[str] = []
    for stmt in body.body:
        if not isinstance(stmt, cst.SimpleStatementLine):
            continue
        for item in stmt.body:
            if isinstance(item, (cst.Assign, cst.AnnAssign)):
                names.extend(self._get_assignment_targets(item))

    # dict preserves insertion order, so this de-duplicates without shuffling.
    return list(dict.fromkeys(names))
|
|
441
|
+
|
|
442
|
+
def _get_assignment_targets(self, node: cst.Assign | cst.AnnAssign) -> list[str]:
    """Return the plain variable names bound by an assignment.

    Handles annotated assignments, chained targets (``a = b = ...``) and
    unpacking.  Unpacking is followed through tuple *and* list displays at
    any nesting depth, so ``[a, b] = ...`` and ``a, (b, c) = ...`` are
    covered as well as ``a, b = ...``.  Targets that are not bare names
    (attributes, subscripts) contribute nothing.

    Args:
        node: The assignment statement to inspect.

    Returns:
        Names in left-to-right source order; duplicates are kept.
    """
    if isinstance(node, cst.AnnAssign):
        roots = [node.target]
    elif isinstance(node, cst.Assign):
        roots = [target.target for target in node.targets]
    else:
        roots = []

    names: list[str] = []
    # Explicit stack; children are pushed reversed so that names come out in
    # left-to-right order.
    pending = roots[::-1]
    while pending:
        expr = pending.pop()
        if isinstance(expr, cst.Name):
            names.append(expr.value)
        elif isinstance(expr, (cst.Tuple, cst.List)):
            # Plain and starred elements both expose the target as ``.value``.
            pending.extend(element.value for element in reversed(expr.elements))

    return names
|
|
460
|
+
|
|
461
|
+
# =========================================================================
|
|
462
|
+
# Class Extraction
|
|
463
|
+
# =========================================================================
|
|
464
|
+
|
|
465
|
+
def visit_ClassDef(self, node: cst.ClassDef) -> bool:
    """Extract a class definition and make it the current class.

    A class met while no other class is current is appended to
    ``self.classes``.  A nested class is not added there; it is only pushed
    on the class stack.

    Returns:
        ``True`` so the class body is visited and its methods extracted.
    """
    extracted = self._extract_class(node)

    if not self._current_class:
        self.classes.append(extracted)

    self._class_stack.append(extracted)
    self._current_class = extracted
    return True
|
|
480
|
+
|
|
481
|
+
def leave_ClassDef(self, node: cst.ClassDef) -> None:
    """Pop the class being left and make its encloser current again.

    ``self._current_class`` becomes the new top of the class stack, or
    ``None`` once the stack is empty.
    """
    stack = self._class_stack
    if stack:
        stack.pop()
    self._current_class = stack[-1] if stack else None
|
|
486
|
+
|
|
487
|
+
def _extract_class(self, node: cst.ClassDef) -> ExtractedClass:
    """Build an ``ExtractedClass`` describing *node*.

    The qualified name joins the module name (when set), the names of the
    classes already on the class stack and this class's name with dots.
    A class counts as a dataclass when a decorator is named ``dataclass``,
    and as a Pydantic model when a base's source contains ``BaseModel`` or
    ``BaseSettings`` or a decorator is named ``validator`` or
    ``field_validator``.
    """
    class_name = node.name.value

    # Dotted path: module, every enclosing class, then the class itself.
    scope = [self._module_name] if self._module_name else []
    scope.extend(owner.name for owner in self._class_stack)
    scope.append(class_name)
    dotted = ".".join(part for part in scope if part)

    base_sources: list[str] = [
        self._node_to_code(arg.value)
        for arg in (node.bases or ())
        if isinstance(arg, cst.Arg)
    ]

    decorators = self._extract_decorators(node.decorators)
    decorator_names = [dec.name for dec in decorators]

    marked_dataclass = "dataclass" in decorator_names
    inherits_pydantic = any(
        "BaseModel" in base or "BaseSettings" in base for base in base_sources
    )
    has_validator = any(
        dec_name in {"validator", "field_validator"} for dec_name in decorator_names
    )

    doc = self._extract_docstring(node.body)

    # Annotated class-level attributes (relevant to Pydantic/dataclass).
    declared_fields = self._extract_class_fields(node.body)

    start_line, start_col, last_line, _ = self._get_node_position(node)

    return ExtractedClass(
        name=class_name,
        qualified_name=dotted,
        bases=base_sources,
        decorators=decorators,
        docstring=doc,
        is_dataclass=marked_dataclass,
        is_pydantic_model=inherits_pydantic or has_validator,
        fields=declared_fields,
        line=start_line,
        end_line=last_line,
        column=start_col,
    )
|
|
536
|
+
|
|
537
|
+
_FRAMEWORK_INTERNAL_NAMES = frozenset(
|
|
538
|
+
{
|
|
539
|
+
"model_config",
|
|
540
|
+
"model_fields",
|
|
541
|
+
"model_computed_fields",
|
|
542
|
+
"model_extra",
|
|
543
|
+
"model_fields_set",
|
|
544
|
+
"__private_attributes__",
|
|
545
|
+
"__class_vars__",
|
|
546
|
+
"__validators__",
|
|
547
|
+
"__pre_root_validators__",
|
|
548
|
+
"__post_root_validators__",
|
|
549
|
+
}
|
|
550
|
+
)
|
|
551
|
+
|
|
552
|
+
def _extract_class_fields(self, body: cst.BaseSuite) -> list[ExtractedField]:
    """Collect annotated class-level attributes as fields.

    Only annotated assignments to a bare name that sit directly in the
    class body are considered.  Names listed in
    ``_FRAMEWORK_INTERNAL_NAMES`` and annotations recognised by
    ``is_classvar_annotation`` are skipped.  When the assigned value is a
    call written exactly as ``Field(...)`` or ``field(...)``, its keyword
    arguments are stored as ``field_info``.
    """
    if not isinstance(body, cst.IndentedBlock):
        return []

    collected: list[ExtractedField] = []

    for stmt in body.body:
        if not isinstance(stmt, cst.SimpleStatementLine):
            continue

        for item in stmt.body:
            if not isinstance(item, cst.AnnAssign):
                continue
            if not isinstance(item.target, cst.Name):
                continue

            field_name = item.target.value
            if field_name in self._FRAMEWORK_INTERNAL_NAMES:
                continue

            annotation_node = item.annotation.annotation
            annotation_text = self._node_to_code(annotation_node)
            if is_classvar_annotation(annotation_node):
                continue

            default_text = None
            options: dict[str, Any] = {}
            assigned = item.value
            if assigned:
                default_text = self._node_to_code(assigned)
                # A Field()/field() call carries extra per-field settings.
                if isinstance(assigned, cst.Call):
                    if self._node_to_code(assigned.func) in {"Field", "field"}:
                        options = self._extract_field_call_args(assigned)

            collected.append(
                ExtractedField(
                    name=field_name,
                    annotation=annotation_text,
                    default=default_text,
                    field_info=options,
                )
            )

    return collected
|
|
597
|
+
|
|
598
|
+
def _extract_field_call_args(self, call: cst.Call) -> dict[str, Any]:
|
|
599
|
+
"""Extract arguments from Field() call."""
|
|
600
|
+
result: dict[str, Any] = {}
|
|
601
|
+
|
|
602
|
+
for arg in call.args:
|
|
603
|
+
if arg.keyword:
|
|
604
|
+
key = arg.keyword.value
|
|
605
|
+
value = self._node_to_code(arg.value)
|
|
606
|
+
result[key] = value
|
|
607
|
+
|
|
608
|
+
return result
|
|
609
|
+
|
|
610
|
+
# =========================================================================
|
|
611
|
+
# Decorator Extraction
|
|
612
|
+
# =========================================================================
|
|
613
|
+
|
|
614
|
+
def _extract_decorators(self, decorators: Sequence[cst.Decorator]) -> list[ExtractedDecorator]:
|
|
615
|
+
"""Extract decorator information."""
|
|
616
|
+
result: list[ExtractedDecorator] = []
|
|
617
|
+
|
|
618
|
+
for dec in decorators:
|
|
619
|
+
extracted = self._extract_single_decorator(dec)
|
|
620
|
+
if extracted:
|
|
621
|
+
result.append(extracted)
|
|
622
|
+
|
|
623
|
+
return result
|
|
624
|
+
|
|
625
|
+
def _extract_single_decorator(self, decorator: cst.Decorator) -> ExtractedDecorator | None:
    """Describe one decorator, or return ``None`` for unsupported shapes.

    Supported shapes are ``@name``, ``@obj.attr`` and calls of either
    (``@name(...)``, ``@obj.attr(...)``).  For calls, keyword arguments go
    to ``arguments`` and the remaining ones to ``positional_args``; each
    value is passed through ``_extract_literal_or_code``.  ``name`` is the
    last identifier and ``full_name`` the complete dotted source text.
    """
    target = decorator.decorator
    keyword_args: dict[str, Any] = {}
    positional_args: list[Any] = []

    if isinstance(target, cst.Name):
        # @name
        short = qualified = target.value
    elif isinstance(target, cst.Call):
        # @name(...) or @obj.attr(...)
        callee = target.func
        if isinstance(callee, cst.Name):
            short = qualified = callee.value
        elif isinstance(callee, cst.Attribute):
            short = callee.attr.value
            qualified = self._node_to_code(callee)
        else:
            return None

        for arg in target.args:
            extracted = self._extract_literal_or_code(arg.value)
            if arg.keyword:
                keyword_args[arg.keyword.value] = extracted
            else:
                positional_args.append(extracted)
    elif isinstance(target, cst.Attribute):
        # @obj.attr without a call
        qualified = self._node_to_code(target)
        short = target.attr.value
    else:
        return None

    return ExtractedDecorator(
        name=short,
        full_name=qualified,
        arguments=keyword_args,
        positional_args=positional_args,
        location=self._make_location(decorator),
        raw_source=self._node_to_code(decorator),
    )
|
|
693
|
+
|
|
694
|
+
# =========================================================================
|
|
695
|
+
# Call Extraction
|
|
696
|
+
# =========================================================================
|
|
697
|
+
|
|
698
|
+
def visit_Call(self, node: cst.Call) -> bool:
    """Record the call at ``node`` and keep traversing.

    The record produced by ``_extract_call`` is appended to ``self.calls``
    when it is truthy.

    Returns:
        Always ``True`` so the visitor descends into the call and nested
        calls (in the callee or the arguments) are visited as well.
    """
    extracted = self._extract_call(node)
    if not extracted:
        return True
    self.calls.append(extracted)
    return True
|
|
704
|
+
|
|
705
|
+
def _extract_call(self, node: cst.Call) -> ExtractedCall | None:
    """Build an ``ExtractedCall`` record for one call expression.

    Args:
        node: The call node being visited.

    Returns:
        A record holding the callee source text, the extracted arguments,
        the call position, the qualified name of the enclosing function
        (``None`` when no function is being visited) and a snapshot of the
        visitor's control-flow state at the time of the call.
    """
    target = node.func

    # ``obj.method(...)`` style callees are reported as method calls; the
    # source text left of the final attribute is recorded as the receiver.
    if isinstance(target, cst.Attribute):
        method_call, receiver_src = True, self._node_to_code(target.value)
    else:
        method_call, receiver_src = False, None

    start_line, start_col, last_line, _ = self._get_node_position(node)
    enclosing = self._current_function

    return ExtractedCall(
        callee=self._node_to_code(target),
        arguments=self._extract_call_arguments(node.args),
        line=start_line,
        column=start_col,
        end_line=last_line,
        in_function=enclosing.qualified_name if enclosing else None,
        is_method_call=method_call,
        receiver=receiver_src,
        # Control flow context: the two counters double as depth values.
        in_loop=self._in_loop > 0,
        in_conditional=self._in_conditional > 0,
        in_try=self._in_try,
        in_except=self._in_except,
        in_finally=self._in_finally,
        in_with=self._in_with,
        in_comprehension=self._in_comprehension,
        loop_depth=self._in_loop,
        conditional_depth=self._in_conditional,
    )
|
|
746
|
+
|
|
747
|
+
def _extract_call_arguments(self, args: Sequence[cst.Arg]) -> list[ExtractedArgument]:
    """Describe every argument of a call as an ``ExtractedArgument``.

    Positional indices are handed out only to arguments that have neither a
    keyword nor a star prefix; keyword, ``*`` and ``**`` arguments keep
    ``position=None``.

    Args:
        args: The argument nodes of the call, in source order.

    Returns:
        One record per argument, in the same order as ``args``.
    """
    extracted: list[ExtractedArgument] = []
    next_position = 0

    for arg in args:
        expr = arg.value
        star = arg.star

        keyword = arg.keyword.value if arg.keyword else None
        position = None
        if not arg.keyword and star == "":
            position = next_position
            next_position += 1

        value_source = self._node_to_code(expr)
        is_lit, lit_value, lit_type = self._extract_literal_info(expr)
        is_name, name_value = self._extract_name_info(expr)

        # Structural evidence about how the value is built.
        fstring = detect_fstring(expr)
        concatenation = detect_concatenation(expr)
        format_call = detect_format_call(expr)
        container = detect_container_type(expr)
        source_names = collect_name_nodes(expr)

        # When the argument is itself a call, remember what is being called.
        from_call = isinstance(expr, cst.Call)
        inner_callee = self._node_to_code(expr.func) if from_call else None

        extracted.append(
            ExtractedArgument(
                position=position,
                keyword=keyword,
                value_source=value_source,
                is_literal=is_lit,
                literal_value=lit_value,
                literal_type=lit_type,
                is_name=is_name,
                name_value=name_value,
                is_starred=star == "*",
                is_double_starred=star == "**",
                is_string_interpolation=fstring,
                is_concatenation=concatenation,
                is_format_call=format_call,
                container_type=container,
                source_variables=source_names,
                is_call_result=from_call,
                called_function=inner_callee,
            )
        )

    return extracted
|
|
807
|
+
|
|
808
|
+
def _extract_literal_info(self, node: cst.BaseExpression) -> tuple[bool, Any, str | None]:
    """Classify ``node`` as a literal and extract its value.

    Args:
        node: The expression to inspect.

    Returns:
        ``(is_literal, value, type_name)``.  Integers and floats are
        converted to Python numbers, string nodes to their text, the names
        ``True``/``False``/``None`` to the matching constant, and list, dict
        and tuple displays are reported with their source text as the value.
        Anything else yields ``(False, None, None)``.
    """
    if isinstance(node, cst.Integer):
        # Base 0 makes int() follow Python literal syntax, so prefixed
        # literals such as 0x1F, 0o17 and 0b101 are parsed instead of
        # raising ValueError as plain int() does.
        return True, int(node.value, 0), "int"
    if isinstance(node, cst.Float):
        return True, float(node.value), "float"
    if isinstance(node, (cst.SimpleString, cst.FormattedString, cst.ConcatenatedString)):
        try:
            return True, self._extract_string_value(node), "str"
        except Exception:
            # Best effort: fall back to the raw source text of the node.
            return True, self._node_to_code(node), "str"
    if isinstance(node, cst.Name):
        if node.value == "True":
            return True, True, "bool"
        if node.value == "False":
            return True, False, "bool"
        if node.value == "None":
            return True, None, "None"
    # Container displays are not evaluated; their source text is the value.
    for node_type, type_name in ((cst.List, "list"), (cst.Dict, "dict"), (cst.Tuple, "tuple")):
        if isinstance(node, node_type):
            return True, self._node_to_code(node), type_name

    return False, None, None
|
|
836
|
+
|
|
837
|
+
def _extract_string_value(self, node: cst.BaseExpression) -> str:
    """Return the text between the quotes of a simple string node.

    Any prefix characters before the first quote (``r``, ``b``, ...) are
    skipped, and both single and triple quoting are recognised.  The text
    is sliced out as written: escape sequences are not decoded.  Nodes
    that are not ``cst.SimpleString`` (and strings without a quote
    character) are returned as their source text.
    """
    if not isinstance(node, cst.SimpleString):
        return self._node_to_code(node)

    text = node.value
    quote_at = next((idx for idx, ch in enumerate(text) if ch in "\"'"), None)
    if quote_at is None:
        return self._node_to_code(node)

    # Triple-quoted strings lose three characters at each end, others one.
    width = 3 if text[quote_at : quote_at + 3] in ('"""', "'''") else 1
    return text[quote_at + width : -width]
|
|
854
|
+
|
|
855
|
+
def _extract_name_info(self, node: cst.BaseExpression) -> tuple[bool, str | None]:
    """Report whether ``node`` is a plain name reference.

    Returns:
        ``(True, name)`` for a ``cst.Name`` other than the constants
        ``True``, ``False`` and ``None``; ``(False, None)`` otherwise.
    """
    if not isinstance(node, cst.Name):
        return False, None
    if node.value in {"True", "False", "None"}:
        return False, None
    return True, node.value
|
|
860
|
+
|
|
861
|
+
# =========================================================================
|
|
862
|
+
# Assignment Extraction
|
|
863
|
+
# =========================================================================
|
|
864
|
+
|
|
865
|
+
def visit_Assign(self, node: cst.Assign) -> bool:
    """Record every target of a plain assignment statement.

    Tuple and list targets are delegated to
    ``_extract_unpacking_assignment``; any other target goes through
    ``_extract_assignment`` and is appended to ``self.assignments`` when a
    record is produced.

    Returns:
        Always ``True`` so the visitor keeps descending.
    """
    for assign_target in node.targets:
        lhs = assign_target.target
        # a, b = value   or   [a, b] = value
        if isinstance(lhs, (cst.Tuple, cst.List)):
            self._extract_unpacking_assignment(lhs, node.value)
            continue
        record = self._extract_assignment(lhs, node.value)
        if record:
            self.assignments.append(record)
    return True
|
|
876
|
+
|
|
877
|
+
def _extract_unpacking_assignment(
    self,
    target: cst.Tuple | cst.List,
    value: cst.BaseExpression,
) -> None:
    """Record one assignment per element of a tuple/list unpacking target.

    Covers forms such as::

        a, b = 1, 2
        a, b = func()
        a, *rest, b = [1, 2, 3, 4]

    Each record's ``value_source`` is the source text of the whole
    right-hand side followed by ``[i]``, where ``i`` is the element's index
    within the target.  Elements whose target name cannot be extracted are
    skipped.  Names assigned outside any function or class are also added
    to ``self.module_variables``.
    """
    rhs_source = self._node_to_code(value)

    # These describe the right-hand side as a whole and are the same for
    # every element of the target.
    rhs_is_call = isinstance(value, cst.Call)
    rhs_callee = self._node_to_code(value.func) if rhs_is_call else None
    enclosing = self._current_function.qualified_name if self._current_function else None
    at_module_level = not self._current_function and not self._current_class

    for index, element in enumerate(target.elements):
        # Starred elements (``*rest``) carry the real target in ``.value``,
        # as do ordinary elements that expose that attribute.
        if isinstance(element, cst.StarredElement) or hasattr(element, "value"):
            inner = element.value
        else:
            inner = element

        name = self._extract_assignment_target(inner)
        if not name:
            continue

        line = self._get_line(inner)
        source_vars, is_method, is_fstr = extract_value_metadata(value)

        self.assignments.append(
            ExtractedAssignment(
                target=name,
                value_source=f"{rhs_source}[{index}]",
                annotation=None,
                line=line,
                in_function=enclosing,
                is_literal=False,
                is_call=rhs_is_call,
                called_function=rhs_callee,
                is_name=False,
                referenced_name=None,
                source_variables=source_vars,
                is_method_call=is_method,
                is_string_interpolation=is_fstr,
            )
        )

        # Track module-level variables
        if at_module_level:
            self.module_variables.append(name)
|
|
939
|
+
|
|
940
|
+
def visit_AnnAssign(self, node: cst.AnnAssign) -> bool:
    """Record an annotated assignment (``x: T = value`` or ``x: T``).

    The annotation is passed to ``_extract_assignment`` as source text, and
    the resulting record, if any, is appended to ``self.assignments``.

    Returns:
        Always ``True`` so the visitor keeps descending.
    """
    annotation_src = self._node_to_code(node.annotation.annotation)
    record = self._extract_assignment(node.target, node.value, annotation_src)
    if record:
        self.assignments.append(record)
    return True
|
|
949
|
+
|
|
950
|
+
def _extract_assignment(
    self,
    target: cst.BaseExpression,
    value: cst.BaseExpression | None,
    annotation: str | None = None,
) -> ExtractedAssignment | None:
    """Build an ``ExtractedAssignment`` for a single-target assignment.

    Args:
        target: Left-hand side of the assignment.
        value: Right-hand side, or ``None`` for a bare annotation.
        annotation: Source text of the annotation, if any.

    Returns:
        The assignment record, or ``None`` when no target name can be
        extracted from ``target``.  As a side effect, a name assigned
        outside any function or class is appended to
        ``self.module_variables``.
    """
    name = self._extract_assignment_target(target)
    if not name:
        return None

    has_value = bool(value)
    value_source = self._node_to_code(value) if has_value else ""

    # Right-hand side is a call: remember the callee's source text.
    is_call = has_value and isinstance(value, cst.Call)
    called_function = self._node_to_code(value.func) if is_call else None

    # Right-hand side is a bare name reference.
    is_name = has_value and isinstance(value, cst.Name)
    referenced_name = value.value if is_name else None

    is_literal = self._extract_literal_info(value)[0] if has_value else False

    line = self._get_line(target)
    enclosing = self._current_function.qualified_name if self._current_function else None

    # Track module-level variables
    if not (self._current_function or self._current_class):
        self.module_variables.append(name)

    source_vars, is_method, is_fstr = extract_value_metadata(value)

    return ExtractedAssignment(
        target=name,
        value_source=value_source,
        annotation=annotation,
        line=line,
        in_function=enclosing,
        is_literal=is_literal,
        is_call=is_call,
        called_function=called_function,
        is_name=is_name,
        referenced_name=referenced_name,
        source_variables=source_vars,
        is_method_call=is_method,
        is_string_interpolation=is_fstr,
    )
|
|
1005
|
+
|
|
1006
|
+
def _extract_assignment_target(self, target: cst.BaseExpression) -> str | None:
    """Return the name an assignment target refers to.

    Supported shapes:
    - Simple names: ``x = value`` gives ``"x"``
    - Attribute access: ``self.x = value`` gives the full dotted name
    - Subscript: ``d[key] = value`` gives the name of the subscripted
      object (``"d"``), which may itself be a simple or dotted name

    Returns:
        The extracted name, or ``None`` for any other target shape.
    """
    # For a subscript the interesting part is the object being indexed.
    base = target.value if isinstance(target, cst.Subscript) else target

    if isinstance(base, cst.Name):
        return base.value
    if isinstance(base, cst.Attribute):
        return self._get_dotted_name(base)
    return None
|
|
1033
|
+
|
|
1034
|
+
# =========================================================================
|
|
1035
|
+
# Control Flow Extraction
|
|
1036
|
+
# =========================================================================
|
|
1037
|
+
|
|
1038
|
+
def visit_If(self, node: cst.If) -> bool:
    """Track an ``if`` statement and record its branch layout.

    The conditional depth counter is always incremented.  While a function
    is being visited, an ``"if"`` control-flow block is appended to
    ``self._current_control_flow`` with the statement's line range, the
    start line of every ``elif`` in the chain and the start line of the
    final ``else``, if present.

    Returns:
        Always ``True`` so the visitor descends into the branches.
    """
    self._in_conditional += 1

    if self._current_function:
        start_line, _, end_line, _ = self._get_node_position(node)

        elif_lines: list[int] = []
        else_line: int | None = None

        # An ``elif`` is stored as a nested If in ``orelse``, so follow the
        # chain to the end; looking at the first ``orelse`` only would miss
        # later elif branches and an ``else`` that follows an ``elif``.
        branch = node.orelse
        while isinstance(branch, cst.If):
            elif_lines.append(self._get_node_position(branch)[0])
            branch = branch.orelse

        has_else = isinstance(branch, cst.Else)
        if has_else:
            else_line = self._get_node_position(branch)[0]

        self._current_control_flow.append(
            ExtractedControlFlowBlock(
                block_type="if",
                start_line=start_line,
                end_line=end_line,
                has_elif=bool(elif_lines),
                has_else=has_else,
                elif_lines=elif_lines,
                else_line=else_line,
            )
        )

    return True
|
|
1074
|
+
|
|
1075
|
+
def leave_If(self, node: cst.If) -> None:
    """Leave an ``if`` statement: lower the conditional depth, never below zero."""
    remaining = self._in_conditional - 1
    self._in_conditional = remaining if remaining > 0 else 0
|
|
1078
|
+
|
|
1079
|
+
def visit_IfExp(self, node: cst.IfExp) -> bool:
    """Track a conditional expression (``x if cond else y``).

    The conditional depth counter is incremented and, while a function is
    being visited, an ``"if"`` control-flow block covering the expression
    is recorded.

    Returns:
        Always ``True`` so the visitor descends into the expression.
    """
    self._in_conditional += 1

    if not self._current_function:
        return True

    first_line, _, last_line, _ = self._get_node_position(node)
    self._current_control_flow.append(
        ExtractedControlFlowBlock(
            block_type="if",
            start_line=first_line,
            # fall back to the start line when no end line is available
            end_line=last_line or first_line,
        )
    )
    return True
|
|
1093
|
+
|
|
1094
|
+
def leave_IfExp(self, node: cst.IfExp) -> None:
    """Leave a conditional expression: lower the conditional depth, never below zero."""
    remaining = self._in_conditional - 1
    self._in_conditional = remaining if remaining > 0 else 0
|
|
1097
|
+
|
|
1098
|
+
def visit_For(self, node: cst.For) -> bool:
    """Track a ``for`` loop.

    The loop depth counter is incremented and, while a function is being
    visited, a ``"for"`` control-flow block with the loop's line range is
    recorded.

    Returns:
        Always ``True`` so the visitor descends into the loop body.
    """
    self._in_loop += 1

    if not self._current_function:
        return True

    first_line, _, last_line, _ = self._get_node_position(node)
    self._current_control_flow.append(
        ExtractedControlFlowBlock(
            block_type="for",
            start_line=first_line,
            end_line=last_line,
            # Initial values only.  NOTE(review): the original comment says
            # visit_Break updates has_break; that code is not visible here.
            has_break=False,
            has_continue=False,
        )
    )
    return True
|
|
1114
|
+
|
|
1115
|
+
def leave_For(self, node: cst.For) -> None:
    """Leave a ``for`` loop: lower the loop depth, never below zero."""
    remaining = self._in_loop - 1
    self._in_loop = remaining if remaining > 0 else 0
|
|
1118
|
+
|
|
1119
|
+
def visit_While(self, node: cst.While) -> bool:
    """Track a ``while`` loop.

    The loop depth counter is incremented and, while a function is being
    visited, a ``"while"`` control-flow block with the loop's line range is
    recorded.

    Returns:
        Always ``True`` so the visitor descends into the loop body.
    """
    self._in_loop += 1

    if not self._current_function:
        return True

    first_line, _, last_line, _ = self._get_node_position(node)
    self._current_control_flow.append(
        ExtractedControlFlowBlock(
            block_type="while",
            start_line=first_line,
            end_line=last_line,
        )
    )
    return True
|
|
1133
|
+
|
|
1134
|
+
def leave_While(self, node: cst.While) -> None:
    """Leave a ``while`` loop: lower the loop depth, never below zero."""
    remaining = self._in_loop - 1
    self._in_loop = remaining if remaining > 0 else 0
|
|
1137
|
+
|
|
1138
|
+
def visit_Try(self, node: cst.Try) -> bool:
    """Track a ``try`` statement.

    Sets ``self._in_try`` for the duration of the statement and, while a
    function is being visited, records a ``"try"`` control-flow block with
    the statement's line range, the ``(start, end)`` lines of each except
    handler and of the ``finally`` block, if present.

    Returns:
        Always ``True`` so the visitor descends into the statement.
    """
    # Remember the enclosing state so leave_Try can restore it: resetting
    # the flag to False on leave would mark the rest of an outer try as
    # being outside any try once a nested try ends.  The stack is created
    # lazily so this method pair is self-contained.
    saved = getattr(self, "_try_state_stack", None)
    if saved is None:
        saved = self._try_state_stack = []
    saved.append(self._in_try)
    self._in_try = True

    if self._current_function:
        start_line, _, end_line, _ = self._get_node_position(node)

        # (start_line, end_line) of every except handler, in source order
        except_blocks: list[tuple[int, int]] = []
        for handler in node.handlers:
            exc_start, _, exc_end, _ = self._get_node_position(handler)
            except_blocks.append((exc_start, exc_end))

        finally_block: tuple[int, int] | None = None
        if node.finalbody:
            fin_start, _, fin_end, _ = self._get_node_position(node.finalbody)
            finally_block = (fin_start, fin_end)

        self._current_control_flow.append(
            ExtractedControlFlowBlock(
                block_type="try",
                start_line=start_line,
                end_line=end_line,
                except_blocks=except_blocks,
                finally_block=finally_block,
            )
        )

    return True

def leave_Try(self, node: cst.Try) -> None:
    """Leave a ``try`` statement, restoring the enclosing try state."""
    saved = getattr(self, "_try_state_stack", None)
    self._in_try = saved.pop() if saved else False
|
|
1171
|
+
|
|
1172
|
+
def visit_ExceptHandler(self, node: cst.ExceptHandler) -> bool:
    """Track entry into an ``except`` handler.

    Returns:
        Always ``True`` so the visitor descends into the handler body.
    """
    # Save the enclosing state: a handler nested inside another handler
    # must not clear the flag for the rest of the outer handler when it
    # ends.  The stack is created lazily so this method pair is
    # self-contained.
    saved = getattr(self, "_except_state_stack", None)
    if saved is None:
        saved = self._except_state_stack = []
    saved.append(self._in_except)
    self._in_except = True
    return True

def leave_ExceptHandler(self, node: cst.ExceptHandler) -> None:
    """Leave an ``except`` handler, restoring the enclosing handler state."""
    saved = getattr(self, "_except_state_stack", None)
    self._in_except = saved.pop() if saved else False
|
|
1180
|
+
|
|
1181
|
+
def visit_Finally(self, node: cst.Finally) -> bool:
    """Track entry into a ``finally`` block.

    Returns:
        Always ``True`` so the visitor descends into the block body.
    """
    # Save the enclosing state: a finally block nested inside another one
    # must not clear the flag for the rest of the outer block when it ends.
    # The stack is created lazily so this method pair is self-contained.
    saved = getattr(self, "_finally_state_stack", None)
    if saved is None:
        saved = self._finally_state_stack = []
    saved.append(self._in_finally)
    self._in_finally = True
    return True

def leave_Finally(self, node: cst.Finally) -> None:
    """Leave a ``finally`` block, restoring the enclosing state."""
    saved = getattr(self, "_finally_state_stack", None)
    self._in_finally = saved.pop() if saved else False
|
|
1189
|
+
|
|
1190
|
+
def visit_With(self, node: cst.With) -> bool:
    """Track a ``with`` statement.

    Sets ``self._in_with`` for the duration of the statement and, while a
    function is being visited, records a ``"with"`` control-flow block
    holding the statement's line range, the source text of the first
    context-manager expression (``context_expr``) and the source text of
    every non-empty context-manager expression (``with_items``).

    Returns:
        Always ``True`` so the visitor descends into the statement.
    """
    # Save the enclosing state so leave_With can restore it: resetting the
    # flag to False on leave would mark the rest of an outer ``with`` body
    # as being outside any ``with`` once a nested one ends.  The stack is
    # created lazily so this method pair is self-contained.
    saved = getattr(self, "_with_state_stack", None)
    if saved is None:
        saved = self._with_state_stack = []
    saved.append(self._in_with)
    self._in_with = True

    if self._current_function:
        start_line, _, end_line, _ = self._get_node_position(node)

        item_sources = [self._node_to_code(item.item) for item in node.items]
        context_expr = item_sources[0] if item_sources else None
        with_items: list[str] = [src for src in item_sources if src]

        self._current_control_flow.append(
            ExtractedControlFlowBlock(
                block_type="with",
                start_line=start_line,
                end_line=end_line,
                context_expr=context_expr,
                with_items=with_items,
            )
        )

    return True

def leave_With(self, node: cst.With) -> None:
    """Leave a ``with`` statement, restoring the enclosing ``with`` state."""
    saved = getattr(self, "_with_state_stack", None)
    self._in_with = saved.pop() if saved else False
|
|
1222
|
+
|
|
1223
|
+
def _enter_comprehension(self, node: cst.CSTNode) -> bool:
    """Shared entry logic for list/set/dict comprehensions and generator expressions.

    Sets ``self._in_comprehension`` and, while a function is being visited,
    records a ``"comprehension"`` control-flow block spanning the node.

    Returns:
        Always ``True`` so the visitor descends into the comprehension.
    """
    # Save the enclosing state so that leaving a nested comprehension does
    # not clear the flag for the rest of the outer one.  The stack is
    # created lazily so this group of methods is self-contained.
    saved = getattr(self, "_comprehension_state_stack", None)
    if saved is None:
        saved = self._comprehension_state_stack = []
    saved.append(self._in_comprehension)
    self._in_comprehension = True

    if self._current_function:
        line, _, end_line, _ = self._get_node_position(node)
        self._current_control_flow.append(
            ExtractedControlFlowBlock(
                block_type="comprehension",
                start_line=line,
                # fall back to the start line when no end line is available
                end_line=end_line or line,
            )
        )

    return True

def _exit_comprehension(self) -> None:
    """Shared exit logic: restore the enclosing comprehension state."""
    saved = getattr(self, "_comprehension_state_stack", None)
    self._in_comprehension = saved.pop() if saved else False

def visit_ListComp(self, node: cst.ListComp) -> bool:
    """Track list comprehensions."""
    return self._enter_comprehension(node)

def leave_ListComp(self, node: cst.ListComp) -> None:
    """Leave list comprehension."""
    self._exit_comprehension()

def visit_SetComp(self, node: cst.SetComp) -> bool:
    """Track set comprehensions."""
    return self._enter_comprehension(node)

def leave_SetComp(self, node: cst.SetComp) -> None:
    """Leave set comprehension."""
    self._exit_comprehension()

def visit_DictComp(self, node: cst.DictComp) -> bool:
    """Track dict comprehensions."""
    return self._enter_comprehension(node)

def leave_DictComp(self, node: cst.DictComp) -> None:
    """Leave dict comprehension."""
    self._exit_comprehension()

def visit_GeneratorExp(self, node: cst.GeneratorExp) -> bool:
    """Track generator expressions."""
    return self._enter_comprehension(node)

def leave_GeneratorExp(self, node: cst.GeneratorExp) -> None:
    """Leave generator expression."""
    self._exit_comprehension()
|
|
1298
|
+
|
|
1299
|
+
def visit_Yield(self, node: cst.Yield) -> bool:
    """Track a ``yield`` and record what is being yielded.

    While a function is being visited it is flagged with ``has_yield``.
    A yield that carries a value is described by ``_extract_yield_info``
    and the result is stored in the function's ``return_statements``, so
    yields are recorded alongside returns.

    Returns:
        Always ``True`` so calls inside the yielded expression are visited.
    """
    function = self._current_function
    if not function:
        return True

    function.has_yield = True

    if node.value:
        yielded = self._extract_yield_info(node)
        if yielded:
            function.return_statements.append(yielded)

    return True
|
|
1312
|
+
|
|
1313
|
+
def _extract_yield_info(self, node: cst.Yield) -> ExtractedReturn | None:
    """Describe the value of a ``yield`` as an ``ExtractedReturn``.

    Args:
        node: The yield node.

    Returns:
        ``None`` for a bare ``yield``.  Otherwise a record classifying the
        yielded value as a call, a variable, a literal or a generic
        expression, tagged with the yield's line.
    """
    if node.value is None:
        return None

    line, _, _, _ = self._get_node_position(node)
    value = node.value

    # yield func()
    if isinstance(value, cst.Call):
        return ExtractedReturn(
            line=line,
            returns_call=True,
            call_name=self._node_to_code(value.func),
            expression_text=self._node_to_code(value),
        )

    # yield x -- the constants True/False/None are also Name nodes; they
    # are excluded here so they reach the literal branch below instead of
    # being recorded as variables called "True", "False" or "None".
    if isinstance(value, cst.Name) and value.value not in {"True", "False", "None"}:
        return ExtractedReturn(
            line=line,
            returns_variable=True,
            variable_name=value.value,
        )

    # yield <literal>
    is_literal, lit_value, lit_type = self._extract_literal_info(value)
    if is_literal:
        return ExtractedReturn(
            line=line,
            returns_literal=True,
            literal_value=lit_value,
            literal_type=lit_type,
        )

    # Anything else is kept as source text.
    return ExtractedReturn(
        line=line,
        returns_expression=True,
        expression_text=self._node_to_code(value),
    )
|
|
1355
|
+
|
|
1356
|
+
def visit_Return(self, node: cst.Return) -> bool:
    """Track a ``return`` and record what is being returned.

    While a function is being visited it is flagged with ``has_return`` and
    the record built by ``_extract_return_info`` is stored in the
    function's ``return_statements``.

    Returns:
        Always ``True`` so calls inside the returned expression are visited.
    """
    function = self._current_function
    if not function:
        return True

    function.has_return = True

    returned = self._extract_return_info(node)
    if returned:
        function.return_statements.append(returned)

    return True
|
|
1367
|
+
|
|
1368
|
+
def _extract_return_info(self, node: cst.Return) -> ExtractedReturn:
    """Describe the value of a ``return`` statement as an ``ExtractedReturn``.

    Args:
        node: The return node.

    Returns:
        A record tagged with the statement's line that classifies the
        returned value as one of: nothing/``None``, a call, a variable
        (plain or dotted name), a literal, a comprehension, a lambda, or a
        generic expression kept as source text.
    """
    line, _, _, _ = self._get_node_position(node)
    value = node.value

    # Bare ``return`` and explicit ``return None`` are treated alike.
    if value is None or (isinstance(value, cst.Name) and value.value == "None"):
        return ExtractedReturn(
            line=line,
            returns_none=True,
        )

    # return func()
    if isinstance(value, cst.Call):
        return ExtractedReturn(
            line=line,
            returns_call=True,
            call_name=self._node_to_code(value.func),
            expression_text=self._node_to_code(value),
        )

    # return x -- True and False are also Name nodes; they are excluded
    # here so they reach the literal branch below instead of being
    # recorded as variables called "True" or "False".
    if isinstance(value, cst.Name) and value.value not in {"True", "False"}:
        return ExtractedReturn(
            line=line,
            returns_variable=True,
            variable_name=value.value,
        )

    # return self.x / return obj.attr
    if isinstance(value, cst.Attribute):
        return ExtractedReturn(
            line=line,
            returns_variable=True,
            variable_name=self._get_dotted_name(value),
        )

    # return <literal>
    is_literal, lit_value, lit_type = self._extract_literal_info(value)
    if is_literal:
        return ExtractedReturn(
            line=line,
            returns_literal=True,
            literal_value=lit_value,
            literal_type=lit_type,
        )

    # return [x for x in ...] and the other comprehension forms
    if isinstance(value, (cst.ListComp, cst.SetComp, cst.DictComp, cst.GeneratorExp)):
        return ExtractedReturn(
            line=line,
            returns_comprehension=True,
            expression_text=self._node_to_code(value),
        )

    # return lambda ...: ...
    if isinstance(value, cst.Lambda):
        return ExtractedReturn(
            line=line,
            returns_lambda=True,
            expression_text=self._node_to_code(value),
        )

    # Anything else is kept as source text.
    return ExtractedReturn(
        line=line,
        returns_expression=True,
        expression_text=self._node_to_code(value),
    )
|
|
1446
|
+
|
|
1447
|
+
# =========================================================================
|
|
1448
|
+
# Module-level Extraction
|
|
1449
|
+
# =========================================================================
|
|
1450
|
+
|
|
1451
|
+
def visit_Module(self, node: cst.Module) -> bool:
    """Capture the module docstring, if there is one.

    The docstring is taken from the first statement of the module when
    that statement is a simple statement line starting with a string
    expression; it is stored in ``self.module_docstring``.

    Returns:
        Always ``True`` so the visitor descends into the module.
    """
    statements = node.body
    if not statements:
        return True

    first = statements[0]
    if (
        isinstance(first, cst.SimpleStatementLine)
        and first.body
        and isinstance(first.body[0], cst.Expr)
    ):
        candidate = first.body[0].value
        if isinstance(candidate, (cst.SimpleString, cst.ConcatenatedString)):
            self.module_docstring = self._extract_string_value(candidate)

    return True
|
|
1463
|
+
|
|
1464
|
+
# =========================================================================
|
|
1465
|
+
# Utility Methods
|
|
1466
|
+
# =========================================================================
|
|
1467
|
+
|
|
1468
|
+
def _node_to_code(self, node: cst.CSTNode | None) -> str:
    """Render a CST node back to source text.

    Args:
        node: The node to render, or ``None``.

    Returns:
        ``""`` for ``None``.  Otherwise the node's own ``code`` attribute
        when it has one, else the text generated for the node by an empty
        parsed module.  If rendering fails, ``str(node)`` is returned.
    """
    if node is None:
        return ""

    try:
        if hasattr(node, "code"):
            return node.code
    except Exception:
        # fall through to the generic renderer below
        pass

    try:
        return cst.parse_module("").code_for_node(node)
    except Exception:
        return str(node)
|
|
1481
|
+
|
|
1482
|
+
def _get_dotted_name(self, node: cst.BaseExpression) -> str:
    """Return the dotted name for an expression such as ``a.b.c``.

    Attribute chains are unwound from the outside in.  The innermost
    expression contributes its name when it is a ``cst.Name`` and its
    source text otherwise.
    """
    trailing: list[str] = []
    current = node
    while isinstance(current, cst.Attribute):
        trailing.append(current.attr.value)
        current = current.value

    if isinstance(current, cst.Name):
        root = current.value
    else:
        root = self._node_to_code(current)

    # ``trailing`` was collected outermost-first, so reverse it.
    return ".".join([root, *reversed(trailing)])
|
|
1490
|
+
|
|
1491
|
+
def _extract_docstring(self, body: cst.BaseSuite) -> str | None:
    """Return the docstring of a function/class body, if it has one.

    A docstring is recognised only when ``body`` is an indented block
    whose first statement is a simple statement line starting with a
    string expression.

    Returns:
        The docstring text, or ``None`` when the body has no docstring.
    """
    if not isinstance(body, cst.IndentedBlock) or not body.body:
        return None

    first = body.body[0]
    if not isinstance(first, cst.SimpleStatementLine):
        return None
    if not first.body or not isinstance(first.body[0], cst.Expr):
        return None

    candidate = first.body[0].value
    if isinstance(candidate, (cst.SimpleString, cst.ConcatenatedString)):
        return self._extract_string_value(candidate)
    return None
|
|
1507
|
+
|
|
1508
|
+
def _extract_literal_or_code(self, node: cst.BaseExpression) -> Any:
    """Return the literal value of ``node``, or its source text if it is not a literal."""
    literal_found, literal_value, _ = self._extract_literal_info(node)
    return literal_value if literal_found else self._node_to_code(node)
|
|
1514
|
+
|
|
1515
|
+
def _make_location(self, node: cst.CSTNode) -> CodeLocation:
    """Build a ``CodeLocation`` for ``node``.

    The file is ``self._file_path``, or ``Path("unknown")`` when no path is
    set; the line/column fields come from ``_get_node_position``.
    """
    start_line, start_col, last_line, last_col = self._get_node_position(node)
    source_file = self._file_path or Path("unknown")
    return CodeLocation(
        file=source_file,
        line=start_line,
        column=start_col,
        end_line=last_line,
        end_column=last_col,
    )
|
|
1525
|
+
|
|
1526
|
+
def _get_node_position(self, node: cst.CSTNode) -> tuple[int, int, int, int]:
    """Look up the source position of ``node``.

    Returns:
        ``(start_line, start_column, end_line, end_column)`` taken from the
        position metadata resolved through ``self._wrapper``.  When there
        is no wrapper, no position for the node, or the lookup raises,
        ``(0, 0, 0, 0)`` is returned instead.
    """
    unknown = (0, 0, 0, 0)
    wrapper = self._wrapper
    if not wrapper:
        return unknown

    try:
        span = wrapper.resolve(cst.metadata.PositionProvider).get(node)
        if span:
            return (
                span.start.line,
                span.start.column,
                span.end.line,
                span.end.column,
            )
    except Exception:
        # Best effort: any lookup failure is reported as "unknown".
        pass
    return unknown
|
|
1541
|
+
|
|
1542
|
+
def _get_line(self, node: cst.CSTNode) -> int:
    """Return only the start line of ``node`` (first field of its position)."""
    position = self._get_node_position(node)
    return position[0]
|