apisec-code-bolt 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. apisec_code_bolt/__init__.py +42 -0
  2. apisec_code_bolt/__main__.py +11 -0
  3. apisec_code_bolt/analysis/__init__.py +96 -0
  4. apisec_code_bolt/analysis/analyzer.py +2309 -0
  5. apisec_code_bolt/analysis/binding_tracker.py +341 -0
  6. apisec_code_bolt/analysis/call_graph.py +1197 -0
  7. apisec_code_bolt/analysis/call_graph_types.py +332 -0
  8. apisec_code_bolt/analysis/call_resolver.py +988 -0
  9. apisec_code_bolt/analysis/capability_tagger.py +322 -0
  10. apisec_code_bolt/analysis/config_scanner.py +197 -0
  11. apisec_code_bolt/analysis/data_flow.py +1883 -0
  12. apisec_code_bolt/analysis/dependency_extractor.py +959 -0
  13. apisec_code_bolt/analysis/flow_analysis.py +1406 -0
  14. apisec_code_bolt/analysis/hof_catalog.py +61 -0
  15. apisec_code_bolt/analysis/integration_detector.py +1399 -0
  16. apisec_code_bolt/analysis/literal_scanner.py +300 -0
  17. apisec_code_bolt/analysis/path_normalizer.py +55 -0
  18. apisec_code_bolt/analysis/read_site_detector.py +310 -0
  19. apisec_code_bolt/analysis/request_patterns.py +162 -0
  20. apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
  21. apisec_code_bolt/analysis/sink_evidence.py +333 -0
  22. apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
  23. apisec_code_bolt/cli/__init__.py +5 -0
  24. apisec_code_bolt/cli/exit_codes.py +17 -0
  25. apisec_code_bolt/cli/main.py +1069 -0
  26. apisec_code_bolt/cloud/__init__.py +1 -0
  27. apisec_code_bolt/cloud/apisec_client.py +118 -0
  28. apisec_code_bolt/cloud/client.py +255 -0
  29. apisec_code_bolt/core/__init__.py +75 -0
  30. apisec_code_bolt/core/config.py +528 -0
  31. apisec_code_bolt/core/credentials.py +65 -0
  32. apisec_code_bolt/core/discovery.py +433 -0
  33. apisec_code_bolt/core/log_format.py +115 -0
  34. apisec_code_bolt/core/manifest.py +1009 -0
  35. apisec_code_bolt/core/repo.py +280 -0
  36. apisec_code_bolt/core/state.py +59 -0
  37. apisec_code_bolt/core/telemetry.py +451 -0
  38. apisec_code_bolt/core/types.py +587 -0
  39. apisec_code_bolt/fingerprinting/__init__.py +1 -0
  40. apisec_code_bolt/frameworks/__init__.py +29 -0
  41. apisec_code_bolt/frameworks/_jwt_common.py +50 -0
  42. apisec_code_bolt/frameworks/auth_helpers.py +437 -0
  43. apisec_code_bolt/frameworks/base.py +608 -0
  44. apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
  45. apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
  46. apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
  47. apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
  48. apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
  49. apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
  50. apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
  51. apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
  52. apisec_code_bolt/frameworks/java/__init__.py +6 -0
  53. apisec_code_bolt/frameworks/java/_annotations.py +167 -0
  54. apisec_code_bolt/frameworks/java/_constraints.py +128 -0
  55. apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
  56. apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
  57. apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
  58. apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
  59. apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
  60. apisec_code_bolt/frameworks/js/__init__.py +8 -0
  61. apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
  62. apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
  63. apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
  64. apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
  65. apisec_code_bolt/frameworks/python/__init__.py +19 -0
  66. apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
  67. apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
  68. apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
  69. apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
  70. apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
  71. apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
  72. apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
  73. apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
  74. apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
  75. apisec_code_bolt/parsing/__init__.py +62 -0
  76. apisec_code_bolt/parsing/base.py +554 -0
  77. apisec_code_bolt/parsing/csharp/__init__.py +5 -0
  78. apisec_code_bolt/parsing/csharp/language_services.py +203 -0
  79. apisec_code_bolt/parsing/csharp/literals.py +72 -0
  80. apisec_code_bolt/parsing/csharp/parser.py +1158 -0
  81. apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
  82. apisec_code_bolt/parsing/js/__init__.py +5 -0
  83. apisec_code_bolt/parsing/js/language_services.py +118 -0
  84. apisec_code_bolt/parsing/js/parser.py +622 -0
  85. apisec_code_bolt/parsing/jvm/__init__.py +7 -0
  86. apisec_code_bolt/parsing/jvm/language_services.py +270 -0
  87. apisec_code_bolt/parsing/jvm/parser.py +774 -0
  88. apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
  89. apisec_code_bolt/parsing/python/__init__.py +150 -0
  90. apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
  91. apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
  92. apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
  93. apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
  94. apisec_code_bolt/parsing/python/expression_utils.py +221 -0
  95. apisec_code_bolt/parsing/python/extraction_types.py +271 -0
  96. apisec_code_bolt/parsing/python/language_services.py +487 -0
  97. apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
  98. apisec_code_bolt/parsing/python/parser.py +719 -0
  99. apisec_code_bolt/parsing/python/path_resolver.py +576 -0
  100. apisec_code_bolt/parsing/python/router_registry.py +806 -0
  101. apisec_code_bolt/parsing/python/type_resolver.py +730 -0
  102. apisec_code_bolt/parsing/python/visitors.py +1544 -0
  103. apisec_code_bolt/parsing/services.py +544 -0
  104. apisec_code_bolt/query/__init__.py +1 -0
  105. apisec_code_bolt/query/ast_cache.py +182 -0
  106. apisec_code_bolt/query/executor.py +283 -0
  107. apisec_code_bolt/query/handlers.py +832 -0
  108. apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
  109. apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
  110. apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
  111. apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,1544 @@
1
+ """
2
+ LibCST visitors for Python code analysis.
3
+
4
+ This module contains visitors that traverse Python CST and extract:
5
+ - Function/method definitions
6
+ - Class definitions
7
+ - Import statements
8
+ - Function calls
9
+ - Assignments
10
+ - Decorators
11
+
12
+ These are low-level extraction visitors. Higher-level semantic analysis
13
+ is done in the parser module.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from collections.abc import Sequence
19
+ from pathlib import Path
20
+ from typing import Any
21
+
22
+ import libcst as cst
23
+
24
+ from ...core.types import CodeLocation
25
+ from .expression_utils import (
26
+ collect_name_nodes,
27
+ detect_concatenation,
28
+ detect_container_type,
29
+ detect_format_call,
30
+ detect_fstring,
31
+ extract_value_metadata,
32
+ is_classvar_annotation,
33
+ )
34
+
35
+ # Re-export dataclasses so existing ``from .visitors import ...`` still works.
36
+ from .extraction_types import ( # noqa: F401 – re-exports
37
+ ExtractedArgument,
38
+ ExtractedAssignment,
39
+ ExtractedCall,
40
+ ExtractedClass,
41
+ ExtractedControlFlowBlock,
42
+ ExtractedDecorator,
43
+ ExtractedField,
44
+ ExtractedFunction,
45
+ ExtractedImport,
46
+ ExtractedParameter,
47
+ ExtractedReturn,
48
+ )
49
+
50
+ # =============================================================================
51
+ # Position Tracking Mixin
52
+ # =============================================================================
53
+
54
+
55
class PositionProvider:
    """Mixin that keeps the source text and looks up node positions."""

    def __init__(self, source: str) -> None:
        """Store ``source`` and precompute where each of its lines begins."""
        self._source = source
        self._line_offsets = self._compute_line_offsets(source)

    @staticmethod
    def _compute_line_offsets(source: str) -> list[int]:
        """Return the index in ``source`` at which every line starts.

        The first line starts at index 0; each newline character starts a new
        line at the index right after it. Indices are positions in the
        ``str`` (characters), as produced by ``enumerate``.
        """
        return [0] + [idx + 1 for idx, ch in enumerate(source) if ch == "\n"]

    def _get_position(self, node: cst.CSTNode) -> tuple[int, int, int, int]:
        """Return ``(line, column, end_line, end_column)`` for ``node``.

        The lookup goes through ``self._wrapper``. This class never assigns
        that attribute itself, so when it is missing (``AttributeError``) or
        the node is unknown (``KeyError``) the result is ``(0, 0, 0, 0)``.
        """
        try:
            span = self._wrapper.resolve(cst.metadata.PositionProvider)[node]
            begin, finish = span.start, span.end
            return (begin.line, begin.column, finish.line, finish.column)
        except (KeyError, AttributeError):
            return (0, 0, 0, 0)
84
+
85
+
86
+ # =============================================================================
87
+ # Main Extraction Visitor
88
+ # =============================================================================
89
+
90
+
91
+ class PythonExtractor(cst.CSTVisitor):
92
+ """
93
+ Visitor that extracts structural information from Python CST.
94
+
95
+ Extracts:
96
+ - Functions and methods
97
+ - Classes
98
+ - Imports
99
+ - Function calls
100
+ - Assignments
101
+ """
102
+
103
def __init__(
    self,
    source: str,
    file_path: Path | None = None,
    module_name: str | None = None,
) -> None:
    """Prepare an extractor for one module's source text.

    Args:
        source: Full text of the module being analysed.
        file_path: Optional path of the module; stored as given.
        module_name: Optional dotted module name; ``None`` is stored as "".
    """
    # Let the libcst visitor base classes initialise their own state
    # before this class adds its attributes.
    super().__init__()

    self._source = source
    self._file_path = file_path
    self._module_name = module_name or ""
    # Set later through set_metadata_wrapper(); None until then.
    self._wrapper: cst.MetadataWrapper | None = None

    # Extraction results
    self.functions: list[ExtractedFunction] = []
    self.classes: list[ExtractedClass] = []
    self.imports: list[ExtractedImport] = []
    self.calls: list[ExtractedCall] = []
    self.assignments: list[ExtractedAssignment] = []
    self.module_docstring: str | None = None
    self.module_variables: list[str] = []

    # Context tracking: the innermost class/function plus the full nesting.
    self._current_class: ExtractedClass | None = None
    self._current_function: ExtractedFunction | None = None
    self._class_stack: list[ExtractedClass] = []
    self._function_stack: list[ExtractedFunction] = []

    # Control flow context tracking. The two counters are nesting depths;
    # the remaining entries are plain on/off flags.
    self._in_loop: int = 0
    self._in_conditional: int = 0
    self._in_try: bool = False
    self._in_except: bool = False
    self._in_finally: bool = False
    self._in_with: bool = False
    self._in_comprehension: bool = False

    # Control flow blocks collected for the function currently visited.
    self._current_control_flow: list[ExtractedControlFlowBlock] = []

    # Position tracking
    self._positions: dict[int, tuple[int, int, int, int]] = {}
143
+
144
def set_metadata_wrapper(self, wrapper: cst.MetadataWrapper) -> None:
    """Remember ``wrapper`` so later position lookups can use it."""
    self._wrapper = wrapper
147
+
148
+ # =========================================================================
149
+ # Import Extraction
150
+ # =========================================================================
151
+
152
def visit_Import(self, node: cst.Import) -> bool:
    """Record one ``ExtractedImport`` per name in an ``import x`` statement.

    Each record uses the dotted module name both as ``module`` and as the
    imported name, paired with its ``as`` alias (or ``None``).

    Returns:
        ``False`` so the visitor does not descend into the import node.
    """
    if isinstance(node.names, Sequence):
        for item in node.names:
            if not isinstance(item, cst.ImportAlias):
                continue

            dotted = self._get_dotted_name(item.name)
            as_clause = item.asname
            alias = (
                self._node_to_code(as_clause.name)
                if as_clause and isinstance(as_clause, cst.AsName)
                else None
            )

            record = ExtractedImport(
                module=dotted,
                names=[(dotted, alias)],
                is_from_import=False,
                line=self._get_line(node),
            )
            self.imports.append(record)
    return False
170
+
171
def visit_ImportFrom(self, node: cst.ImportFrom) -> bool:
    """Record a ``from x import y`` statement as a single ``ExtractedImport``.

    The relative level is the number of leading dots; a star import is
    stored as the single name ``("*", None)``.

    Returns:
        ``False`` so the visitor does not descend into the import node.
    """
    # One level per leading dot of a relative import.
    level = 0
    if node.relative:
        level = sum(1 for dot in node.relative if isinstance(dot, cst.Dot))

    module = self._get_dotted_name(node.module) if node.module else ""

    imported: list[tuple[str, str | None]] = []
    if isinstance(node.names, cst.ImportStar):
        imported = [("*", None)]
    elif isinstance(node.names, Sequence):
        for item in node.names:
            if not isinstance(item, cst.ImportAlias):
                continue
            original = self._get_dotted_name(item.name)
            as_clause = item.asname
            alias = (
                self._node_to_code(as_clause.name)
                if as_clause and isinstance(as_clause, cst.AsName)
                else None
            )
            imported.append((original, alias))

    record = ExtractedImport(
        module=module,
        names=imported,
        is_from_import=True,
        is_relative=level > 0,
        relative_level=level,
        line=self._get_line(node),
    )
    self.imports.append(record)
    return False
211
+
212
+ # =========================================================================
213
+ # Function Extraction
214
+ # =========================================================================
215
+
216
def visit_FunctionDef(self, node: cst.FunctionDef) -> bool:
    """Record a function definition and make it the current function.

    While a class is current, the function is attached to that class as a
    method: ``binding`` is "static" when it carries a ``staticmethod`` or
    ``classmethod`` decorator and "instance" otherwise. With no current
    class it is appended to ``self.functions``.

    Returns:
        ``True`` so the body is visited for calls and assignments.
    """
    extracted = self._extract_function(node)
    owner = self._current_class

    if not owner:
        self.functions.append(extracted)
    else:
        extracted.owner_type = owner.name
        unbound = any(
            dec.name in ("staticmethod", "classmethod") for dec in extracted.decorators
        )
        extracted.binding = "static" if unbound else "instance"
        owner.methods.append(extracted)

    # Track nesting so leave_FunctionDef can step back to the outer function.
    self._function_stack.append(extracted)
    self._current_function = extracted

    # Control flow blocks are collected per function; start with none.
    self._current_control_flow = []

    return True
238
+
239
def leave_FunctionDef(self, node: cst.FunctionDef) -> None:
    """Finish the current function and step back to the enclosing one.

    If any control flow blocks were collected, their summary is stored on
    the function being left. The function stack is then popped and the
    collected blocks are cleared.
    """
    finished = self._current_function
    if finished and self._current_control_flow:
        finished.control_flow_info = self._build_control_flow_info()

    stack = self._function_stack
    if stack:
        stack.pop()
        self._current_function = stack[-1] if stack else None

    # The list is cleared rather than restored: visit_FunctionDef does not
    # save the enclosing function's blocks, so there is nothing to put back.
    self._current_control_flow = []
251
+
252
+ def _build_control_flow_info(self) -> dict[str, Any]:
253
+ """Build control flow info dictionary from collected blocks."""
254
+ if_blocks: list[dict[str, Any]] = []
255
+ loops: list[dict[str, Any]] = []
256
+ try_blocks: list[dict[str, Any]] = []
257
+ with_blocks: list[dict[str, Any]] = []
258
+ comprehensions: list[dict[str, Any]] = []
259
+
260
+ for block in self._current_control_flow:
261
+ if block.block_type == "if":
262
+ if_blocks.append(
263
+ {
264
+ "start_line": block.start_line,
265
+ "end_line": block.end_line,
266
+ "has_elif": block.has_elif,
267
+ "has_else": block.has_else,
268
+ "elif_lines": block.elif_lines,
269
+ "else_line": block.else_line,
270
+ }
271
+ )
272
+ elif block.block_type in ("for", "while"):
273
+ loops.append(
274
+ {
275
+ "loop_type": block.block_type,
276
+ "start_line": block.start_line,
277
+ "end_line": block.end_line,
278
+ "has_break": block.has_break,
279
+ "has_continue": block.has_continue,
280
+ }
281
+ )
282
+ elif block.block_type == "try":
283
+ try_blocks.append(
284
+ {
285
+ "try_start": block.start_line,
286
+ "try_end": block.end_line,
287
+ "except_blocks": [
288
+ {"start_line": s, "end_line": e} for s, e in block.except_blocks
289
+ ],
290
+ "finally_block": (
291
+ {
292
+ "start_line": block.finally_block[0],
293
+ "end_line": block.finally_block[1],
294
+ }
295
+ if block.finally_block
296
+ else None
297
+ ),
298
+ }
299
+ )
300
+ elif block.block_type == "with":
301
+ with_blocks.append(
302
+ {
303
+ "start_line": block.start_line,
304
+ "end_line": block.end_line,
305
+ "context_expr": block.context_expr,
306
+ "with_items": block.with_items,
307
+ }
308
+ )
309
+ elif block.block_type == "comprehension":
310
+ comprehensions.append(
311
+ {
312
+ "line": block.start_line,
313
+ }
314
+ )
315
+
316
+ return {
317
+ "if_blocks": if_blocks,
318
+ "loops": loops,
319
+ "try_blocks": try_blocks,
320
+ "with_blocks": with_blocks,
321
+ "comprehensions": comprehensions,
322
+ }
323
+
324
def _extract_function(self, node: cst.FunctionDef) -> ExtractedFunction:
    """Build an ``ExtractedFunction`` describing ``node``.

    The qualified name joins the module name (when set), the names of all
    classes on the class stack, and the function name with dots.
    """
    func_name = node.name.value

    name_chain = [self._module_name] if self._module_name else []
    name_chain.extend(outer.name for outer in self._class_stack)
    name_chain.append(func_name)
    qualified = ".".join(part for part in name_chain if part)

    parameters = self._extract_parameters(node.params)

    returns = self._node_to_code(node.returns.annotation) if node.returns else None

    decorators = self._extract_decorators(node.decorators)
    docstring = self._extract_docstring(node.body)

    # Only the start line, start column and end line are kept.
    start_line, start_col, last_line, _ = self._get_node_position(node)

    local_names = self._extract_local_variables(node.body)

    # Source of the body, kept for content hashing; empty text becomes None.
    body_text = self._node_to_code(node.body)

    return ExtractedFunction(
        name=func_name,
        qualified_name=qualified,
        parameters=parameters,
        return_annotation=returns,
        decorators=decorators,
        is_async=isinstance(node.asynchronous, cst.Asynchronous),
        docstring=docstring,
        body_source=body_text or None,
        line=start_line,
        end_line=last_line,
        column=start_col,
        local_variables=local_names,
    )
372
+
373
+ def _extract_parameters(self, params: cst.Parameters) -> list[ExtractedParameter]:
374
+ """Extract function parameters."""
375
+ result: list[ExtractedParameter] = []
376
+
377
+ # Positional-only parameters (before /)
378
+ for param in params.posonly_params:
379
+ result.append(self._extract_single_param(param, is_positional_only=True))
380
+
381
+ # Regular parameters
382
+ for param in params.params:
383
+ result.append(self._extract_single_param(param))
384
+
385
+ # *args
386
+ if params.star_arg and isinstance(params.star_arg, cst.Param):
387
+ result.append(self._extract_single_param(params.star_arg, is_variadic=True))
388
+
389
+ # Keyword-only parameters (after *)
390
+ for param in params.kwonly_params:
391
+ result.append(self._extract_single_param(param, is_keyword_only=True))
392
+
393
+ # **kwargs
394
+ if params.star_kwarg:
395
+ result.append(self._extract_single_param(params.star_kwarg, is_keyword_variadic=True))
396
+
397
+ return result
398
+
399
def _extract_single_param(
    self,
    param: cst.Param,
    is_variadic: bool = False,
    is_keyword_variadic: bool = False,
    is_positional_only: bool = False,
    is_keyword_only: bool = False,
) -> ExtractedParameter:
    """Describe one parameter as an ``ExtractedParameter``.

    The annotation and default are stored as source text, or ``None`` when
    absent; the four flags are passed through unchanged.
    """
    annotation_text = (
        self._node_to_code(param.annotation.annotation) if param.annotation else None
    )
    default_text = self._node_to_code(param.default) if param.default else None

    return ExtractedParameter(
        name=param.name.value,
        annotation=annotation_text,
        default=default_text,
        is_variadic=is_variadic,
        is_keyword_variadic=is_keyword_variadic,
        is_positional_only=is_positional_only,
        is_keyword_only=is_keyword_only,
    )
427
+
428
def _extract_local_variables(self, body: cst.BaseSuite) -> list[str]:
    """Return the names assigned directly in a function body.

    Only simple statements that are direct children of an indented body
    are scanned; assignments nested inside compound statements are not
    visited here. A body that is not a ``cst.IndentedBlock`` yields an
    empty list.

    Returns:
        Unique variable names in order of first assignment. The order is
        deterministic so repeated runs over the same source agree.
    """
    found: list[str] = []

    if isinstance(body, cst.IndentedBlock):
        for stmt in body.body:
            if not isinstance(stmt, cst.SimpleStatementLine):
                continue
            for item in stmt.body:
                if isinstance(item, (cst.Assign, cst.AnnAssign)):
                    found.extend(self._get_assignment_targets(item))

    # dict.fromkeys removes duplicates while keeping first-seen order;
    # list(set(...)) would give an order that changes between runs.
    return list(dict.fromkeys(found))
441
+
442
def _get_assignment_targets(self, node: cst.Assign | cst.AnnAssign) -> list[str]:
    """Return the plain variable names bound by an assignment.

    For an annotated assignment only a bare-name target counts. For a
    regular assignment every target is inspected, and tuple or list
    unpacking is followed recursively, so ``a, (b, c) = ...`` and
    ``[a, b] = ...`` both contribute all their names. Attribute and
    subscript targets are ignored.
    """
    names: list[str] = []

    def collect(target: Any) -> None:
        # Append bare names; descend into tuple/list unpacking patterns.
        if isinstance(target, cst.Name):
            names.append(target.value)
        elif isinstance(target, (cst.Tuple, cst.List)):
            for element in target.elements:
                # Plain and starred elements both keep the expression in .value
                collect(element.value)

    if isinstance(node, cst.AnnAssign):
        if isinstance(node.target, cst.Name):
            names.append(node.target.value)
    elif isinstance(node, cst.Assign):
        # Chained assignment (a = b = ...) has several targets.
        for assign_target in node.targets:
            collect(assign_target.target)

    return names
460
+
461
+ # =========================================================================
462
+ # Class Extraction
463
+ # =========================================================================
464
+
465
def visit_ClassDef(self, node: cst.ClassDef) -> bool:
    """Record a class definition and make it the current class.

    Only classes met while no other class is current are appended to
    ``self.classes``; a nested class is still pushed on the class stack
    but is not added to that list.

    Returns:
        ``True`` so the class body is visited and its methods extracted.
    """
    record = self._extract_class(node)

    if not self._current_class:
        self.classes.append(record)

    self._class_stack.append(record)
    self._current_class = record

    return True
480
+
481
def leave_ClassDef(self, node: cst.ClassDef) -> None:
    """Pop the class being left and make the enclosing class current.

    After the pop the current class is the new top of the stack, or
    ``None`` when the stack became empty.
    """
    stack = self._class_stack
    if stack:
        stack.pop()
        self._current_class = stack[-1] if stack else None
486
+
487
def _extract_class(self, node: cst.ClassDef) -> ExtractedClass:
    """Build an ``ExtractedClass`` describing ``node``.

    The qualified name joins the module name (when set), the names of the
    enclosing classes on the class stack, and the class name with dots.
    A class is flagged as a dataclass when a decorator is named
    "dataclass", and as a Pydantic model when a base's source text
    contains "BaseModel" or "BaseSettings" or a decorator is named
    "validator" or "field_validator".
    """
    class_name = node.name.value

    name_chain = [self._module_name] if self._module_name else []
    name_chain.extend(outer.name for outer in self._class_stack)
    name_chain.append(class_name)
    qualified = ".".join(part for part in name_chain if part)

    # Base classes as source text
    base_texts: list[str] = [
        self._node_to_code(base.value)
        for base in (node.bases or ())
        if isinstance(base, cst.Arg)
    ]

    decorators = self._extract_decorators(node.decorators)

    has_dataclass_decorator = any(dec.name == "dataclass" for dec in decorators)
    inherits_model = any(
        "BaseModel" in text or "BaseSettings" in text for text in base_texts
    )
    has_validator_decorator = any(
        dec.name in {"validator", "field_validator"} for dec in decorators
    )

    docstring = self._extract_docstring(node.body)

    # Annotated class-level fields (for Pydantic/dataclass)
    fields = self._extract_class_fields(node.body)

    start_line, start_col, last_line, _ = self._get_node_position(node)

    return ExtractedClass(
        name=class_name,
        qualified_name=qualified,
        bases=base_texts,
        decorators=decorators,
        docstring=docstring,
        is_dataclass=has_dataclass_decorator,
        is_pydantic_model=inherits_model or has_validator_decorator,
        fields=fields,
        line=start_line,
        end_line=last_line,
        column=start_col,
    )
536
+
537
# Names skipped by _extract_class_fields even when they are annotated.
_FRAMEWORK_INTERNAL_NAMES = frozenset(
    (
        "model_config",
        "model_fields",
        "model_computed_fields",
        "model_extra",
        "model_fields_set",
        "__private_attributes__",
        "__class_vars__",
        "__validators__",
        "__pre_root_validators__",
        "__post_root_validators__",
    )
)

def _extract_class_fields(self, body: cst.BaseSuite) -> list[ExtractedField]:
    """Collect annotated class-level assignments as ``ExtractedField`` items.

    Only annotated assignments to a bare name that sit directly in the
    class body are considered. Names in ``_FRAMEWORK_INTERNAL_NAMES`` and
    annotations that ``is_classvar_annotation`` accepts are skipped. When
    the default is a call whose callee text is exactly "Field" or "field",
    its keyword arguments are stored in ``field_info``.
    """
    collected: list[ExtractedField] = []

    if not isinstance(body, cst.IndentedBlock):
        return collected

    for statement in body.body:
        if not isinstance(statement, cst.SimpleStatementLine):
            continue

        for part in statement.body:
            if not isinstance(part, cst.AnnAssign):
                continue
            if not isinstance(part.target, cst.Name):
                continue

            field_name = part.target.value
            if field_name in self._FRAMEWORK_INTERNAL_NAMES:
                continue

            annotation_node = part.annotation.annotation
            annotation_text = self._node_to_code(annotation_node)
            if is_classvar_annotation(annotation_node):
                continue

            default_text = None
            extra: dict[str, Any] = {}
            assigned = part.value
            if assigned:
                default_text = self._node_to_code(assigned)

                # Field(...) / field(...) carries per-field settings as kwargs.
                if isinstance(assigned, cst.Call):
                    if self._node_to_code(assigned.func) in {"Field", "field"}:
                        extra = self._extract_field_call_args(assigned)

            collected.append(
                ExtractedField(
                    name=field_name,
                    annotation=annotation_text,
                    default=default_text,
                    field_info=extra,
                )
            )

    return collected
597
+
598
+ def _extract_field_call_args(self, call: cst.Call) -> dict[str, Any]:
599
+ """Extract arguments from Field() call."""
600
+ result: dict[str, Any] = {}
601
+
602
+ for arg in call.args:
603
+ if arg.keyword:
604
+ key = arg.keyword.value
605
+ value = self._node_to_code(arg.value)
606
+ result[key] = value
607
+
608
+ return result
609
+
610
+ # =========================================================================
611
+ # Decorator Extraction
612
+ # =========================================================================
613
+
614
+ def _extract_decorators(self, decorators: Sequence[cst.Decorator]) -> list[ExtractedDecorator]:
615
+ """Extract decorator information."""
616
+ result: list[ExtractedDecorator] = []
617
+
618
+ for dec in decorators:
619
+ extracted = self._extract_single_decorator(dec)
620
+ if extracted:
621
+ result.append(extracted)
622
+
623
+ return result
624
+
625
def _extract_single_decorator(self, decorator: cst.Decorator) -> ExtractedDecorator | None:
    """Describe one decorator, or return ``None`` for unsupported shapes.

    Supported shapes are ``@name``, ``@name(...)``, ``@obj.attr(...)`` and
    ``@obj.attr``. ``name`` is the last identifier and ``full_name`` is
    the whole callee expression. Arguments are recorded only for the call
    forms; a call whose callee is neither a name nor an attribute access
    yields ``None``.
    """
    target = decorator.decorator
    keyword_args: dict[str, Any] = {}
    positional_args: list[Any] = []

    if isinstance(target, cst.Name):
        # @name
        short_name = qualified = target.value
    elif isinstance(target, cst.Call):
        # @name(...) or @obj.attr(...)
        callee = target.func
        if isinstance(callee, cst.Name):
            short_name = qualified = callee.value
        elif isinstance(callee, cst.Attribute):
            short_name = callee.attr.value
            qualified = self._node_to_code(callee)
        else:
            return None

        for arg in target.args:
            if arg.keyword:
                keyword_args[arg.keyword.value] = self._extract_literal_or_code(arg.value)
            else:
                positional_args.append(self._extract_literal_or_code(arg.value))
    elif isinstance(target, cst.Attribute):
        # @obj.attr
        qualified = self._node_to_code(target)
        short_name = target.attr.value
    else:
        return None

    return ExtractedDecorator(
        name=short_name,
        full_name=qualified,
        arguments=keyword_args,
        positional_args=positional_args,
        location=self._make_location(decorator),
        raw_source=self._node_to_code(decorator),
    )
693
+
694
+ # =========================================================================
695
+ # Call Extraction
696
+ # =========================================================================
697
+
698
def visit_Call(self, node: cst.Call) -> bool:
    """Record a call expression when one can be extracted.

    Returns:
        ``True`` so calls nested inside this one are visited as well.
    """
    if record := self._extract_call(node):
        self.calls.append(record)
    return True
704
+
705
def _extract_call(self, node: cst.Call) -> ExtractedCall | None:
    """Build an ``ExtractedCall`` for ``node``.

    A call through an attribute access (``obj.method(...)``) is marked as
    a method call with the source text of ``obj`` as its receiver. The
    record also carries a snapshot of the visitor's control flow flags and
    nesting depths at the time of the call.
    """
    callee_text = self._node_to_code(node.func)

    via_attribute = isinstance(node.func, cst.Attribute)
    receiver_text = self._node_to_code(node.func.value) if via_attribute else None

    arguments = self._extract_call_arguments(node.args)

    start_line, start_col, last_line, _ = self._get_node_position(node)

    # Qualified name of the function being visited, if any.
    enclosing = self._current_function
    enclosing_name = enclosing.qualified_name if enclosing else None

    loop_depth = self._in_loop
    conditional_depth = self._in_conditional

    return ExtractedCall(
        callee=callee_text,
        arguments=arguments,
        line=start_line,
        column=start_col,
        end_line=last_line,
        in_function=enclosing_name,
        is_method_call=via_attribute,
        receiver=receiver_text,
        # Control flow context
        in_loop=loop_depth > 0,
        in_conditional=conditional_depth > 0,
        in_try=self._in_try,
        in_except=self._in_except,
        in_finally=self._in_finally,
        in_with=self._in_with,
        in_comprehension=self._in_comprehension,
        loop_depth=loop_depth,
        conditional_depth=conditional_depth,
    )
746
+
747
def _extract_call_arguments(self, args: Sequence[cst.Arg]) -> list[ExtractedArgument]:
    """Describe every argument of a call as an ``ExtractedArgument``.

    Plain positional arguments are numbered from 0 in order. Keyword
    arguments and ``*`` / ``**`` unpackings get no position and do not
    advance the counter. Each record also carries the argument's source
    text plus the structural findings of the helper detectors.
    """
    extracted: list[ExtractedArgument] = []
    next_position = 0

    for arg in args:
        expr = arg.value
        star = arg.star

        keyword = arg.keyword.value if arg.keyword else None
        position = None
        if not arg.keyword and star == "":
            position = next_position
            next_position += 1

        value_source = self._node_to_code(expr)
        is_literal, literal_value, literal_type = self._extract_literal_info(expr)
        name_info = self._extract_name_info(expr)

        interpolated = detect_fstring(expr)
        concatenated = detect_concatenation(expr)
        format_call = detect_format_call(expr)
        container = detect_container_type(expr)
        referenced_names = collect_name_nodes(expr)

        # When the argument is itself a call, remember what was called.
        from_call = isinstance(expr, cst.Call)
        inner_callee = self._node_to_code(expr.func) if from_call else None

        extracted.append(
            ExtractedArgument(
                position=position,
                keyword=keyword,
                value_source=value_source,
                is_literal=is_literal,
                literal_value=literal_value,
                literal_type=literal_type,
                is_name=name_info[0],
                name_value=name_info[1],
                is_starred=star == "*",
                is_double_starred=star == "**",
                is_string_interpolation=interpolated,
                is_concatenation=concatenated,
                is_format_call=format_call,
                container_type=container,
                source_variables=referenced_names,
                is_call_result=from_call,
                called_function=inner_callee,
            )
        )

    return extracted
807
+
808
def _extract_literal_info(self, node: cst.BaseExpression) -> tuple[bool, Any, str | None]:
    """Classify ``node`` as a literal and extract its value.

    Returns:
        ``(is_literal, value, type_name)``. Non-literals yield
        ``(False, None, None)``. Numbers and the ``True``/``False``/``None``
        constants yield Python values; strings yield the text produced by
        ``_extract_string_value`` (their source text if that fails); list,
        dict and tuple displays yield their source text.
    """
    if isinstance(node, cst.Integer):
        # Base 0 honours the literal's own prefix (0x, 0o, 0b) and digit
        # underscores; plain ``int(text)`` raises ValueError on prefixes.
        return True, int(node.value, 0), "int"
    if isinstance(node, cst.Float):
        return True, float(node.value), "float"
    if isinstance(node, (cst.SimpleString, cst.FormattedString, cst.ConcatenatedString)):
        try:
            return True, self._extract_string_value(node), "str"
        except Exception:
            # Best effort: fall back to the literal's source text.
            return True, self._node_to_code(node), "str"
    if isinstance(node, cst.Name):
        # The three keyword constants are parsed as plain names.
        if node.value == "True":
            return True, True, "bool"
        if node.value == "False":
            return True, False, "bool"
        if node.value == "None":
            return True, None, "None"
    if isinstance(node, cst.List):
        return True, self._node_to_code(node), "list"
    if isinstance(node, cst.Dict):
        return True, self._node_to_code(node), "dict"
    if isinstance(node, cst.Tuple):
        return True, self._node_to_code(node), "tuple"

    return False, None, None
836
+
837
def _extract_string_value(self, node: cst.BaseExpression) -> str:
    """Return the text between the quotes of a simple string literal.

    Any prefix before the opening quote and the surrounding single or
    triple quotes are dropped; escape sequences are left as written.
    Every other node type is returned as source code.
    """
    if isinstance(node, cst.SimpleString):
        text = node.value
        # Everything before the first quote character is the prefix.
        opening = next((idx for idx, ch in enumerate(text) if ch in "\"'"), None)
        if opening is not None:
            width = 3 if text[opening : opening + 3] in ('"""', "'''") else 1
            return text[opening + width : -width]
    return self._node_to_code(node)
854
+
855
def _extract_name_info(self, node: cst.BaseExpression) -> tuple[bool, str | None]:
    """Return ``(True, name)`` for a plain name reference, else ``(False, None)``.

    ``True``, ``False`` and ``None`` are not treated as name references.
    """
    if not isinstance(node, cst.Name):
        return False, None
    if node.value in {"True", "False", "None"}:
        return False, None
    return True, node.value
860
+
861
+ # =========================================================================
862
+ # Assignment Extraction
863
+ # =========================================================================
864
+
865
def visit_Assign(self, node: cst.Assign) -> bool:
    """Record every target of a plain assignment, unpacking tuples/lists."""
    for assign_target in node.targets:
        lhs = assign_target.target

        # a, b = value   or   [a, b] = value
        if isinstance(lhs, (cst.Tuple, cst.List)):
            self._extract_unpacking_assignment(lhs, node.value)
            continue

        extracted = self._extract_assignment(lhs, node.value)
        if extracted:
            self.assignments.append(extracted)
    return True
876
+
877
def _extract_unpacking_assignment(
    self,
    target: cst.Tuple | cst.List,
    value: cst.BaseExpression,
) -> None:
    """
    Record one assignment per named element of an unpacking target.

    Handles:
    - a, b = 1, 2
    - a, b = func()
    - a, *rest, b = [1, 2, 3, 4]

    Each record's ``value_source`` is the right-hand side text followed by
    ``[index]``, where index is the element's position in the target.
    """
    rhs_text = self._node_to_code(value)

    # Facts about the right-hand side and scope are the same for every element.
    enclosing = self._current_function.qualified_name if self._current_function else None
    rhs_is_call = isinstance(value, cst.Call)
    rhs_callee = self._node_to_code(value.func) if rhs_is_call else None

    for index, element in enumerate(target.elements):
        # Plain and starred elements both wrap the real target in ``.value``.
        inner = getattr(element, "value", element)

        name = self._extract_assignment_target(inner)
        if not name:
            continue

        line = self._get_line(inner)

        # Evaluated per element so each record gets its own metadata objects.
        source_vars, is_method, is_fstr = extract_value_metadata(value)
        self.assignments.append(
            ExtractedAssignment(
                target=name,
                value_source=f"{rhs_text}[{index}]",
                annotation=None,
                line=line,
                in_function=enclosing,
                is_literal=False,
                is_call=rhs_is_call,
                called_function=rhs_callee,
                is_name=False,
                referenced_name=None,
                source_variables=source_vars,
                is_method_call=is_method,
                is_string_interpolation=is_fstr,
            )
        )

        # Names bound outside any function or class are module variables.
        if not self._current_function and not self._current_class:
            self.module_variables.append(name)
939
+
940
def visit_AnnAssign(self, node: cst.AnnAssign) -> bool:
    """Record an annotated assignment together with its annotation text."""
    annotation_text = self._node_to_code(node.annotation.annotation)

    # ``node.value`` is passed through as-is; ``_extract_assignment`` accepts ``None``.
    extracted = self._extract_assignment(node.target, node.value, annotation_text)
    if extracted:
        self.assignments.append(extracted)
    return True
949
+
950
def _extract_assignment(
    self,
    target: cst.BaseExpression,
    value: cst.BaseExpression | None,
    annotation: str | None = None,
) -> ExtractedAssignment | None:
    """Build an ``ExtractedAssignment`` for a single (non-unpacking) target.

    Args:
        target: Left-hand side expression.
        value: Right-hand side, or ``None`` when there is no value.
        annotation: Source text of the annotation, if any.

    Returns:
        The assignment record, or ``None`` when no target name can be
        derived from ``target``.
    """
    target_name = self._extract_assignment_target(target)
    if not target_name:
        return None

    value_source = self._node_to_code(value) if value else ""

    # Right-hand side is a call: remember what is being called.
    is_call = isinstance(value, cst.Call)
    called_function = self._node_to_code(value.func) if is_call else None

    # Right-hand side is a bare name. True/False/None are parsed as names
    # too, but they are constants (reported via is_literal), matching
    # _extract_name_info, so they are not treated as references.
    is_name = isinstance(value, cst.Name) and value.value not in {"True", "False", "None"}
    referenced_name = value.value if is_name else None

    is_literal = False
    if value:
        is_literal, _, _ = self._extract_literal_info(value)

    line = self._get_line(target)
    in_function = self._current_function.qualified_name if self._current_function else None

    # Names bound outside any function or class are module variables.
    if not self._current_function and not self._current_class:
        self.module_variables.append(target_name)

    source_vars, is_method, is_fstr = extract_value_metadata(value)

    return ExtractedAssignment(
        target=target_name,
        value_source=value_source,
        annotation=annotation,
        line=line,
        in_function=in_function,
        is_literal=is_literal,
        is_call=is_call,
        called_function=called_function,
        is_name=is_name,
        referenced_name=referenced_name,
        source_variables=source_vars,
        is_method_call=is_method,
        is_string_interpolation=is_fstr,
    )
1005
+
1006
def _extract_assignment_target(self, target: cst.BaseExpression) -> str | None:
    """
    Extract the target name from an assignment target.

    Handles:
    - Simple names: x = value
    - Attribute access: self.x = value, obj.attr = value (full dotted name)
    - Subscript: d[key] = value, d[a][b] = value (returns d)

    Returns ``None`` for any other target shape.
    """
    if isinstance(target, cst.Name):
        return target.value

    if isinstance(target, cst.Attribute):
        return self._get_dotted_name(target)

    if isinstance(target, cst.Subscript):
        # Peel every subscript layer so nested indexing still resolves to
        # the container being written to.
        base = target.value
        while isinstance(base, cst.Subscript):
            base = base.value
        if isinstance(base, cst.Name):
            return base.value
        if isinstance(base, cst.Attribute):
            return self._get_dotted_name(base)

    return None
1033
+
1034
+ # =========================================================================
1035
+ # Control Flow Extraction
1036
+ # =========================================================================
1037
+
1038
def visit_If(self, node: cst.If) -> bool:
    """Track an ``if`` statement and record its branch layout.

    Increments the conditional depth. Inside a function, a control-flow
    block is recorded with the start line of every ``elif`` in the chain
    and of the trailing ``else``, if present.
    """
    self._in_conditional += 1

    if self._current_function:
        start_line, _, end_line, _ = self._get_node_position(node)

        elif_lines: list[int] = []
        else_line: int | None = None

        # Each ``elif`` is stored as an ``If`` in the previous branch's
        # ``orelse``; follow the chain so later elifs and a final else
        # are not missed.
        branch = node.orelse
        while isinstance(branch, cst.If):
            elif_lines.append(self._get_node_position(branch)[0])
            branch = branch.orelse

        has_else = isinstance(branch, cst.Else)
        if has_else:
            else_line = self._get_node_position(branch)[0]

        block = ExtractedControlFlowBlock(
            block_type="if",
            start_line=start_line,
            end_line=end_line,
            has_elif=bool(elif_lines),
            has_else=has_else,
            elif_lines=elif_lines,
            else_line=else_line,
        )
        self._current_control_flow.append(block)

    return True
1074
+
1075
def leave_If(self, node: cst.If) -> None:
    """Decrement the conditional depth, never going below zero."""
    remaining = self._in_conditional - 1
    self._in_conditional = remaining if remaining > 0 else 0
1078
+
1079
def visit_IfExp(self, node: cst.IfExp) -> bool:
    """Track a conditional expression (``x if cond else y``) as an ``if`` block."""
    self._in_conditional += 1

    if not self._current_function:
        return True

    first_line, _, last_line, _ = self._get_node_position(node)
    self._current_control_flow.append(
        ExtractedControlFlowBlock(
            block_type="if",
            start_line=first_line,
            # Fall back to the start line when no end line is available.
            end_line=last_line or first_line,
        )
    )
    return True
1093
+
1094
def leave_IfExp(self, node: cst.IfExp) -> None:
    """Decrement the conditional depth after a conditional expression."""
    remaining = self._in_conditional - 1
    self._in_conditional = remaining if remaining > 0 else 0
1097
+
1098
def visit_For(self, node: cst.For) -> bool:
    """Track a ``for`` loop: bump the loop depth and record a block."""
    self._in_loop += 1

    if not self._current_function:
        return True

    first_line, _, last_line, _ = self._get_node_position(node)
    self._current_control_flow.append(
        ExtractedControlFlowBlock(
            block_type="for",
            start_line=first_line,
            end_line=last_line,
            # Both flags start out False when the loop is first recorded.
            has_break=False,
            has_continue=False,
        )
    )
    return True
1114
+
1115
def leave_For(self, node: cst.For) -> None:
    """Decrement the loop depth, never going below zero."""
    remaining = self._in_loop - 1
    self._in_loop = remaining if remaining > 0 else 0
1118
+
1119
def visit_While(self, node: cst.While) -> bool:
    """Track a ``while`` loop: bump the loop depth and record a block."""
    self._in_loop += 1

    if not self._current_function:
        return True

    first_line, _, last_line, _ = self._get_node_position(node)
    self._current_control_flow.append(
        ExtractedControlFlowBlock(
            block_type="while",
            start_line=first_line,
            end_line=last_line,
        )
    )
    return True
1133
+
1134
def leave_While(self, node: cst.While) -> None:
    """Decrement the loop depth after a ``while`` loop."""
    remaining = self._in_loop - 1
    self._in_loop = remaining if remaining > 0 else 0
1137
+
1138
def visit_Try(self, node: cst.Try) -> bool:
    """Track a ``try`` statement and record its handler/finally line ranges.

    The previous value of ``_in_try`` is saved so ``leave_Try`` can restore
    it; without that, finishing a nested ``try`` would clear the flag while
    the enclosing ``try`` is still being visited.
    """
    saved_flags = getattr(self, "_in_try_saved", None)
    if saved_flags is None:
        # Created lazily so no other initialisation code has to change.
        saved_flags = self._in_try_saved = []
    saved_flags.append(self._in_try)
    self._in_try = True

    if self._current_function:
        start_line, _, end_line, _ = self._get_node_position(node)

        # (start, end) line pair for every ``except`` handler.
        except_blocks: list[tuple[int, int]] = []
        for handler in node.handlers:
            exc_start, _, exc_end, _ = self._get_node_position(handler)
            except_blocks.append((exc_start, exc_end))

        # (start, end) line pair for the ``finally`` clause, if present.
        finally_block: tuple[int, int] | None = None
        if node.finalbody:
            fin_start, _, fin_end, _ = self._get_node_position(node.finalbody)
            finally_block = (fin_start, fin_end)

        block = ExtractedControlFlowBlock(
            block_type="try",
            start_line=start_line,
            end_line=end_line,
            except_blocks=except_blocks,
            finally_block=finally_block,
        )
        self._current_control_flow.append(block)

    return True


def leave_Try(self, node: cst.Try) -> None:
    """Leave a ``try`` statement, restoring the enclosing ``_in_try`` state."""
    saved_flags = getattr(self, "_in_try_saved", None)
    # Falls back to False when nothing was saved.
    self._in_try = saved_flags.pop() if saved_flags else False
1171
+
1172
def visit_ExceptHandler(self, node: cst.ExceptHandler) -> bool:
    """Enter an ``except`` handler.

    The previous ``_in_except`` value is saved so that leaving a handler
    nested inside another handler does not clear the outer one's flag.
    """
    saved_flags = getattr(self, "_in_except_saved", None)
    if saved_flags is None:
        # Created lazily so no other initialisation code has to change.
        saved_flags = self._in_except_saved = []
    saved_flags.append(self._in_except)
    self._in_except = True
    return True


def leave_ExceptHandler(self, node: cst.ExceptHandler) -> None:
    """Leave an ``except`` handler, restoring the enclosing state."""
    saved_flags = getattr(self, "_in_except_saved", None)
    self._in_except = saved_flags.pop() if saved_flags else False
1180
+
1181
def visit_Finally(self, node: cst.Finally) -> bool:
    """Enter a ``finally`` clause.

    The previous ``_in_finally`` value is saved so that leaving a clause
    nested inside another ``finally`` does not clear the outer one's flag.
    """
    saved_flags = getattr(self, "_in_finally_saved", None)
    if saved_flags is None:
        # Created lazily so no other initialisation code has to change.
        saved_flags = self._in_finally_saved = []
    saved_flags.append(self._in_finally)
    self._in_finally = True
    return True


def leave_Finally(self, node: cst.Finally) -> None:
    """Leave a ``finally`` clause, restoring the enclosing state."""
    saved_flags = getattr(self, "_in_finally_saved", None)
    self._in_finally = saved_flags.pop() if saved_flags else False
1189
+
1190
def visit_With(self, node: cst.With) -> bool:
    """Track a ``with`` statement and record its context-manager expressions.

    The previous ``_in_with`` value is saved so that leaving a nested
    ``with`` does not clear the flag for the enclosing one.
    """
    saved_flags = getattr(self, "_in_with_saved", None)
    if saved_flags is None:
        # Created lazily so no other initialisation code has to change.
        saved_flags = self._in_with_saved = []
    saved_flags.append(self._in_with)
    self._in_with = True

    if self._current_function:
        start_line, _, end_line, _ = self._get_node_position(node)

        # ``context_expr`` is the first item's source text; ``with_items``
        # lists every item whose source text is non-empty.
        context_expr = None
        with_items: list[str] = []
        if node.items:
            context_expr = self._node_to_code(node.items[0].item)
            for item in node.items:
                expr = self._node_to_code(item.item)
                if expr:
                    with_items.append(expr)

        block = ExtractedControlFlowBlock(
            block_type="with",
            start_line=start_line,
            end_line=end_line,
            context_expr=context_expr,
            with_items=with_items,
        )
        self._current_control_flow.append(block)

    return True


def leave_With(self, node: cst.With) -> None:
    """Leave a ``with`` statement, restoring the enclosing ``_in_with`` state."""
    saved_flags = getattr(self, "_in_with_saved", None)
    self._in_with = saved_flags.pop() if saved_flags else False
1222
+
1223
def visit_ListComp(self, node: cst.ListComp) -> bool:
    """Track a list comprehension.

    The previous ``_in_comprehension`` value is saved so that leaving a
    nested comprehension does not clear the flag for the enclosing one.
    """
    saved_flags = getattr(self, "_in_comprehension_saved", None)
    if saved_flags is None:
        # Created lazily so no other initialisation code has to change.
        saved_flags = self._in_comprehension_saved = []
    saved_flags.append(self._in_comprehension)
    self._in_comprehension = True

    if self._current_function:
        line, _, end_line, _ = self._get_node_position(node)
        block = ExtractedControlFlowBlock(
            block_type="comprehension",
            start_line=line,
            # Use the real end line, as the set/dict/generator visitors do,
            # so a multi-line comprehension is not reported as one line.
            end_line=end_line or line,
        )
        self._current_control_flow.append(block)

    return True


def leave_ListComp(self, node: cst.ListComp) -> None:
    """Leave a list comprehension, restoring the enclosing state."""
    saved_flags = getattr(self, "_in_comprehension_saved", None)
    self._in_comprehension = saved_flags.pop() if saved_flags else False
1241
+
1242
def visit_SetComp(self, node: cst.SetComp) -> bool:
    """Track a set comprehension.

    The previous ``_in_comprehension`` value is saved so that leaving a
    nested comprehension does not clear the flag for the enclosing one.
    """
    saved_flags = getattr(self, "_in_comprehension_saved", None)
    if saved_flags is None:
        # Created lazily so no other initialisation code has to change.
        saved_flags = self._in_comprehension_saved = []
    saved_flags.append(self._in_comprehension)
    self._in_comprehension = True

    if self._current_function:
        line, _, end_line, _ = self._get_node_position(node)
        block = ExtractedControlFlowBlock(
            block_type="comprehension",
            start_line=line,
            end_line=end_line or line,
        )
        self._current_control_flow.append(block)

    return True


def leave_SetComp(self, node: cst.SetComp) -> None:
    """Leave a set comprehension, restoring the enclosing state."""
    saved_flags = getattr(self, "_in_comprehension_saved", None)
    self._in_comprehension = saved_flags.pop() if saved_flags else False
1260
+
1261
def visit_DictComp(self, node: cst.DictComp) -> bool:
    """Track a dict comprehension.

    The previous ``_in_comprehension`` value is saved so that leaving a
    nested comprehension does not clear the flag for the enclosing one.
    """
    saved_flags = getattr(self, "_in_comprehension_saved", None)
    if saved_flags is None:
        # Created lazily so no other initialisation code has to change.
        saved_flags = self._in_comprehension_saved = []
    saved_flags.append(self._in_comprehension)
    self._in_comprehension = True

    if self._current_function:
        line, _, end_line, _ = self._get_node_position(node)
        block = ExtractedControlFlowBlock(
            block_type="comprehension",
            start_line=line,
            end_line=end_line or line,
        )
        self._current_control_flow.append(block)

    return True


def leave_DictComp(self, node: cst.DictComp) -> None:
    """Leave a dict comprehension, restoring the enclosing state."""
    saved_flags = getattr(self, "_in_comprehension_saved", None)
    self._in_comprehension = saved_flags.pop() if saved_flags else False
1279
+
1280
def visit_GeneratorExp(self, node: cst.GeneratorExp) -> bool:
    """Track a generator expression.

    The previous ``_in_comprehension`` value is saved so that leaving a
    nested comprehension does not clear the flag for the enclosing one.
    """
    saved_flags = getattr(self, "_in_comprehension_saved", None)
    if saved_flags is None:
        # Created lazily so no other initialisation code has to change.
        saved_flags = self._in_comprehension_saved = []
    saved_flags.append(self._in_comprehension)
    self._in_comprehension = True

    if self._current_function:
        line, _, end_line, _ = self._get_node_position(node)
        block = ExtractedControlFlowBlock(
            block_type="comprehension",
            start_line=line,
            end_line=end_line or line,
        )
        self._current_control_flow.append(block)

    return True


def leave_GeneratorExp(self, node: cst.GeneratorExp) -> None:
    """Leave a generator expression, restoring the enclosing state."""
    saved_flags = getattr(self, "_in_comprehension_saved", None)
    self._in_comprehension = saved_flags.pop() if saved_flags else False
1298
+
1299
def visit_Yield(self, node: cst.Yield) -> bool:
    """Mark the current function as a generator and record what it yields.

    Yield details are stored in the function's ``return_statements`` list,
    alongside the details recorded for ordinary returns.
    """
    func = self._current_function
    if func:
        func.has_yield = True

        # A bare ``yield`` carries no value worth recording.
        if node.value:
            info = self._extract_yield_info(node)
            if info:
                func.return_statements.append(info)

    # Keep descending so calls inside the yielded expression are seen.
    return True
1312
+
1313
def _extract_yield_info(self, node: cst.Yield) -> ExtractedReturn | None:
    """Describe what a ``yield`` produces, as an ``ExtractedReturn``.

    Returns ``None`` for a bare ``yield``. Otherwise the value is
    classified, in order, as a call, a variable, a literal, or a generic
    expression.
    """
    if node.value is None:
        return None

    line, _, _, _ = self._get_node_position(node)
    value = node.value
    # NOTE(review): for ``yield from x`` libcst appears to store a ``From``
    # wrapper in ``node.value``, which would land in the generic-expression
    # branch below - confirm that is the intended handling.

    # yield func()
    if isinstance(value, cst.Call):
        call_name = self._node_to_code(value.func)
        return ExtractedReturn(
            line=line,
            returns_call=True,
            call_name=call_name,
            expression_text=self._node_to_code(value),
        )

    # yield x -- True/False/None are parsed as names too, but they are
    # constants and must reach the literal branch rather than being
    # reported as variables called "True"/"False"/"None".
    if isinstance(value, cst.Name) and value.value not in {"True", "False", "None"}:
        return ExtractedReturn(
            line=line,
            returns_variable=True,
            variable_name=value.value,
        )

    # yield <literal>
    is_literal, lit_value, lit_type = self._extract_literal_info(value)
    if is_literal:
        return ExtractedReturn(
            line=line,
            returns_literal=True,
            literal_value=lit_value,
            literal_type=lit_type,
        )

    # Anything else is kept as source text.
    return ExtractedReturn(
        line=line,
        returns_expression=True,
        expression_text=self._node_to_code(value),
    )
1355
+
1356
def visit_Return(self, node: cst.Return) -> bool:
    """Mark the current function as returning and record the return details."""
    func = self._current_function
    if func:
        func.has_return = True

        details = self._extract_return_info(node)
        if details:
            func.return_statements.append(details)

    # Keep descending so calls inside the returned expression are seen.
    return True
1367
+
1368
def _extract_return_info(self, node: cst.Return) -> ExtractedReturn:
    """Describe what a ``return`` statement produces.

    The value is classified, in order, as: none (bare ``return`` or
    ``return None``), call, variable (plain name or attribute), literal,
    comprehension, lambda, or generic expression.
    """
    line, _, _, _ = self._get_node_position(node)

    # Bare ``return``
    if node.value is None:
        return ExtractedReturn(
            line=line,
            returns_none=True,
        )

    value = node.value

    if isinstance(value, cst.Name):
        # Explicit ``return None``
        if value.value == "None":
            return ExtractedReturn(
                line=line,
                returns_none=True,
            )
        # ``True``/``False`` are parsed as names but are constants; let
        # them reach the literal branch instead of being reported as
        # variables called "True"/"False".
        if value.value not in {"True", "False"}:
            return ExtractedReturn(
                line=line,
                returns_variable=True,
                variable_name=value.value,
            )

    # return func()
    if isinstance(value, cst.Call):
        call_name = self._node_to_code(value.func)
        return ExtractedReturn(
            line=line,
            returns_call=True,
            call_name=call_name,
            expression_text=self._node_to_code(value),
        )

    # return self.x / return obj.attr
    if isinstance(value, cst.Attribute):
        return ExtractedReturn(
            line=line,
            returns_variable=True,
            variable_name=self._get_dotted_name(value),
        )

    # return <literal>
    is_literal, lit_value, lit_type = self._extract_literal_info(value)
    if is_literal:
        return ExtractedReturn(
            line=line,
            returns_literal=True,
            literal_value=lit_value,
            literal_type=lit_type,
        )

    # return [x for x in ...] and friends
    if isinstance(value, (cst.ListComp, cst.SetComp, cst.DictComp, cst.GeneratorExp)):
        return ExtractedReturn(
            line=line,
            returns_comprehension=True,
            expression_text=self._node_to_code(value),
        )

    # return lambda ...: ...
    if isinstance(value, cst.Lambda):
        return ExtractedReturn(
            line=line,
            returns_lambda=True,
            expression_text=self._node_to_code(value),
        )

    # Anything else is kept as source text.
    return ExtractedReturn(
        line=line,
        returns_expression=True,
        expression_text=self._node_to_code(value),
    )
1446
+
1447
+ # =========================================================================
1448
+ # Module-level Extraction
1449
+ # =========================================================================
1450
+
1451
def visit_Module(self, node: cst.Module) -> bool:
    """Capture the module docstring when the first statement is a string."""
    leading = node.body[0] if node.body else None

    # The docstring must be a lone string expression on the first line.
    if (
        leading is not None
        and isinstance(leading, cst.SimpleStatementLine)
        and leading.body
        and isinstance(leading.body[0], cst.Expr)
    ):
        literal = leading.body[0].value
        if isinstance(literal, (cst.SimpleString, cst.ConcatenatedString)):
            self.module_docstring = self._extract_string_value(literal)

    return True
1463
+
1464
+ # =========================================================================
1465
+ # Utility Methods
1466
+ # =========================================================================
1467
+
1468
+ def _node_to_code(self, node: cst.CSTNode | None) -> str:
1469
+ """Convert CST node back to source code."""
1470
+ if node is None:
1471
+ return ""
1472
+ try:
1473
+ return node.code if hasattr(node, "code") else cst.parse_module("").code_for_node(node)
1474
+ except Exception:
1475
+ # Fallback: use the module's code_for_node
1476
+ try:
1477
+ module = cst.parse_module("")
1478
+ return module.code_for_node(node)
1479
+ except Exception:
1480
+ return str(node)
1481
+
1482
def _get_dotted_name(self, node: cst.BaseExpression) -> str:
    """Render a name or attribute chain as dotted text (e.g. ``a.b.c``).

    The innermost expression is rendered as source code when it is not a
    plain name.
    """
    # Walk inwards through the attribute chain, collecting attribute names
    # from the outermost to the innermost.
    attrs: list[str] = []
    current = node
    while isinstance(current, cst.Attribute):
        attrs.append(current.attr.value)
        current = current.value

    root = current.value if isinstance(current, cst.Name) else self._node_to_code(current)
    return ".".join([root, *reversed(attrs)])
1490
+
1491
def _extract_docstring(self, body: cst.BaseSuite) -> str | None:
    """Return the docstring of a function/class body, or ``None``.

    Only an indented block whose first statement is a lone string
    expression counts as having a docstring.
    """
    if not isinstance(body, cst.IndentedBlock) or not body.body:
        return None

    leading = body.body[0]
    if not isinstance(leading, cst.SimpleStatementLine):
        return None
    if not (leading.body and isinstance(leading.body[0], cst.Expr)):
        return None

    literal = leading.body[0].value
    if isinstance(literal, (cst.SimpleString, cst.ConcatenatedString)):
        return self._extract_string_value(literal)
    return None
1507
+
1508
+ def _extract_literal_or_code(self, node: cst.BaseExpression) -> Any:
1509
+ """Extract literal value or return source code."""
1510
+ is_lit, value, _ = self._extract_literal_info(node)
1511
+ if is_lit:
1512
+ return value
1513
+ return self._node_to_code(node)
1514
+
1515
def _make_location(self, node: cst.CSTNode) -> CodeLocation:
    """Build a ``CodeLocation`` covering ``node``.

    The file is ``Path("unknown")`` when no file path is set on the visitor.
    """
    start_line, start_col, stop_line, stop_col = self._get_node_position(node)
    source_file = self._file_path or Path("unknown")
    return CodeLocation(
        file=source_file,
        line=start_line,
        column=start_col,
        end_line=stop_line,
        end_column=stop_col,
    )
1525
+
1526
+ def _get_node_position(self, node: cst.CSTNode) -> tuple[int, int, int, int]:
1527
+ """Get position of a node."""
1528
+ if self._wrapper:
1529
+ try:
1530
+ pos = self._wrapper.resolve(cst.metadata.PositionProvider).get(node)
1531
+ if pos:
1532
+ return (
1533
+ pos.start.line,
1534
+ pos.start.column,
1535
+ pos.end.line,
1536
+ pos.end.column,
1537
+ )
1538
+ except Exception:
1539
+ pass
1540
+ return (0, 0, 0, 0)
1541
+
1542
+ def _get_line(self, node: cst.CSTNode) -> int:
1543
+ """Get just the line number of a node."""
1544
+ return self._get_node_position(node)[0]