apisec-code-bolt 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111):
  1. apisec_code_bolt/__init__.py +42 -0
  2. apisec_code_bolt/__main__.py +11 -0
  3. apisec_code_bolt/analysis/__init__.py +96 -0
  4. apisec_code_bolt/analysis/analyzer.py +2309 -0
  5. apisec_code_bolt/analysis/binding_tracker.py +341 -0
  6. apisec_code_bolt/analysis/call_graph.py +1197 -0
  7. apisec_code_bolt/analysis/call_graph_types.py +332 -0
  8. apisec_code_bolt/analysis/call_resolver.py +988 -0
  9. apisec_code_bolt/analysis/capability_tagger.py +322 -0
  10. apisec_code_bolt/analysis/config_scanner.py +197 -0
  11. apisec_code_bolt/analysis/data_flow.py +1883 -0
  12. apisec_code_bolt/analysis/dependency_extractor.py +959 -0
  13. apisec_code_bolt/analysis/flow_analysis.py +1406 -0
  14. apisec_code_bolt/analysis/hof_catalog.py +61 -0
  15. apisec_code_bolt/analysis/integration_detector.py +1399 -0
  16. apisec_code_bolt/analysis/literal_scanner.py +300 -0
  17. apisec_code_bolt/analysis/path_normalizer.py +55 -0
  18. apisec_code_bolt/analysis/read_site_detector.py +310 -0
  19. apisec_code_bolt/analysis/request_patterns.py +162 -0
  20. apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
  21. apisec_code_bolt/analysis/sink_evidence.py +333 -0
  22. apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
  23. apisec_code_bolt/cli/__init__.py +5 -0
  24. apisec_code_bolt/cli/exit_codes.py +17 -0
  25. apisec_code_bolt/cli/main.py +1069 -0
  26. apisec_code_bolt/cloud/__init__.py +1 -0
  27. apisec_code_bolt/cloud/apisec_client.py +118 -0
  28. apisec_code_bolt/cloud/client.py +255 -0
  29. apisec_code_bolt/core/__init__.py +75 -0
  30. apisec_code_bolt/core/config.py +528 -0
  31. apisec_code_bolt/core/credentials.py +65 -0
  32. apisec_code_bolt/core/discovery.py +433 -0
  33. apisec_code_bolt/core/log_format.py +115 -0
  34. apisec_code_bolt/core/manifest.py +1009 -0
  35. apisec_code_bolt/core/repo.py +280 -0
  36. apisec_code_bolt/core/state.py +59 -0
  37. apisec_code_bolt/core/telemetry.py +451 -0
  38. apisec_code_bolt/core/types.py +587 -0
  39. apisec_code_bolt/fingerprinting/__init__.py +1 -0
  40. apisec_code_bolt/frameworks/__init__.py +29 -0
  41. apisec_code_bolt/frameworks/_jwt_common.py +50 -0
  42. apisec_code_bolt/frameworks/auth_helpers.py +437 -0
  43. apisec_code_bolt/frameworks/base.py +608 -0
  44. apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
  45. apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
  46. apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
  47. apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
  48. apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
  49. apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
  50. apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
  51. apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
  52. apisec_code_bolt/frameworks/java/__init__.py +6 -0
  53. apisec_code_bolt/frameworks/java/_annotations.py +167 -0
  54. apisec_code_bolt/frameworks/java/_constraints.py +128 -0
  55. apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
  56. apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
  57. apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
  58. apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
  59. apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
  60. apisec_code_bolt/frameworks/js/__init__.py +8 -0
  61. apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
  62. apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
  63. apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
  64. apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
  65. apisec_code_bolt/frameworks/python/__init__.py +19 -0
  66. apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
  67. apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
  68. apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
  69. apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
  70. apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
  71. apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
  72. apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
  73. apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
  74. apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
  75. apisec_code_bolt/parsing/__init__.py +62 -0
  76. apisec_code_bolt/parsing/base.py +554 -0
  77. apisec_code_bolt/parsing/csharp/__init__.py +5 -0
  78. apisec_code_bolt/parsing/csharp/language_services.py +203 -0
  79. apisec_code_bolt/parsing/csharp/literals.py +72 -0
  80. apisec_code_bolt/parsing/csharp/parser.py +1158 -0
  81. apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
  82. apisec_code_bolt/parsing/js/__init__.py +5 -0
  83. apisec_code_bolt/parsing/js/language_services.py +118 -0
  84. apisec_code_bolt/parsing/js/parser.py +622 -0
  85. apisec_code_bolt/parsing/jvm/__init__.py +7 -0
  86. apisec_code_bolt/parsing/jvm/language_services.py +270 -0
  87. apisec_code_bolt/parsing/jvm/parser.py +774 -0
  88. apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
  89. apisec_code_bolt/parsing/python/__init__.py +150 -0
  90. apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
  91. apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
  92. apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
  93. apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
  94. apisec_code_bolt/parsing/python/expression_utils.py +221 -0
  95. apisec_code_bolt/parsing/python/extraction_types.py +271 -0
  96. apisec_code_bolt/parsing/python/language_services.py +487 -0
  97. apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
  98. apisec_code_bolt/parsing/python/parser.py +719 -0
  99. apisec_code_bolt/parsing/python/path_resolver.py +576 -0
  100. apisec_code_bolt/parsing/python/router_registry.py +806 -0
  101. apisec_code_bolt/parsing/python/type_resolver.py +730 -0
  102. apisec_code_bolt/parsing/python/visitors.py +1544 -0
  103. apisec_code_bolt/parsing/services.py +544 -0
  104. apisec_code_bolt/query/__init__.py +1 -0
  105. apisec_code_bolt/query/ast_cache.py +182 -0
  106. apisec_code_bolt/query/executor.py +283 -0
  107. apisec_code_bolt/query/handlers.py +832 -0
  108. apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
  109. apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
  110. apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
  111. apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,719 @@
1
+ """
2
+ Python parser using LibCST.
3
+
4
+ This module provides the main Python parsing interface that:
5
+ - Parses Python source files into CST
6
+ - Extracts structural information using visitors
7
+ - Resolves types and schemas
8
+ - Produces ParsedFile objects for analysis
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import logging
14
+ import time
15
+ from collections.abc import Iterator, Sequence
16
+ from pathlib import Path
17
+ from typing import TYPE_CHECKING, ClassVar
18
+
19
+ import libcst as cst
20
+
21
+ from ...core.types import (
22
+ CodeLocation,
23
+ Language,
24
+ ParseError,
25
+ QualifiedName,
26
+ )
27
+ from ..base import (
28
+ BaseParser,
29
+ ParsedArgument,
30
+ ParsedAssignment,
31
+ ParsedCallSite,
32
+ ParsedClass,
33
+ ParsedDecorator,
34
+ ParsedField,
35
+ ParsedFile,
36
+ ParsedFunction,
37
+ ParsedImport,
38
+ ParsedParameter,
39
+ ParsedReturn,
40
+ ParserRegistry,
41
+ )
42
+ from .type_resolver import TypeResolver
43
+ from .visitors import PythonExtractor
44
+
45
+ if TYPE_CHECKING:
46
+ from .visitors import ExtractedClass, ExtractedField, ExtractedFunction
47
+
48
+
49
+ # =============================================================================
50
+ # Python Parser
51
+ # =============================================================================
52
+
53
+
54
class PythonParser(BaseParser):
    """
    Parser for Python source files using LibCST.

    Handles:
    - Full CST parsing with position tracking
    - Function and class extraction
    - Import analysis
    - Call site extraction
    - Assignment tracking
    - Type resolution
    """

    LANGUAGE: ClassVar[Language] = Language.PYTHON
    SUPPORTED_EXTENSIONS: ClassVar[frozenset[str]] = frozenset({".py", ".pyw", ".pyi"})

    def __init__(self) -> None:
        self._type_resolver = TypeResolver()

    def parse_file(self, file_path: Path | str) -> ParsedFile:
        """
        Parse a Python source file.

        The file is read as UTF-8 first and re-read as latin-1 if decoding
        fails. Read failures are reported through the returned ParsedFile
        (``success=False``) rather than raised.

        Args:
            file_path: Path to the Python file

        Returns:
            ParsedFile with all extracted information; ``parse_time_ms`` is
            set whenever the source could be read.
        """
        start_time = time.perf_counter()

        # Ensure Path object
        if isinstance(file_path, str):
            file_path = Path(file_path)

        try:
            source = file_path.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            # Not valid UTF-8: retry with latin-1 before giving up.
            try:
                source = file_path.read_text(encoding="latin-1")
            except Exception as e:
                return ParsedFile(
                    path=file_path,
                    language=Language.PYTHON,
                    success=False,
                    error=ParseError(str(e), file_path),
                )
        except Exception as e:
            return ParsedFile(
                path=file_path,
                language=Language.PYTHON,
                success=False,
                error=ParseError(str(e), file_path),
            )

        result = self.parse_source(source, file_path)

        # Elapsed time covers both reading and parsing.
        result.parse_time_ms = int((time.perf_counter() - start_time) * 1000)
        return result

    def parse_source(self, source: str, file_path: Path | None = None) -> ParsedFile:
        """
        Parse Python source code string.

        Args:
            source: Python source code
            file_path: Optional file path for location info; defaults to
                ``unknown.py`` when omitted

        Returns:
            ParsedFile with all extracted information. Syntax errors and any
            other exception raised while parsing or extracting are reported
            through a ``success=False`` ParsedFile instead of being raised.
        """
        path = file_path or Path("unknown.py")

        # Infer module name from path
        module_name = self._infer_module_name(path)

        try:
            # Parse with LibCST and wrap for metadata (position tracking)
            tree = cst.parse_module(source)
            wrapper = cst.MetadataWrapper(tree)

            # Create extractor and walk the tree
            extractor = PythonExtractor(source, path, module_name)
            extractor.set_metadata_wrapper(wrapper)
            wrapper.visit(extractor)

            # Convert extracted data to ParsedFile format
            return self._build_parsed_file(
                path=path,
                source=source,
                extractor=extractor,
                module_name=module_name,
            )

        except cst.ParserSyntaxError as e:
            # LibCST exception attributes vary by version, so read them
            # defensively and fall back to 0 when missing.
            line_num = getattr(e, "raw_line", None) or 0
            col_num = getattr(e, "raw_column", None) or 0

            return ParsedFile(
                path=path,
                language=Language.PYTHON,
                success=False,
                error=ParseError(
                    f"Syntax error: {e}",
                    path,
                    line=line_num,
                    column=col_num,
                ),
                line_count=source.count("\n") + 1,
            )
        except Exception as e:
            return ParsedFile(
                path=path,
                language=Language.PYTHON,
                success=False,
                error=ParseError(str(e), path),
                line_count=source.count("\n") + 1,
            )

    def parse_files(self, file_paths: Sequence[Path]) -> Iterator[ParsedFile]:
        """
        Parse multiple Python files.

        Yields a ParsedFile for each path accepted by ``can_parse``,
        including files that fail to parse; other paths are skipped.
        """
        for path in file_paths:
            if self.can_parse(path):
                yield self.parse_file(path)

    # =========================================================================
    # Conversion Methods
    # =========================================================================

    def _build_parsed_file(
        self,
        path: Path,
        source: str,
        extractor: PythonExtractor,
        module_name: str,
    ) -> ParsedFile:
        """Build ParsedFile from extraction results."""
        # Pass the real path so import locations point at the parsed file.
        imports = [self._convert_import(imp, path) for imp in extractor.imports]
        functions = [self._convert_function(func, path) for func in extractor.functions]
        classes = [self._convert_class(cls, path) for cls in extractor.classes]
        call_sites = [self._convert_call(call, path) for call in extractor.calls]

        # module_name is needed to build target_qualified_name
        assignments = [
            self._convert_assignment(assign, path, module_name)
            for assign in extractor.assignments
        ]

        # Symbol table keyed by full qualified name: functions first, then
        # classes and their methods (later entries overwrite on collision).
        all_symbols = {}
        for func in functions:
            all_symbols[func.qualified_name.full] = func
        for cls in classes:
            all_symbols[cls.qualified_name.full] = cls
            for method in cls.methods:
                all_symbols[method.qualified_name.full] = method

        return ParsedFile(
            path=path,
            language=Language.PYTHON,
            success=True,
            module_name=module_name,
            module_docstring=extractor.module_docstring,
            imports=imports,
            functions=functions,
            classes=classes,
            all_symbols=all_symbols,
            call_sites=call_sites,
            assignments=assignments,
            module_variables=extractor.module_variables,
            line_count=source.count("\n") + 1,
        )

    def _convert_import(self, imp, path: Path | None = None) -> ParsedImport:
        """
        Convert extracted import to ParsedImport.

        Args:
            imp: Extracted import record; ``imp.names`` is read as a sequence
                of ``(name, alias)`` pairs.
            path: File the import was found in. When omitted, the location
                falls back to the placeholder ``Path("unknown")``.

        Returns:
            ParsedImport. ``names`` is only populated for ``from`` imports,
            and ``alias`` reflects the first imported name only.
        """
        names = [name for name, _ in imp.names] if imp.is_from_import else []
        alias = imp.names[0][1] if imp.names and imp.names[0][1] else None

        location = None
        if imp.line:
            location = CodeLocation(
                file=path if path is not None else Path("unknown"),
                line=imp.line,
            )

        return ParsedImport(
            module=imp.module,
            names=names,
            alias=alias,
            is_relative=imp.is_relative,
            relative_level=imp.relative_level,
            location=location,
        )

    @staticmethod
    def _split_dotted(dotted: str) -> tuple[str, str]:
        """Split ``"a.b.c"`` into ``("a.b", "c")``; a bare name gets module ``""``."""
        head, sep, tail = dotted.rpartition(".")
        return (head, tail) if sep else ("", dotted)

    def _convert_decorators(self, decorators) -> list[ParsedDecorator]:
        """Convert extracted decorators (shared by functions and classes)."""
        return [
            ParsedDecorator(
                name=d.name,
                qualified_name=QualifiedName(module="", name=d.full_name),
                arguments=d.arguments,
                positional_args=d.positional_args,
                location=d.location,
            )
            for d in decorators
        ]

    def _convert_function(self, func: ExtractedFunction, path: Path) -> ParsedFunction:
        """Convert extracted function to ParsedFunction."""
        params = [
            ParsedParameter(
                name=p.name,
                type_annotation=p.annotation,
                default_value=p.default,
                is_variadic=p.is_variadic,
                is_keyword_variadic=p.is_keyword_variadic,
            )
            for p in func.parameters
        ]

        module, name = self._split_dotted(func.qualified_name)

        # Optional extractor attributes are read with getattr defaults.
        return_statements = [
            ParsedReturn(
                line=r.line,
                returns_none=r.returns_none,
                returns_call=r.returns_call,
                returns_variable=r.returns_variable,
                returns_literal=r.returns_literal,
                returns_expression=r.returns_expression,
                returns_lambda=getattr(r, "returns_lambda", False),
                returns_comprehension=getattr(r, "returns_comprehension", False),
                call_name=r.call_name,
                variable_name=r.variable_name,
                literal_type=r.literal_type,
                expression_text=r.expression_text,
            )
            for r in getattr(func, "return_statements", [])
        ]

        return ParsedFunction(
            name=func.name,
            qualified_name=QualifiedName(module=module, name=name),
            location=CodeLocation(
                file=path,
                line=func.line,
                column=func.column,
                end_line=func.end_line,
            ),
            parameters=params,
            return_type=func.return_annotation,
            decorators=self._convert_decorators(func.decorators),
            is_async=func.is_async,
            binding=func.binding,
            owner_type=func.owner_type,
            docstring=func.docstring,
            # Single-line (or unknown-extent) functions count as one line.
            body_line_count=func.end_line - func.line if func.end_line > func.line else 1,
            body_source=func.body_source,
            has_yield=getattr(func, "has_yield", False),
            has_return=getattr(func, "has_return", False),
            local_variables=func.local_variables,
            control_flow_info=getattr(func, "control_flow_info", {}),
            return_statements=return_statements,
        )

    def _convert_class(self, cls: ExtractedClass, path: Path) -> ParsedClass:
        """Convert extracted class to ParsedClass."""
        methods = [self._convert_function(method, path) for method in cls.methods]

        # Fields (for Pydantic models and dataclasses)
        fields = [self._convert_field(f, path) for f in cls.fields]

        module, name = self._split_dotted(cls.qualified_name)

        # Treat as Pydantic if the extractor said so or a base name matches.
        pydantic_bases = {
            "BaseModel",
            "BaseSettings",
            "pydantic.BaseModel",
            "pydantic.BaseSettings",
        }
        is_pydantic = cls.is_pydantic_model or any(
            base in pydantic_bases for base in cls.bases
        )

        return ParsedClass(
            name=cls.name,
            qualified_name=QualifiedName(module=module, name=name),
            location=CodeLocation(
                file=path,
                line=cls.line,
                column=cls.column,
                end_line=cls.end_line,
            ),
            base_classes=cls.bases,
            decorators=self._convert_decorators(cls.decorators),
            fields=fields,
            methods=methods,
            class_variables=cls.class_variables,
            instance_variables=cls.instance_variables,
            docstring=cls.docstring,
            is_dataclass=cls.is_dataclass,
            is_pydantic_model=is_pydantic,
        )

    def _convert_field(self, f: ExtractedField, path: Path) -> ParsedField:
        """
        Convert extracted field to ParsedField.

        ``path`` is accepted for signature parity with the other converters
        and is not used.
        """
        constraint_keys = ["min_length", "max_length", "gt", "ge", "lt", "le", "regex", "pattern"]
        constraints = {
            key: f.field_info[key] for key in constraint_keys if key in f.field_info
        }

        # Required when there is no default (or an Ellipsis default), unless
        # field_info carries an explicit "default" entry.
        is_required = f.default is None or f.default == "..."
        if "default" in f.field_info:
            is_required = False

        return ParsedField(
            name=f.name,
            type_annotation=f.annotation,
            default_value=f.default,
            field_info=f.field_info,
            is_required=is_required,
            alias=f.field_info.get("alias"),
            description=f.field_info.get("description"),
            constraints=constraints,
        )

    def _convert_argument(self, arg) -> ParsedArgument:
        """Convert one extracted call argument to ParsedArgument."""
        # Anything that is neither a literal nor a bare name is an expression.
        is_expression = not arg.is_literal and not arg.is_name
        return ParsedArgument(
            position=arg.position,
            name=arg.keyword,
            is_literal=arg.is_literal,
            literal_value=arg.literal_value,
            literal_type=arg.literal_type,
            is_variable=arg.is_name,
            variable_name=arg.name_value,
            is_expression=is_expression,
            expression_text=arg.value_source if is_expression else None,
            is_call_result=getattr(arg, "is_call_result", False),
            called_function=getattr(arg, "called_function", None),
            is_spread=arg.is_starred,
            is_keyword_spread=arg.is_double_starred,
            is_string_interpolation=getattr(arg, "is_string_interpolation", False),
            is_concatenation=getattr(arg, "is_concatenation", False),
            is_format_call=getattr(arg, "is_format_call", False),
            container_type=getattr(arg, "container_type", None),
            source_variables=getattr(arg, "source_variables", None) or [],
        )

    def _convert_call(self, call, path: Path) -> ParsedCallSite:
        """Convert extracted call to ParsedCallSite."""
        arguments = [self._convert_argument(arg) for arg in call.arguments]

        # Callee qualified name: split on the last dot, if any.
        callee_module, callee_name = self._split_dotted(call.callee)
        callee_qn = QualifiedName(module=callee_module, name=callee_name)

        # Caller qualified name (None for calls outside any function).
        caller_qn = None
        if call.in_function:
            caller_module, caller_name = self._split_dotted(call.in_function)
            caller_qn = QualifiedName(module=caller_module, name=caller_name)

        return ParsedCallSite(
            callee_name=call.callee,
            callee_qualified_name=callee_qn,
            callee_resolved=False,  # Will be resolved later in analysis
            location=CodeLocation(
                file=path,
                line=call.line,
                column=call.column,
                end_line=call.end_line or None,
            ),
            caller_function=caller_qn,
            arguments=arguments,
            is_method_call=call.is_method_call,
            receiver_expression=call.receiver,
            # Control flow context
            in_loop=getattr(call, "in_loop", False),
            in_conditional=getattr(call, "in_conditional", False),
            in_try=getattr(call, "in_try", False),
            in_except=getattr(call, "in_except", False),
            in_finally=getattr(call, "in_finally", False),
            in_with=getattr(call, "in_with", False),
            in_comprehension=getattr(call, "in_comprehension", False),
            loop_depth=getattr(call, "loop_depth", 0),
            conditional_depth=getattr(call, "conditional_depth", 0),
        )

    def _convert_assignment(self, assign, path: Path, module_name: str = "") -> ParsedAssignment:
        """Convert extracted assignment to ParsedAssignment."""
        # in_function may be a string or an object exposing ``.full``.
        owner = getattr(assign, "in_function", None)
        in_function_str = None if owner is None else getattr(owner, "full", str(owner))

        # Build target_qualified_name so flow-sensitive CFG can attribute
        # assignments to functions (value-flow / variable_derives_from).
        target_qualified_name = self._assignment_target_qualified_name(
            assign.target,
            in_function_str,
            module_name,
        )

        # Classification precedence: literal, then call, then variable.
        if assign.is_literal:
            source_type = "literal"
        elif assign.is_call:
            source_type = "call"
        elif assign.is_name:
            source_type = "variable"
        else:
            source_type = "expression"

        return ParsedAssignment(
            target=assign.target,
            location=CodeLocation(
                file=path,
                line=assign.line,
            ),
            source_type=source_type,
            source_value=assign.value_source,
            source_call=assign.called_function if assign.is_call else None,
            in_function=in_function_str,
            type_annotation=assign.annotation,
            target_qualified_name=target_qualified_name,
            source_variables=getattr(assign, "source_variables", None) or [],
            is_method_call=getattr(assign, "is_method_call", False),
            is_string_interpolation=getattr(assign, "is_string_interpolation", False),
        )

    def _assignment_target_qualified_name(
        self,
        target: str,
        in_function: str | None,
        module_name: str,
    ) -> QualifiedName | None:
        """
        Build qualified name for an assignment target for flow-sensitive analysis.

        - Inside a function: "module.func_name.target" so it starts with func qname.
        - Module-level: "module_name.target" (two parts for module-level binding).
        - Neither a function nor a module name available: None.
        """
        if in_function:
            # Local assignment: the last dotted part of the function name is
            # prepended to the target; everything before it is the module.
            mod, func_part = self._split_dotted(in_function)
            return QualifiedName(module=mod, name=f"{func_part}.{target}")
        if module_name:
            return QualifiedName(module=module_name, name=target)
        return None

    # =========================================================================
    # Helper Methods
    # =========================================================================

    def _infer_module_name(self, file_path: Path) -> str:
        """
        Infer Python module name from file path.

        e.g., /project/src/app/routes/users.py    -> app.routes.users
              /project/src/app/routes/__init__.py -> app.routes

        Package membership is detected by walking up the directory tree while
        each directory contains an ``__init__.py`` on disk. For an
        ``__init__`` file whose package chain cannot be found on disk (e.g. a
        synthetic path), the parent directory name is returned.
        """
        stem = file_path.stem

        # Walk up collecting package directories until a non-package directory
        # (or the filesystem root, whose name is empty) is reached.
        parts = []
        current = file_path.parent
        while current.name:
            if not (current / "__init__.py").exists():
                break
            parts.append(current.name)
            current = current.parent
        parts.reverse()

        if stem == "__init__":
            # A package's __init__ module is named after the package itself.
            return ".".join(parts) if parts else file_path.parent.name

        parts.append(stem)
        return ".".join(parts)
586
+
587
+
588
+ # =============================================================================
589
+ # Parser Registration
590
+ # =============================================================================
591
+
592
+
593
# Create the module-level singleton parser instance (built once, when this
# module is imported).
_python_parser = PythonParser()


def get_python_parser() -> PythonParser:
    """Return the module-level ``PythonParser`` singleton."""
    return _python_parser


# Register the singleton with the parser registry. This runs as an
# import-time side effect of this module.
ParserRegistry.register(_python_parser)
604
+
605
+
606
+ # =============================================================================
607
+ # Project-Level Parsing
608
+ # =============================================================================
609
+
610
+
611
def _parse_file_worker(path: Path) -> ParsedFile:
    """
    Parse one file with a freshly constructed ``PythonParser``.

    Kept at module top level so ProcessPoolExecutor can pickle it.
    """
    return PythonParser().parse_file(path)
615
+
616
+
617
class PythonProjectParser:
    """
    Parser for entire Python projects.

    Handles:
    - Multi-file parsing (parallel when there are at least
      PARALLEL_THRESHOLD files)
    - Cross-file type resolution
    - Import resolution
    - Building complete project model
    """

    PARALLEL_THRESHOLD = 20

    def __init__(self) -> None:
        self._parser = PythonParser()
        self._type_resolver = TypeResolver()
        self._parsed_files: dict[Path, ParsedFile] = {}
        self._all_classes: dict[str, ParsedClass] = {}
        self._all_functions: dict[str, ParsedFunction] = {}

    def parse_project(self, file_paths: list[Path]) -> dict[Path, ParsedFile]:
        """
        Parse all Python files in a project.

        Uses ProcessPoolExecutor when ``len(file_paths)`` reaches
        PARALLEL_THRESHOLD, otherwise parses serially in-process.

        Args:
            file_paths: Files to parse.

        Returns:
            Mapping of path to ParsedFile. This is the parser's internal
            dict, so results accumulate across calls on the same instance.
        """
        logger = logging.getLogger(__name__)

        if len(file_paths) >= self.PARALLEL_THRESHOLD:
            self._parse_parallel(file_paths, logger)
        else:
            self._parse_serial(file_paths)

        self._resolve_references()
        return self._parsed_files

    def _parse_serial(self, file_paths: list[Path]) -> None:
        """Parse each file in order in the current process and register it."""
        for path in file_paths:
            parsed = self._parser.parse_file(path)
            self._register_parsed(path, parsed)

    def _parse_parallel(self, file_paths: list[Path], logger: logging.Logger) -> None:
        """
        Parse files in worker processes and register results as they finish.

        A failure of an individual worker is recorded as a failed ParsedFile
        for that path. Any exception escaping the pool itself triggers a
        serial re-parse of all ``file_paths``.
        """
        import os
        from concurrent.futures import ProcessPoolExecutor, as_completed

        max_workers = min(os.cpu_count() or 1, 8)
        logger.info("Parallel parsing %d files with %d workers", len(file_paths), max_workers)

        try:
            with ProcessPoolExecutor(max_workers=max_workers) as pool:
                future_to_path = {
                    pool.submit(_parse_file_worker, path): path for path in file_paths
                }
                for future in as_completed(future_to_path):
                    path = future_to_path[future]
                    try:
                        parsed = future.result()
                        self._register_parsed(path, parsed)
                    except Exception as e:
                        logger.warning("Worker failed for %s: %s", path, e)
                        # Wrap in ParseError like every other failure path in
                        # this module, instead of storing a bare string.
                        self._parsed_files[path] = ParsedFile(
                            path=path,
                            language=Language.PYTHON,
                            success=False,
                            error=ParseError(str(e), path),
                        )
        except Exception as e:
            logger.warning("Parallel parsing failed, falling back to serial: %s", e)
            self._parse_serial(file_paths)

    def _register_parsed(self, path: Path, parsed: ParsedFile) -> None:
        """
        Store a parse result and, if it succeeded, index its symbols.

        Classes and functions are indexed by full qualified name; a later
        registration with the same name overwrites the earlier one.
        """
        self._parsed_files[path] = parsed
        if not parsed.success:
            return

        for imp in parsed.imports:
            self._type_resolver.add_import(imp)
        for cls in parsed.classes:
            self._all_classes[cls.qualified_name.full] = cls
            self._type_resolver.add_class(cls, path)
        for func in parsed.functions:
            self._all_functions[func.qualified_name.full] = func

    def _resolve_references(self) -> None:
        """
        Resolve cross-file type references and imports.

        Currently a no-op placeholder.
        """
        # This would involve:
        # 1. Resolving import targets to actual definitions
        # 2. Resolving type annotations to class definitions
        # 3. Building inheritance hierarchies
        # 4. Resolving function call targets

    def get_class(self, qualified_name: str) -> ParsedClass | None:
        """Get a class by qualified name, or None if it was not registered."""
        return self._all_classes.get(qualified_name)

    def get_function(self, qualified_name: str) -> ParsedFunction | None:
        """Get a function by qualified name, or None if it was not registered."""
        return self._all_functions.get(qualified_name)

    def get_type_resolver(self) -> TypeResolver:
        """Get the type resolver with all registered types."""
        return self._type_resolver