apisec-code-bolt 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apisec_code_bolt/__init__.py +42 -0
- apisec_code_bolt/__main__.py +11 -0
- apisec_code_bolt/analysis/__init__.py +96 -0
- apisec_code_bolt/analysis/analyzer.py +2309 -0
- apisec_code_bolt/analysis/binding_tracker.py +341 -0
- apisec_code_bolt/analysis/call_graph.py +1197 -0
- apisec_code_bolt/analysis/call_graph_types.py +332 -0
- apisec_code_bolt/analysis/call_resolver.py +988 -0
- apisec_code_bolt/analysis/capability_tagger.py +322 -0
- apisec_code_bolt/analysis/config_scanner.py +197 -0
- apisec_code_bolt/analysis/data_flow.py +1883 -0
- apisec_code_bolt/analysis/dependency_extractor.py +959 -0
- apisec_code_bolt/analysis/flow_analysis.py +1406 -0
- apisec_code_bolt/analysis/hof_catalog.py +61 -0
- apisec_code_bolt/analysis/integration_detector.py +1399 -0
- apisec_code_bolt/analysis/literal_scanner.py +300 -0
- apisec_code_bolt/analysis/path_normalizer.py +55 -0
- apisec_code_bolt/analysis/read_site_detector.py +310 -0
- apisec_code_bolt/analysis/request_patterns.py +162 -0
- apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
- apisec_code_bolt/analysis/sink_evidence.py +333 -0
- apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
- apisec_code_bolt/cli/__init__.py +5 -0
- apisec_code_bolt/cli/exit_codes.py +17 -0
- apisec_code_bolt/cli/main.py +1069 -0
- apisec_code_bolt/cloud/__init__.py +1 -0
- apisec_code_bolt/cloud/apisec_client.py +118 -0
- apisec_code_bolt/cloud/client.py +255 -0
- apisec_code_bolt/core/__init__.py +75 -0
- apisec_code_bolt/core/config.py +528 -0
- apisec_code_bolt/core/credentials.py +65 -0
- apisec_code_bolt/core/discovery.py +433 -0
- apisec_code_bolt/core/log_format.py +115 -0
- apisec_code_bolt/core/manifest.py +1009 -0
- apisec_code_bolt/core/repo.py +280 -0
- apisec_code_bolt/core/state.py +59 -0
- apisec_code_bolt/core/telemetry.py +451 -0
- apisec_code_bolt/core/types.py +587 -0
- apisec_code_bolt/fingerprinting/__init__.py +1 -0
- apisec_code_bolt/frameworks/__init__.py +29 -0
- apisec_code_bolt/frameworks/_jwt_common.py +50 -0
- apisec_code_bolt/frameworks/auth_helpers.py +437 -0
- apisec_code_bolt/frameworks/base.py +608 -0
- apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
- apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
- apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
- apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
- apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
- apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
- apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
- apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
- apisec_code_bolt/frameworks/java/__init__.py +6 -0
- apisec_code_bolt/frameworks/java/_annotations.py +167 -0
- apisec_code_bolt/frameworks/java/_constraints.py +128 -0
- apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
- apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
- apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
- apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
- apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
- apisec_code_bolt/frameworks/js/__init__.py +8 -0
- apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
- apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
- apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
- apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
- apisec_code_bolt/frameworks/python/__init__.py +19 -0
- apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
- apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
- apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
- apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
- apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
- apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
- apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
- apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
- apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
- apisec_code_bolt/parsing/__init__.py +62 -0
- apisec_code_bolt/parsing/base.py +554 -0
- apisec_code_bolt/parsing/csharp/__init__.py +5 -0
- apisec_code_bolt/parsing/csharp/language_services.py +203 -0
- apisec_code_bolt/parsing/csharp/literals.py +72 -0
- apisec_code_bolt/parsing/csharp/parser.py +1158 -0
- apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
- apisec_code_bolt/parsing/js/__init__.py +5 -0
- apisec_code_bolt/parsing/js/language_services.py +118 -0
- apisec_code_bolt/parsing/js/parser.py +622 -0
- apisec_code_bolt/parsing/jvm/__init__.py +7 -0
- apisec_code_bolt/parsing/jvm/language_services.py +270 -0
- apisec_code_bolt/parsing/jvm/parser.py +774 -0
- apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
- apisec_code_bolt/parsing/python/__init__.py +150 -0
- apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
- apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
- apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
- apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
- apisec_code_bolt/parsing/python/expression_utils.py +221 -0
- apisec_code_bolt/parsing/python/extraction_types.py +271 -0
- apisec_code_bolt/parsing/python/language_services.py +487 -0
- apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
- apisec_code_bolt/parsing/python/parser.py +719 -0
- apisec_code_bolt/parsing/python/path_resolver.py +576 -0
- apisec_code_bolt/parsing/python/router_registry.py +806 -0
- apisec_code_bolt/parsing/python/type_resolver.py +730 -0
- apisec_code_bolt/parsing/python/visitors.py +1544 -0
- apisec_code_bolt/parsing/services.py +544 -0
- apisec_code_bolt/query/__init__.py +1 -0
- apisec_code_bolt/query/ast_cache.py +182 -0
- apisec_code_bolt/query/executor.py +283 -0
- apisec_code_bolt/query/handlers.py +832 -0
- apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
- apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
- apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
- apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,719 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Python parser using LibCST.
|
|
3
|
+
|
|
4
|
+
This module provides the main Python parsing interface that:
|
|
5
|
+
- Parses Python source files into CST
|
|
6
|
+
- Extracts structural information using visitors
|
|
7
|
+
- Resolves types and schemas
|
|
8
|
+
- Produces ParsedFile objects for analysis
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
import time
|
|
15
|
+
from collections.abc import Iterator, Sequence
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import TYPE_CHECKING, ClassVar
|
|
18
|
+
|
|
19
|
+
import libcst as cst
|
|
20
|
+
|
|
21
|
+
from ...core.types import (
|
|
22
|
+
CodeLocation,
|
|
23
|
+
Language,
|
|
24
|
+
ParseError,
|
|
25
|
+
QualifiedName,
|
|
26
|
+
)
|
|
27
|
+
from ..base import (
|
|
28
|
+
BaseParser,
|
|
29
|
+
ParsedArgument,
|
|
30
|
+
ParsedAssignment,
|
|
31
|
+
ParsedCallSite,
|
|
32
|
+
ParsedClass,
|
|
33
|
+
ParsedDecorator,
|
|
34
|
+
ParsedField,
|
|
35
|
+
ParsedFile,
|
|
36
|
+
ParsedFunction,
|
|
37
|
+
ParsedImport,
|
|
38
|
+
ParsedParameter,
|
|
39
|
+
ParsedReturn,
|
|
40
|
+
ParserRegistry,
|
|
41
|
+
)
|
|
42
|
+
from .type_resolver import TypeResolver
|
|
43
|
+
from .visitors import PythonExtractor
|
|
44
|
+
|
|
45
|
+
if TYPE_CHECKING:
|
|
46
|
+
from .visitors import ExtractedClass, ExtractedField, ExtractedFunction
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# =============================================================================
|
|
50
|
+
# Python Parser
|
|
51
|
+
# =============================================================================
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class PythonParser(BaseParser):
|
|
55
|
+
"""
|
|
56
|
+
Parser for Python source files using LibCST.
|
|
57
|
+
|
|
58
|
+
Handles:
|
|
59
|
+
- Full CST parsing with position tracking
|
|
60
|
+
- Function and class extraction
|
|
61
|
+
- Import analysis
|
|
62
|
+
- Call site extraction
|
|
63
|
+
- Assignment tracking
|
|
64
|
+
- Type resolution
|
|
65
|
+
"""
|
|
66
|
+
|
|
67
|
+
LANGUAGE: ClassVar[Language] = Language.PYTHON
|
|
68
|
+
SUPPORTED_EXTENSIONS: ClassVar[frozenset[str]] = frozenset({".py", ".pyw", ".pyi"})
|
|
69
|
+
|
|
70
|
+
def __init__(self) -> None:
    """Create the parser together with its own ``TypeResolver`` instance."""
    resolver = TypeResolver()
    self._type_resolver = resolver
|
|
72
|
+
|
|
73
|
+
def parse_file(self, file_path: Path | str) -> ParsedFile:
    """
    Read a Python file from disk and parse it.

    The file is decoded as UTF-8 first; if that raises
    ``UnicodeDecodeError`` it is read again as latin-1.

    Args:
        file_path: Location of the file, as a ``Path`` or a string.

    Returns:
        The result of ``parse_source`` with ``parse_time_ms`` filled in
        (the measurement starts before the file is read). If the file
        cannot be read, a ``ParsedFile`` with ``success=False`` and a
        ``ParseError`` describing the problem is returned instead; read
        errors are not raised to the caller.
    """
    started = time.perf_counter()

    target = Path(file_path) if isinstance(file_path, str) else file_path

    def unreadable(exc: Exception) -> ParsedFile:
        # Shared shape of the "could not read the file" result.
        return ParsedFile(
            path=target,
            language=Language.PYTHON,
            success=False,
            error=ParseError(str(exc), target),
        )

    try:
        text = target.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        # Not valid UTF-8: retry with latin-1 before giving up.
        try:
            text = target.read_text(encoding="latin-1")
        except Exception as exc:
            return unreadable(exc)
    except Exception as exc:
        return unreadable(exc)

    parsed = self.parse_source(text, target)
    parsed.parse_time_ms = int((time.perf_counter() - started) * 1000)
    return parsed
|
|
120
|
+
|
|
121
|
+
def parse_source(self, source: str, file_path: Path | None = None) -> ParsedFile:
    """
    Parse Python source text that is already in memory.

    Args:
        source: The Python source code.
        file_path: Path used for locations and module-name inference;
            ``Path("unknown.py")`` is used when omitted.

    Returns:
        A successful ``ParsedFile`` built from the extractor's results, or
        one with ``success=False`` and a ``ParseError`` when LibCST rejects
        the source or any other exception occurs during extraction.
    """
    path = file_path or Path("unknown.py")
    module_name = self._infer_module_name(path)

    try:
        # MetadataWrapper provides the position metadata the extractor uses.
        wrapper = cst.MetadataWrapper(cst.parse_module(source))

        extractor = PythonExtractor(source, path, module_name)
        extractor.set_metadata_wrapper(wrapper)
        wrapper.visit(extractor)

        return self._build_parsed_file(
            path=path,
            source=source,
            extractor=extractor,
            module_name=module_name,
        )
    except cst.ParserSyntaxError as exc:
        # LibCST exception attributes vary by version, so read defensively.
        line_num = getattr(exc, "raw_line", None) or 0
        col_num = getattr(exc, "raw_column", None) or 0
        failure = ParseError(
            f"Syntax error: {str(exc)}",
            path,
            line=line_num,
            column=col_num,
        )
    except Exception as exc:
        failure = ParseError(str(exc), path)

    # Both failure kinds share the same result shape.
    return ParsedFile(
        path=path,
        language=Language.PYTHON,
        success=False,
        error=failure,
        line_count=source.count("\n") + 1,
    )
|
|
186
|
+
|
|
187
|
+
def parse_files(self, file_paths: Sequence[Path]) -> Iterator[ParsedFile]:
    """
    Lazily parse a sequence of files.

    Paths for which ``can_parse`` returns a falsy value are skipped without
    producing a result. Every other path yields whatever ``parse_file``
    returns for it, which includes failed parses.
    """
    for candidate in file_paths:
        if not self.can_parse(candidate):
            continue
        yield self.parse_file(candidate)
|
|
196
|
+
|
|
197
|
+
# =========================================================================
|
|
198
|
+
# Conversion Methods
|
|
199
|
+
# =========================================================================
|
|
200
|
+
|
|
201
|
+
def _build_parsed_file(
    self,
    path: Path,
    source: str,
    extractor: PythonExtractor,
    module_name: str,
) -> ParsedFile:
    """Assemble a successful ``ParsedFile`` from what the extractor collected."""
    imports = [self._convert_import(item) for item in extractor.imports]
    functions = [self._convert_function(item, path) for item in extractor.functions]
    classes = [self._convert_class(item, path) for item in extractor.classes]
    call_sites = [self._convert_call(item, path) for item in extractor.calls]
    # module_name is forwarded so each assignment gets a target_qualified_name.
    assignments = [
        self._convert_assignment(item, path, module_name) for item in extractor.assignments
    ]

    # Symbol table keyed by full qualified name: functions first, then each
    # class followed by its methods (later entries win on a name clash).
    symbol_table = {fn.qualified_name.full: fn for fn in functions}
    for klass in classes:
        symbol_table[klass.qualified_name.full] = klass
        for member in klass.methods:
            symbol_table[member.qualified_name.full] = member

    return ParsedFile(
        path=path,
        language=Language.PYTHON,
        success=True,
        module_name=module_name,
        module_docstring=extractor.module_docstring,
        imports=imports,
        functions=functions,
        classes=classes,
        all_symbols=symbol_table,
        call_sites=call_sites,
        assignments=assignments,
        module_variables=extractor.module_variables,
        line_count=source.count("\n") + 1,
    )
|
|
250
|
+
|
|
251
|
+
def _convert_import(self, imp) -> ParsedImport:
    """
    Turn one extracted import record into a ``ParsedImport``.

    ``imp.names`` is read as a sequence of ``(name, alias)`` pairs. The
    imported names are listed only for ``from ... import`` statements, and
    only the alias of the first pair is carried over.

    NOTE(review): the location's file is always the placeholder
    ``Path("unknown")`` because this method is not given the file path.
    """
    if imp.is_from_import:
        imported_names = [imported for imported, _alias in imp.names]
    else:
        imported_names = []

    # Empty or missing aliases collapse to None.
    first_alias = (imp.names[0][1] or None) if imp.names else None

    # A falsy line number means no location is recorded.
    where = None
    if imp.line:
        where = CodeLocation(
            file=Path("unknown"),
            line=imp.line,
        )

    return ParsedImport(
        module=imp.module,
        names=imported_names,
        alias=first_alias,
        is_relative=imp.is_relative,
        relative_level=imp.relative_level,
        location=where,
    )
|
|
268
|
+
|
|
269
|
+
def _convert_function(self, func: ExtractedFunction, path: Path) -> ParsedFunction:
    """
    Turn an extracted function (or method) into a ``ParsedFunction``.

    Parameters, decorators and return statements are converted one by one.
    Several attributes are read with ``getattr`` and a default, so extractor
    objects lacking them are still accepted.
    """
    parameters = []
    for param in func.parameters:
        parameters.append(
            ParsedParameter(
                name=param.name,
                type_annotation=param.annotation,
                default_value=param.default,
                is_variadic=param.is_variadic,
                is_keyword_variadic=param.is_keyword_variadic,
            )
        )

    decorators = []
    for deco in func.decorators:
        decorators.append(
            ParsedDecorator(
                name=deco.name,
                qualified_name=QualifiedName(module="", name=deco.full_name),
                arguments=deco.arguments,
                positional_args=deco.positional_args,
                location=deco.location,
            )
        )

    # Split "pkg.mod.func" at the last dot; an undotted name has no module part.
    owner_module, _, short_name = func.qualified_name.rpartition(".")

    returns = []
    for ret in getattr(func, "return_statements", []):
        returns.append(
            ParsedReturn(
                line=ret.line,
                returns_none=ret.returns_none,
                returns_call=ret.returns_call,
                returns_variable=ret.returns_variable,
                returns_literal=ret.returns_literal,
                returns_expression=ret.returns_expression,
                returns_lambda=getattr(ret, "returns_lambda", False),
                returns_comprehension=getattr(ret, "returns_comprehension", False),
                call_name=ret.call_name,
                variable_name=ret.variable_name,
                literal_type=ret.literal_type,
                expression_text=ret.expression_text,
            )
        )

    # Line span between first and last line, never reported as less than 1.
    if func.end_line > func.line:
        span = func.end_line - func.line
    else:
        span = 1

    return ParsedFunction(
        name=func.name,
        qualified_name=QualifiedName(module=owner_module, name=short_name),
        location=CodeLocation(
            file=path,
            line=func.line,
            column=func.column,
            end_line=func.end_line,
        ),
        parameters=parameters,
        return_type=func.return_annotation,
        decorators=decorators,
        is_async=func.is_async,
        binding=func.binding,
        owner_type=func.owner_type,
        docstring=func.docstring,
        body_line_count=span,
        body_source=func.body_source,
        has_yield=getattr(func, "has_yield", False),
        has_return=getattr(func, "has_return", False),
        local_variables=func.local_variables,
        control_flow_info=getattr(func, "control_flow_info", {}),
        return_statements=returns,
    )
|
|
343
|
+
|
|
344
|
+
def _convert_class(self, cls: ExtractedClass, path: Path) -> ParsedClass:
    """
    Turn an extracted class into a ``ParsedClass``.

    Methods go through ``_convert_function`` and fields through
    ``_convert_field``. The class is flagged as a Pydantic model when the
    extractor already says so or when one of its bases is literally
    ``BaseModel`` / ``BaseSettings`` (bare or ``pydantic.``-prefixed).
    """
    pydantic_bases = {
        "BaseModel",
        "BaseSettings",
        "pydantic.BaseModel",
        "pydantic.BaseSettings",
    }

    methods = [self._convert_function(member, path) for member in cls.methods]

    decorators = []
    for deco in cls.decorators:
        decorators.append(
            ParsedDecorator(
                name=deco.name,
                qualified_name=QualifiedName(module="", name=deco.full_name),
                arguments=deco.arguments,
                positional_args=deco.positional_args,
                location=deco.location,
            )
        )

    # Fields matter for Pydantic models and dataclasses.
    fields = [self._convert_field(member, path) for member in cls.fields]

    # Split "pkg.mod.Class" at the last dot; an undotted name has no module part.
    owner_module, _, short_name = cls.qualified_name.rpartition(".")

    looks_pydantic = cls.is_pydantic_model or any(
        base in pydantic_bases for base in cls.bases
    )

    return ParsedClass(
        name=cls.name,
        qualified_name=QualifiedName(module=owner_module, name=short_name),
        location=CodeLocation(
            file=path,
            line=cls.line,
            column=cls.column,
            end_line=cls.end_line,
        ),
        base_classes=cls.bases,
        decorators=decorators,
        fields=fields,
        methods=methods,
        class_variables=cls.class_variables,
        instance_variables=cls.instance_variables,
        docstring=cls.docstring,
        is_dataclass=cls.is_dataclass,
        is_pydantic_model=looks_pydantic,
    )
|
|
394
|
+
|
|
395
|
+
def _convert_field(self, f: ExtractedField, path: Path) -> ParsedField:
    """
    Turn an extracted class field into a ``ParsedField``.

    Validation constraints are copied out of ``f.field_info`` when present.
    A field counts as required when it has no default (``None``) or its
    default is the text ``"..."``, unless ``field_info`` carries a
    ``"default"`` entry. ``path`` is accepted but not used here.
    """
    constraint_keys = ("min_length", "max_length", "gt", "ge", "lt", "le", "regex", "pattern")
    info = f.field_info
    constraints = {key: info[key] for key in constraint_keys if key in info}

    if "default" in info:
        required = False
    else:
        required = f.default is None or f.default == "..."

    return ParsedField(
        name=f.name,
        type_annotation=f.annotation,
        default_value=f.default,
        field_info=f.field_info,
        is_required=required,
        alias=info.get("alias"),
        description=info.get("description"),
        constraints=constraints,
    )
|
|
418
|
+
|
|
419
|
+
def _convert_call(self, call, path: Path) -> ParsedCallSite:
    """
    Turn an extracted call into a ``ParsedCallSite``.

    The callee and the enclosing function are each split at their last dot
    into a module part and a name. ``callee_resolved`` is always ``False``
    here; resolution happens later in analysis. Control-flow context flags
    are read with defaults so extractor objects without them still work.
    """

    def to_argument(arg) -> ParsedArgument:
        # Anything that is neither a literal nor a bare name is an expression.
        opaque = not (arg.is_literal or arg.is_name)
        return ParsedArgument(
            position=arg.position,
            name=arg.keyword,
            is_literal=arg.is_literal,
            literal_value=arg.literal_value,
            literal_type=arg.literal_type,
            is_variable=arg.is_name,
            variable_name=arg.name_value,
            is_expression=opaque,
            expression_text=arg.value_source if opaque else None,
            is_call_result=getattr(arg, "is_call_result", False),
            called_function=getattr(arg, "called_function", None),
            is_spread=arg.is_starred,
            is_keyword_spread=arg.is_double_starred,
            is_string_interpolation=getattr(arg, "is_string_interpolation", False),
            is_concatenation=getattr(arg, "is_concatenation", False),
            is_format_call=getattr(arg, "is_format_call", False),
            container_type=getattr(arg, "container_type", None),
            source_variables=getattr(arg, "source_variables", None) or [],
        )

    arguments = [to_argument(arg) for arg in call.arguments]

    # "a.b.c" -> module "a.b", name "c"; an undotted callee gets module "".
    callee_module, _, callee_short = call.callee.rpartition(".")
    callee_qn = QualifiedName(module=callee_module, name=callee_short)

    # Calls outside any function have no caller.
    caller_qn = None
    if call.in_function:
        caller_module, _, caller_short = call.in_function.rpartition(".")
        caller_qn = QualifiedName(module=caller_module, name=caller_short)

    return ParsedCallSite(
        callee_name=call.callee,
        callee_qualified_name=callee_qn,
        callee_resolved=False,  # Will be resolved later in analysis
        location=CodeLocation(
            file=path,
            line=call.line,
            column=call.column,
            end_line=call.end_line or None,
        ),
        caller_function=caller_qn,
        arguments=arguments,
        is_method_call=call.is_method_call,
        receiver_expression=call.receiver,
        # Control flow context
        in_loop=getattr(call, "in_loop", False),
        in_conditional=getattr(call, "in_conditional", False),
        in_try=getattr(call, "in_try", False),
        in_except=getattr(call, "in_except", False),
        in_finally=getattr(call, "in_finally", False),
        in_with=getattr(call, "in_with", False),
        in_comprehension=getattr(call, "in_comprehension", False),
        loop_depth=getattr(call, "loop_depth", 0),
        conditional_depth=getattr(call, "conditional_depth", 0),
    )
|
|
490
|
+
|
|
491
|
+
def _convert_assignment(self, assign, path: Path, module_name: str = "") -> ParsedAssignment:
    """
    Turn an extracted assignment into a ``ParsedAssignment``.

    The enclosing function is normalised to a string: its ``full``
    attribute when it has one, otherwise ``str()`` of it, or ``None`` when
    the assignment is not inside a function. The source is classified as
    ``"literal"``, ``"call"``, ``"variable"`` or ``"expression"``, checked
    in that order.
    """
    enclosing = getattr(assign, "in_function", None)
    if enclosing is None:
        in_function_str = None
    else:
        in_function_str = getattr(enclosing, "full", str(enclosing))

    # The qualified target name lets flow-sensitive analysis attribute the
    # assignment to its function (value-flow / variable_derives_from).
    target_qualified_name = self._assignment_target_qualified_name(
        assign.target,
        in_function_str,
        module_name,
    )

    if assign.is_literal:
        source_kind = "literal"
    elif assign.is_call:
        source_kind = "call"
    elif assign.is_name:
        source_kind = "variable"
    else:
        source_kind = "expression"

    return ParsedAssignment(
        target=assign.target,
        location=CodeLocation(
            file=path,
            line=assign.line,
        ),
        source_type=source_kind,
        source_value=assign.value_source,
        source_call=assign.called_function if assign.is_call else None,
        in_function=in_function_str,
        type_annotation=assign.annotation,
        target_qualified_name=target_qualified_name,
        source_variables=getattr(assign, "source_variables", None) or [],
        is_method_call=getattr(assign, "is_method_call", False),
        is_string_interpolation=getattr(assign, "is_string_interpolation", False),
    )
|
|
529
|
+
|
|
530
|
+
def _assignment_target_qualified_name(
|
|
531
|
+
self,
|
|
532
|
+
target: str,
|
|
533
|
+
in_function: str | None,
|
|
534
|
+
module_name: str,
|
|
535
|
+
) -> QualifiedName | None:
|
|
536
|
+
"""
|
|
537
|
+
Build qualified name for an assignment target for flow-sensitive analysis.
|
|
538
|
+
- Inside a function: "module.func_name.target" so it starts with func qname.
|
|
539
|
+
- Module-level: "module_name.target" (two parts for module-level binding).
|
|
540
|
+
"""
|
|
541
|
+
if in_function:
|
|
542
|
+
# Local assignment: full = "module.func.target"
|
|
543
|
+
if "." in in_function:
|
|
544
|
+
mod, func_part = in_function.rsplit(".", 1)
|
|
545
|
+
name = f"{func_part}.{target}"
|
|
546
|
+
else:
|
|
547
|
+
mod = ""
|
|
548
|
+
name = f"{in_function}.{target}"
|
|
549
|
+
return QualifiedName(module=mod, name=name)
|
|
550
|
+
if module_name:
|
|
551
|
+
return QualifiedName(module=module_name, name=target)
|
|
552
|
+
return None
|
|
553
|
+
|
|
554
|
+
# =========================================================================
|
|
555
|
+
# Helper Methods
|
|
556
|
+
# =========================================================================
|
|
557
|
+
|
|
558
|
+
def _infer_module_name(self, file_path: Path) -> str:
|
|
559
|
+
"""
|
|
560
|
+
Infer Python module name from file path.
|
|
561
|
+
|
|
562
|
+
e.g., /project/src/app/routes/users.py -> app.routes.users
|
|
563
|
+
"""
|
|
564
|
+
# Remove .py extension
|
|
565
|
+
stem = file_path.stem
|
|
566
|
+
if stem == "__init__":
|
|
567
|
+
# For __init__.py, use parent directory name
|
|
568
|
+
return file_path.parent.name
|
|
569
|
+
|
|
570
|
+
# Try to build module path from directory structure
|
|
571
|
+
parts = []
|
|
572
|
+
current = file_path.parent
|
|
573
|
+
|
|
574
|
+
# Walk up looking for __init__.py or until we hit a non-package directory
|
|
575
|
+
while current.name:
|
|
576
|
+
init_file = current / "__init__.py"
|
|
577
|
+
if not init_file.exists():
|
|
578
|
+
break
|
|
579
|
+
parts.append(current.name)
|
|
580
|
+
current = current.parent
|
|
581
|
+
|
|
582
|
+
parts.reverse()
|
|
583
|
+
parts.append(stem)
|
|
584
|
+
|
|
585
|
+
return ".".join(parts) if parts else stem
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
# =============================================================================
|
|
589
|
+
# Parser Registration
|
|
590
|
+
# =============================================================================
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
# Module-level parser instance, created once at import time.
_python_parser = PythonParser()


def get_python_parser() -> PythonParser:
    """Return the module-level ``PythonParser`` instance."""
    return _python_parser


# Hand the same instance to the parser registry.
ParserRegistry.register(_python_parser)
|
|
604
|
+
|
|
605
|
+
|
|
606
|
+
# =============================================================================
|
|
607
|
+
# Project-Level Parsing
|
|
608
|
+
# =============================================================================
|
|
609
|
+
|
|
610
|
+
|
|
611
|
+
def _parse_file_worker(path: Path) -> ParsedFile:
    """
    Parse one file with a freshly created ``PythonParser``.

    Defined at module top level so ProcessPoolExecutor can pickle it.
    """
    return PythonParser().parse_file(path)
|
|
615
|
+
|
|
616
|
+
|
|
617
|
+
class PythonProjectParser:
|
|
618
|
+
"""
|
|
619
|
+
Parser for entire Python projects.
|
|
620
|
+
|
|
621
|
+
Handles:
|
|
622
|
+
- Multi-file parsing (parallel when > PARALLEL_THRESHOLD files)
|
|
623
|
+
- Cross-file type resolution
|
|
624
|
+
- Import resolution
|
|
625
|
+
- Building complete project model
|
|
626
|
+
"""
|
|
627
|
+
|
|
628
|
+
PARALLEL_THRESHOLD = 20
|
|
629
|
+
|
|
630
|
+
def __init__(self) -> None:
    """Set up the file parser, the type resolver and empty lookup tables."""
    # Results keyed by file path, filled in as files are parsed.
    self._parsed_files: dict[Path, ParsedFile] = {}
    # Lookups keyed by full qualified name.
    self._all_classes: dict[str, ParsedClass] = {}
    self._all_functions: dict[str, ParsedFunction] = {}
    self._parser = PythonParser()
    self._type_resolver = TypeResolver()
|
|
636
|
+
|
|
637
|
+
def parse_project(self, file_paths: list[Path]) -> dict[Path, ParsedFile]:
    """
    Parse every given file and return the results keyed by path.

    Fewer than ``PARALLEL_THRESHOLD`` files are parsed one after another;
    otherwise the parallel path is used. ``_resolve_references`` runs
    afterwards in both cases. The returned dict is the parser's own
    ``_parsed_files`` mapping, not a copy.
    """
    log = logging.getLogger(__name__)

    if len(file_paths) < self.PARALLEL_THRESHOLD:
        self._parse_serial(file_paths)
    else:
        self._parse_parallel(file_paths, log)

    self._resolve_references()
    return self._parsed_files
|
|
654
|
+
|
|
655
|
+
def _parse_serial(self, file_paths: list[Path]) -> None:
|
|
656
|
+
for path in file_paths:
|
|
657
|
+
parsed = self._parser.parse_file(path)
|
|
658
|
+
self._register_parsed(path, parsed)
|
|
659
|
+
|
|
660
|
+
def _parse_parallel(self, file_paths: list[Path], logger: logging.Logger) -> None:
    """
    Parse the files in worker processes and register the results.

    Uses at most 8 workers (fewer when the machine reports fewer CPUs).
    A file whose worker result cannot be obtained or registered is recorded
    as a failed ``ParsedFile`` carrying a ``ParseError``, matching every
    other failure path in this module. If the pool itself fails, parsing
    falls back to the serial path for the files that have not been
    registered yet, so completed files are not parsed or registered twice.

    Args:
        file_paths: Files to parse.
        logger: Logger used for progress and failure messages.
    """
    import os
    from concurrent.futures import ProcessPoolExecutor, as_completed

    max_workers = min(os.cpu_count() or 1, 8)
    logger.info("Parallel parsing %d files with %d workers", len(file_paths), max_workers)

    # Paths whose results were registered successfully in this call.
    registered: set[Path] = set()

    try:
        with ProcessPoolExecutor(max_workers=max_workers) as pool:
            future_to_path = {
                pool.submit(_parse_file_worker, path): path for path in file_paths
            }
            for future in as_completed(future_to_path):
                path = future_to_path[future]
                try:
                    parsed = future.result()
                    self._register_parsed(path, parsed)
                    registered.add(path)
                except Exception as e:
                    logger.warning("Worker failed for %s: %s", path, e)
                    self._parsed_files[path] = ParsedFile(
                        path=path,
                        language=Language.PYTHON,
                        success=False,
                        # Wrap in ParseError like the other failure sites;
                        # a bare string here gave `error` a second type.
                        error=ParseError(str(e), path),
                    )
    except Exception as e:
        logger.warning("Parallel parsing failed, falling back to serial: %s", e)
        # Only redo what is missing; files recorded as worker failures are
        # retried serially, which overwrites their failure entry.
        self._parse_serial([path for path in file_paths if path not in registered])
|
|
688
|
+
|
|
689
|
+
def _register_parsed(self, path: Path, parsed: ParsedFile) -> None:
|
|
690
|
+
self._parsed_files[path] = parsed
|
|
691
|
+
if parsed.success:
|
|
692
|
+
for imp in parsed.imports:
|
|
693
|
+
self._type_resolver.add_import(imp)
|
|
694
|
+
for cls in parsed.classes:
|
|
695
|
+
self._all_classes[cls.qualified_name.full] = cls
|
|
696
|
+
self._type_resolver.add_class(cls, path)
|
|
697
|
+
for func in parsed.functions:
|
|
698
|
+
self._all_functions[func.qualified_name.full] = func
|
|
699
|
+
|
|
700
|
+
def _resolve_references(self) -> None:
|
|
701
|
+
"""Resolve cross-file type references and imports."""
|
|
702
|
+
# This would involve:
|
|
703
|
+
# 1. Resolving import targets to actual definitions
|
|
704
|
+
# 2. Resolving type annotations to class definitions
|
|
705
|
+
# 3. Building inheritance hierarchies
|
|
706
|
+
# 4. Resolving function call targets
|
|
707
|
+
pass
|
|
708
|
+
|
|
709
|
+
def get_class(self, qualified_name: str) -> ParsedClass | None:
    """Look up a registered class by its full qualified name; ``None`` if absent."""
    known_classes = self._all_classes
    return known_classes.get(qualified_name, None)
|
|
712
|
+
|
|
713
|
+
def get_function(self, qualified_name: str) -> ParsedFunction | None:
    """Look up a registered function by its full qualified name; ``None`` if absent."""
    known_functions = self._all_functions
    return known_functions.get(qualified_name, None)
|
|
716
|
+
|
|
717
|
+
def get_type_resolver(self) -> TypeResolver:
    """Return the resolver that parsed imports and classes were registered with."""
    resolver = self._type_resolver
    return resolver
|