apisec-code-bolt 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apisec_code_bolt/__init__.py +42 -0
- apisec_code_bolt/__main__.py +11 -0
- apisec_code_bolt/analysis/__init__.py +96 -0
- apisec_code_bolt/analysis/analyzer.py +2309 -0
- apisec_code_bolt/analysis/binding_tracker.py +341 -0
- apisec_code_bolt/analysis/call_graph.py +1197 -0
- apisec_code_bolt/analysis/call_graph_types.py +332 -0
- apisec_code_bolt/analysis/call_resolver.py +988 -0
- apisec_code_bolt/analysis/capability_tagger.py +322 -0
- apisec_code_bolt/analysis/config_scanner.py +197 -0
- apisec_code_bolt/analysis/data_flow.py +1883 -0
- apisec_code_bolt/analysis/dependency_extractor.py +959 -0
- apisec_code_bolt/analysis/flow_analysis.py +1406 -0
- apisec_code_bolt/analysis/hof_catalog.py +61 -0
- apisec_code_bolt/analysis/integration_detector.py +1399 -0
- apisec_code_bolt/analysis/literal_scanner.py +300 -0
- apisec_code_bolt/analysis/path_normalizer.py +55 -0
- apisec_code_bolt/analysis/read_site_detector.py +310 -0
- apisec_code_bolt/analysis/request_patterns.py +162 -0
- apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
- apisec_code_bolt/analysis/sink_evidence.py +333 -0
- apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
- apisec_code_bolt/cli/__init__.py +5 -0
- apisec_code_bolt/cli/exit_codes.py +17 -0
- apisec_code_bolt/cli/main.py +1069 -0
- apisec_code_bolt/cloud/__init__.py +1 -0
- apisec_code_bolt/cloud/apisec_client.py +118 -0
- apisec_code_bolt/cloud/client.py +255 -0
- apisec_code_bolt/core/__init__.py +75 -0
- apisec_code_bolt/core/config.py +528 -0
- apisec_code_bolt/core/credentials.py +65 -0
- apisec_code_bolt/core/discovery.py +433 -0
- apisec_code_bolt/core/log_format.py +115 -0
- apisec_code_bolt/core/manifest.py +1009 -0
- apisec_code_bolt/core/repo.py +280 -0
- apisec_code_bolt/core/state.py +59 -0
- apisec_code_bolt/core/telemetry.py +451 -0
- apisec_code_bolt/core/types.py +587 -0
- apisec_code_bolt/fingerprinting/__init__.py +1 -0
- apisec_code_bolt/frameworks/__init__.py +29 -0
- apisec_code_bolt/frameworks/_jwt_common.py +50 -0
- apisec_code_bolt/frameworks/auth_helpers.py +437 -0
- apisec_code_bolt/frameworks/base.py +608 -0
- apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
- apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
- apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
- apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
- apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
- apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
- apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
- apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
- apisec_code_bolt/frameworks/java/__init__.py +6 -0
- apisec_code_bolt/frameworks/java/_annotations.py +167 -0
- apisec_code_bolt/frameworks/java/_constraints.py +128 -0
- apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
- apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
- apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
- apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
- apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
- apisec_code_bolt/frameworks/js/__init__.py +8 -0
- apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
- apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
- apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
- apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
- apisec_code_bolt/frameworks/python/__init__.py +19 -0
- apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
- apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
- apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
- apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
- apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
- apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
- apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
- apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
- apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
- apisec_code_bolt/parsing/__init__.py +62 -0
- apisec_code_bolt/parsing/base.py +554 -0
- apisec_code_bolt/parsing/csharp/__init__.py +5 -0
- apisec_code_bolt/parsing/csharp/language_services.py +203 -0
- apisec_code_bolt/parsing/csharp/literals.py +72 -0
- apisec_code_bolt/parsing/csharp/parser.py +1158 -0
- apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
- apisec_code_bolt/parsing/js/__init__.py +5 -0
- apisec_code_bolt/parsing/js/language_services.py +118 -0
- apisec_code_bolt/parsing/js/parser.py +622 -0
- apisec_code_bolt/parsing/jvm/__init__.py +7 -0
- apisec_code_bolt/parsing/jvm/language_services.py +270 -0
- apisec_code_bolt/parsing/jvm/parser.py +774 -0
- apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
- apisec_code_bolt/parsing/python/__init__.py +150 -0
- apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
- apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
- apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
- apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
- apisec_code_bolt/parsing/python/expression_utils.py +221 -0
- apisec_code_bolt/parsing/python/extraction_types.py +271 -0
- apisec_code_bolt/parsing/python/language_services.py +487 -0
- apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
- apisec_code_bolt/parsing/python/parser.py +719 -0
- apisec_code_bolt/parsing/python/path_resolver.py +576 -0
- apisec_code_bolt/parsing/python/router_registry.py +806 -0
- apisec_code_bolt/parsing/python/type_resolver.py +730 -0
- apisec_code_bolt/parsing/python/visitors.py +1544 -0
- apisec_code_bolt/parsing/services.py +544 -0
- apisec_code_bolt/query/__init__.py +1 -0
- apisec_code_bolt/query/ast_cache.py +182 -0
- apisec_code_bolt/query/executor.py +283 -0
- apisec_code_bolt/query/handlers.py +832 -0
- apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
- apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
- apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
- apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,554 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Base classes and protocols for language-specific parsers.
|
|
3
|
+
|
|
4
|
+
Each language (Python, Java, etc.) subclasses ``BaseParser``
|
|
5
|
+
to provide parsing capabilities. This module defines the contract
|
|
6
|
+
that all parsers must fulfill.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from abc import ABC, abstractmethod
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import TYPE_CHECKING, Any, ClassVar
|
|
15
|
+
|
|
16
|
+
from ..core.types import (
|
|
17
|
+
AnalysisNote,
|
|
18
|
+
CodeLocation,
|
|
19
|
+
Confidence,
|
|
20
|
+
Language,
|
|
21
|
+
ParseError,
|
|
22
|
+
QualifiedName,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from collections.abc import Iterator, Sequence
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# =============================================================================
|
|
30
|
+
# Parsed Symbol Types
|
|
31
|
+
# =============================================================================
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class ParsedSymbol:
    """
    Common base for the named entities extracted from source code.

    ``ParsedFunction`` and ``ParsedClass`` in this module extend this
    class with their own fields.
    """

    # Identity and position of the symbol.
    name: str
    qualified_name: QualifiedName
    location: CodeLocation

    # Documentation attached to the symbol, if any.
    docstring: str | None = None

    # Analysis metadata.
    confidence: Confidence = Confidence.HIGH
    notes: list[AnalysisNote] = field(default_factory=list)

    @property
    def file_path(self) -> Path | None:
        """Return ``location.file``, or ``None`` when ``location`` is falsy."""
        loc = self.location
        if not loc:
            return None
        return loc.file
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass
class ParsedParameter:
    """One parameter of a function or method signature."""

    name: str
    type_annotation: str | None = None
    default_value: str | None = None

    # Variadic markers: ``*args`` and ``**kwargs`` respectively.
    is_variadic: bool = False
    is_keyword_variadic: bool = False

    # Where the parameter appears in the source, when known.
    location: CodeLocation | None = None

    # Free-form extras (framework-specific annotations, etc.).
    metadata: dict[str, Any] = field(default_factory=dict)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dataclass
class ParsedDecorator:
    """A decorator or annotation attached to a function or class."""

    name: str
    qualified_name: QualifiedName | None = None

    # Keyword arguments by name, and positional arguments in order.
    arguments: dict[str, Any] = field(default_factory=dict)
    positional_args: list[Any] = field(default_factory=list)

    location: CodeLocation | None = None

    # Unparsed argument text, kept for complex cases.
    raw_arguments: str | None = None
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@dataclass
class ParsedFunction(ParsedSymbol):
    """
    A function or method definition extracted by a parser.

    Adds signature, decorator, ownership and body information on top
    of the fields inherited from ``ParsedSymbol``.
    """

    # --- Signature ---------------------------------------------------------
    parameters: list[ParsedParameter] = field(default_factory=list)
    return_type: str | None = None

    # --- Decorators / annotations ------------------------------------------
    decorators: list[ParsedDecorator] = field(default_factory=list)

    # --- Properties of the function itself ----------------------------------
    is_async: bool = False
    binding: str = "free"  # "instance", "static", or "free"
    is_abstract: bool = False

    # --- Owner information (for methods) ------------------------------------
    owner_type: str | None = None
    class_qualified_name: QualifiedName | None = None

    # --- Body analysis --------------------------------------------------------
    body_line_count: int = 0
    body_source: str | None = None
    has_yield: bool = False  # Generator
    has_return: bool = False

    # Names of internal variables (for data flow).
    local_variables: list[str] = field(default_factory=list)

    # Control flow information for flow-sensitive analysis.
    control_flow_info: dict[str, Any] = field(default_factory=dict)

    # One entry per analysed return statement.
    return_statements: list[ParsedReturn] = field(default_factory=list)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
@dataclass
class ParsedReturn:
    """Analysis record for a single return (or yield) statement."""

    line: int

    # Flags describing what is being returned.
    returns_none: bool = False
    returns_call: bool = False
    returns_variable: bool = False
    returns_literal: bool = False
    returns_expression: bool = False
    returns_lambda: bool = False
    returns_comprehension: bool = False

    # Detail values accompanying the flags above.
    call_name: str | None = None
    variable_name: str | None = None
    literal_type: str | None = None
    expression_text: str | None = None
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@dataclass
class ParsedField:
    """A field declared on a class (Pydantic models, dataclasses, etc.)."""

    name: str
    type_annotation: str | None = None
    # One of "public", "protected", "private", "internal",
    # "protected internal", etc.
    access_modifier: str = "public"
    default_value: str | None = None

    # Metadata taken from a ``Field()`` call.
    field_info: dict[str, Any] = field(default_factory=dict)

    # Pydantic-oriented attributes.
    is_required: bool = True
    alias: str | None = None
    description: str | None = None

    # Validation constraints.
    constraints: dict[str, Any] = field(default_factory=dict)

    # Reference to a nested type, by name.
    nested_model_name: str | None = None

    location: CodeLocation | None = None

    # Annotations / decorators on the field (e.g. @Value, @NotNull, @Column).
    decorators: list[ParsedDecorator] = field(default_factory=list)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
@dataclass
class ParsedClass(ParsedSymbol):
    """A class definition extracted by a parser."""

    # --- Inheritance ----------------------------------------------------------
    base_classes: list[str] = field(default_factory=list)
    base_class_qualified_names: list[QualifiedName] = field(default_factory=list)

    # --- Decorators / annotations --------------------------------------------
    decorators: list[ParsedDecorator] = field(default_factory=list)

    # --- Class kind flags -----------------------------------------------------
    is_abstract: bool = False
    is_dataclass: bool = False
    is_enum: bool = False
    is_pydantic_model: bool = False  # Pydantic models are tracked explicitly

    # --- Fields (for Pydantic models and dataclasses) -------------------------
    fields: list[ParsedField] = field(default_factory=list)

    # --- Members --------------------------------------------------------------
    methods: list[ParsedFunction] = field(default_factory=list)
    class_variables: list[str] = field(default_factory=list)
    instance_variables: list[str] = field(default_factory=list)

    # --- Nested classes -------------------------------------------------------
    nested_classes: list[ParsedClass] = field(default_factory=list)

    @property
    def bases(self) -> list[str]:
        """Return ``base_classes`` under the shorter name used by language_services/type_resolver."""
        return self.base_classes
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
@dataclass
class ParsedImport:
    """A single import statement."""

    module: str
    # Imported names; left empty for a plain "import module".
    names: list[str] = field(default_factory=list)
    alias: str | None = None

    # Relative-import details; ``relative_level`` is the number of dots.
    is_relative: bool = False
    relative_level: int = 0

    location: CodeLocation | None = None

    @property
    def is_from_import(self) -> bool:
        """Return True when at least one name is listed ('from x import y' style)."""
        name_count = len(self.names)
        return name_count > 0
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
@dataclass
class ParsedCallSite:
    """
    One place in the source where a function or method is called.

    Records the callee, the location of the call, its arguments and
    the control-flow context surrounding it.
    """

    # Required: what is called, and where.
    callee_name: str
    location: CodeLocation

    # Resolution state of the callee and the enclosing caller.
    callee_qualified_name: QualifiedName | None = None
    callee_resolved: bool = False
    caller_function: QualifiedName | None = None

    # Arguments passed at this call.
    arguments: list[ParsedArgument] = field(default_factory=list)

    # Call context; ``receiver_expression`` is the object before the dot.
    is_method_call: bool = False
    receiver_expression: str | None = None
    receiver_type: str | None = None

    # Control flow context of the call.
    in_loop: bool = False
    in_conditional: bool = False
    in_try: bool = False
    in_except: bool = False
    in_finally: bool = False
    in_with: bool = False
    in_comprehension: bool = False
    loop_depth: int = 0
    conditional_depth: int = 0

    # Candidate targets when the callee could not be resolved.
    possible_callees: list[QualifiedName] = field(default_factory=list)
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
@dataclass
class ParsedArgument:
    """A single argument supplied at a call site."""

    position: int | None = None
    name: str | None = None  # Set for keyword arguments

    # --- Literal values -------------------------------------------------------
    is_literal: bool = False
    literal_value: Any = None
    literal_type: str | None = None  # "str", "int", "float", "bool", "None"

    # --- Variable references --------------------------------------------------
    is_variable: bool = False
    variable_name: str | None = None

    # --- Expressions ----------------------------------------------------------
    is_expression: bool = False
    expression_text: str | None = None

    # --- Results of other calls (complex expressions) -------------------------
    is_call_result: bool = False
    called_function: str | None = None

    # --- Spreads --------------------------------------------------------------
    # Positional spread (Python *args, JS spread, Java varargs).
    is_spread: bool = False
    # Keyword spread (Python **kwargs, JS object spread).
    is_keyword_spread: bool = False

    # --- Argument construction evidence (language-agnostic) -------------------
    # Python f-strings, JS template literals, Kotlin string templates,
    # C# interpolated strings.
    is_string_interpolation: bool = False
    # String concatenation with the + operator (all languages).
    is_concatenation: bool = False
    # .format(), String.format(), fmt.Sprintf(), etc.
    is_format_call: bool = False
    container_type: str | None = None  # "list", "tuple", "dict", "set", "array", "map"
    source_variables: list[str] = field(default_factory=list)

    # Attribute names applied directly to a lambda argument
    # (e.g. [Authorize], [AllowAnonymous]).
    lambda_attribute_names: list[str] = field(default_factory=list)

    # Lambda / anonymous function argument (C# Minimal API, Java lambdas, etc.).
    # Each tuple is (type_annotation, parameter_name). Only populated when the
    # argument is a typed lambda expression, not a bare variable reference.
    lambda_parameter_types: list[tuple[str, str]] = field(default_factory=list)
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
@dataclass
class ParsedAssignment:
    """A single variable assignment."""

    target: str  # Name of the variable being assigned
    location: CodeLocation

    # Kind of the assigned value:
    # "literal", "variable", "call", "expression", "parameter"
    source_type: str

    # --- Optional details -----------------------------------------------------
    target_qualified_name: QualifiedName | None = None
    source_value: str | None = None
    source_call: str | None = None  # Set when assigned from a function call

    # Enclosing function, used for scoping (None = module level).
    in_function: str | None = None

    # --- Type information -----------------------------------------------------
    type_annotation: str | None = None
    inferred_type: str | None = None

    # Variables in the RHS expression that the target value derives from.
    # Language-agnostic: set by each language parser during extraction.
    # Examples: "x = int(y)" -> ["y"], "q = f'{a} {b}'" -> ["a", "b"].
    source_variables: list[str] = field(default_factory=list)

    # True when the value is a method call on a variable (y = x.strip()).
    is_method_call: bool = False

    # True when the value is a string interpolation
    # (f-string, template literal, etc.).
    is_string_interpolation: bool = False

    @property
    def value_source(self) -> str | None:
        """Return ``source_value`` under the name used by constant/path resolvers."""
        return self.source_value

    @property
    def is_literal(self) -> bool:
        """Return True when ``source_type`` equals "literal"."""
        kind = self.source_type
        return kind == "literal"

    @property
    def is_call(self) -> bool:
        """Return True when ``source_type`` equals "call"."""
        kind = self.source_type
        return kind == "call"

    @property
    def is_name(self) -> bool:
        """Return True when ``source_type`` equals "variable"."""
        kind = self.source_type
        return kind == "variable"
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
# =============================================================================
|
|
379
|
+
# Parsed File
|
|
380
|
+
# =============================================================================
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
@dataclass
class ParsedFile:
    """
    Everything extracted from one source file.

    This is the value returned by ``BaseParser.parse_file`` and
    ``BaseParser.parse_source``.
    """

    path: Path
    language: Language

    # --- Parse outcome --------------------------------------------------------
    success: bool = True
    error: ParseError | None = None

    # --- Module info ----------------------------------------------------------
    module_name: str | None = None  # Inferred module name
    module_docstring: str | None = None

    # --- Imports --------------------------------------------------------------
    imports: list[ParsedImport] = field(default_factory=list)

    # --- Definitions ----------------------------------------------------------
    functions: list[ParsedFunction] = field(default_factory=list)
    classes: list[ParsedClass] = field(default_factory=list)

    # All symbols (including nested), keyed by string.
    all_symbols: dict[str, ParsedSymbol] = field(default_factory=dict)

    # --- Call sites -----------------------------------------------------------
    call_sites: list[ParsedCallSite] = field(default_factory=list)

    # --- Assignments (for data flow) ------------------------------------------
    assignments: list[ParsedAssignment] = field(default_factory=list)

    # --- Module-level variables -----------------------------------------------
    module_variables: list[str] = field(default_factory=list)

    # --- Analysis metadata ----------------------------------------------------
    line_count: int = 0
    parse_time_ms: int = 0
    notes: list[AnalysisNote] = field(default_factory=list)
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
# =============================================================================
|
|
428
|
+
# Abstract Parser Base
|
|
429
|
+
# =============================================================================
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
class BaseParser(ABC):
    """
    Common interface shared by every language-specific parser.

    A concrete parser supplies the two class-level constants below and
    implements ``parse_file`` and ``parse_source``; the remaining
    members are provided here and build on those.
    """

    # Supplied by each concrete subclass.
    LANGUAGE: ClassVar[Language]
    SUPPORTED_EXTENSIONS: ClassVar[frozenset[str]]

    @property
    def language(self) -> Language:
        """Return the ``LANGUAGE`` constant of this parser."""
        return self.LANGUAGE

    @property
    def supported_extensions(self) -> frozenset[str]:
        """Return the ``SUPPORTED_EXTENSIONS`` constant of this parser."""
        return self.SUPPORTED_EXTENSIONS

    def can_parse(self, file_path: Path) -> bool:
        """
        Tell whether this parser handles the given file.

        The decision is based only on the file suffix, which is
        lower-cased before being looked up in ``supported_extensions``.
        """
        suffix = file_path.suffix.lower()
        return suffix in self.supported_extensions

    @abstractmethod
    def parse_file(self, file_path: Path) -> ParsedFile:
        """
        Parse one file from disk into structured data.

        Args:
            file_path: Path to the source file

        Returns:
            ParsedFile containing all extracted information

        Raises:
            ParseError: If the file cannot be parsed
        """
        ...

    def parse_files(self, file_paths: Sequence[Path]) -> Iterator[ParsedFile]:
        """
        Parse several files lazily, one result per accepted path.

        This default implementation works sequentially; subclasses may
        override it for parallel parsing.

        Args:
            file_paths: Paths to source files

        Yields:
            The ``parse_file`` result for every path accepted by
            ``can_parse``, in input order. Other paths are skipped.
        """
        for candidate in file_paths:
            if not self.can_parse(candidate):
                continue
            yield self.parse_file(candidate)

    @abstractmethod
    def parse_source(self, source: str, file_path: Path | None = None) -> ParsedFile:
        """
        Parse source code supplied as a string.

        Useful for testing and for parsing code fragments.

        Args:
            source: Source code string
            file_path: Optional path for location info

        Returns:
            ParsedFile containing all extracted information
        """
        ...
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
# =============================================================================
|
|
510
|
+
# Parser Registry
|
|
511
|
+
# =============================================================================
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
class ParserRegistry:
    """
    Registry of available parsers.

    Parsers are registered as instances; the registry then provides
    lookup by language or by file extension. All state lives in
    class-level dictionaries, so every caller shares the same registry
    and every method is a classmethod.
    """

    # Shared registry state (intentionally class-level, not per-instance).
    _parsers: ClassVar[dict[Language, BaseParser]] = {}
    _extension_map: ClassVar[dict[str, Language]] = {}

    @classmethod
    def register(cls, parser: BaseParser) -> None:
        """
        Register a parser under its language and each of its extensions.

        A later registration for the same language or extension
        replaces the earlier entry. Extensions are stored exactly as
        the parser declares them, while ``get_parser_for_file``
        lower-cases the file suffix before looking it up.

        Args:
            parser: Parser instance to register
        """
        cls._parsers[parser.language] = parser
        for ext in parser.supported_extensions:
            cls._extension_map[ext] = parser.language

    @classmethod
    def get_parser(cls, language: Language) -> BaseParser | None:
        """Return the parser registered for ``language``, or ``None``."""
        return cls._parsers.get(language)

    @classmethod
    def get_parser_for_file(cls, file_path: Path) -> BaseParser | None:
        """
        Return the parser registered for the file's extension.

        Args:
            file_path: Path whose lower-cased suffix selects the parser

        Returns:
            The matching parser, or ``None`` when the extension is not
            registered.
        """
        language = cls._extension_map.get(file_path.suffix.lower())
        # Compare against None explicitly: a registered language key that
        # happens to be falsy must still resolve to its parser.
        if language is None:
            return None
        return cls._parsers.get(language)

    @classmethod
    def supported_languages(cls) -> frozenset[Language]:
        """Return the set of languages that have a registered parser."""
        return frozenset(cls._parsers)

    @classmethod
    def supported_extensions(cls) -> frozenset[str]:
        """Return the set of registered file extensions."""
        return frozenset(cls._extension_map)
|