apisec-code-bolt 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apisec_code_bolt/__init__.py +42 -0
- apisec_code_bolt/__main__.py +11 -0
- apisec_code_bolt/analysis/__init__.py +96 -0
- apisec_code_bolt/analysis/analyzer.py +2309 -0
- apisec_code_bolt/analysis/binding_tracker.py +341 -0
- apisec_code_bolt/analysis/call_graph.py +1197 -0
- apisec_code_bolt/analysis/call_graph_types.py +332 -0
- apisec_code_bolt/analysis/call_resolver.py +988 -0
- apisec_code_bolt/analysis/capability_tagger.py +322 -0
- apisec_code_bolt/analysis/config_scanner.py +197 -0
- apisec_code_bolt/analysis/data_flow.py +1883 -0
- apisec_code_bolt/analysis/dependency_extractor.py +959 -0
- apisec_code_bolt/analysis/flow_analysis.py +1406 -0
- apisec_code_bolt/analysis/hof_catalog.py +61 -0
- apisec_code_bolt/analysis/integration_detector.py +1399 -0
- apisec_code_bolt/analysis/literal_scanner.py +300 -0
- apisec_code_bolt/analysis/path_normalizer.py +55 -0
- apisec_code_bolt/analysis/read_site_detector.py +310 -0
- apisec_code_bolt/analysis/request_patterns.py +162 -0
- apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
- apisec_code_bolt/analysis/sink_evidence.py +333 -0
- apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
- apisec_code_bolt/cli/__init__.py +5 -0
- apisec_code_bolt/cli/exit_codes.py +17 -0
- apisec_code_bolt/cli/main.py +1069 -0
- apisec_code_bolt/cloud/__init__.py +1 -0
- apisec_code_bolt/cloud/apisec_client.py +118 -0
- apisec_code_bolt/cloud/client.py +255 -0
- apisec_code_bolt/core/__init__.py +75 -0
- apisec_code_bolt/core/config.py +528 -0
- apisec_code_bolt/core/credentials.py +65 -0
- apisec_code_bolt/core/discovery.py +433 -0
- apisec_code_bolt/core/log_format.py +115 -0
- apisec_code_bolt/core/manifest.py +1009 -0
- apisec_code_bolt/core/repo.py +280 -0
- apisec_code_bolt/core/state.py +59 -0
- apisec_code_bolt/core/telemetry.py +451 -0
- apisec_code_bolt/core/types.py +587 -0
- apisec_code_bolt/fingerprinting/__init__.py +1 -0
- apisec_code_bolt/frameworks/__init__.py +29 -0
- apisec_code_bolt/frameworks/_jwt_common.py +50 -0
- apisec_code_bolt/frameworks/auth_helpers.py +437 -0
- apisec_code_bolt/frameworks/base.py +608 -0
- apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
- apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
- apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
- apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
- apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
- apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
- apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
- apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
- apisec_code_bolt/frameworks/java/__init__.py +6 -0
- apisec_code_bolt/frameworks/java/_annotations.py +167 -0
- apisec_code_bolt/frameworks/java/_constraints.py +128 -0
- apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
- apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
- apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
- apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
- apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
- apisec_code_bolt/frameworks/js/__init__.py +8 -0
- apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
- apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
- apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
- apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
- apisec_code_bolt/frameworks/python/__init__.py +19 -0
- apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
- apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
- apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
- apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
- apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
- apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
- apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
- apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
- apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
- apisec_code_bolt/parsing/__init__.py +62 -0
- apisec_code_bolt/parsing/base.py +554 -0
- apisec_code_bolt/parsing/csharp/__init__.py +5 -0
- apisec_code_bolt/parsing/csharp/language_services.py +203 -0
- apisec_code_bolt/parsing/csharp/literals.py +72 -0
- apisec_code_bolt/parsing/csharp/parser.py +1158 -0
- apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
- apisec_code_bolt/parsing/js/__init__.py +5 -0
- apisec_code_bolt/parsing/js/language_services.py +118 -0
- apisec_code_bolt/parsing/js/parser.py +622 -0
- apisec_code_bolt/parsing/jvm/__init__.py +7 -0
- apisec_code_bolt/parsing/jvm/language_services.py +270 -0
- apisec_code_bolt/parsing/jvm/parser.py +774 -0
- apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
- apisec_code_bolt/parsing/python/__init__.py +150 -0
- apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
- apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
- apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
- apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
- apisec_code_bolt/parsing/python/expression_utils.py +221 -0
- apisec_code_bolt/parsing/python/extraction_types.py +271 -0
- apisec_code_bolt/parsing/python/language_services.py +487 -0
- apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
- apisec_code_bolt/parsing/python/parser.py +719 -0
- apisec_code_bolt/parsing/python/path_resolver.py +576 -0
- apisec_code_bolt/parsing/python/router_registry.py +806 -0
- apisec_code_bolt/parsing/python/type_resolver.py +730 -0
- apisec_code_bolt/parsing/python/visitors.py +1544 -0
- apisec_code_bolt/parsing/services.py +544 -0
- apisec_code_bolt/query/__init__.py +1 -0
- apisec_code_bolt/query/ast_cache.py +182 -0
- apisec_code_bolt/query/executor.py +283 -0
- apisec_code_bolt/query/handlers.py +832 -0
- apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
- apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
- apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
- apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,774 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Java parser using the javalang library.
|
|
3
|
+
|
|
4
|
+
Parses .java source files into the language-agnostic ParsedFile dataclass.
|
|
5
|
+
Extracts classes, methods, fields, imports, call sites, and assignments.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
import time
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from ...core.types import (
|
|
16
|
+
CodeLocation,
|
|
17
|
+
Language,
|
|
18
|
+
ParseError,
|
|
19
|
+
QualifiedName,
|
|
20
|
+
)
|
|
21
|
+
from ..base import (
|
|
22
|
+
BaseParser,
|
|
23
|
+
ParsedArgument,
|
|
24
|
+
ParsedAssignment,
|
|
25
|
+
ParsedCallSite,
|
|
26
|
+
ParsedClass,
|
|
27
|
+
ParsedDecorator,
|
|
28
|
+
ParsedField,
|
|
29
|
+
ParsedFile,
|
|
30
|
+
ParsedFunction,
|
|
31
|
+
ParsedImport,
|
|
32
|
+
ParsedParameter,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
logger = logging.getLogger(__name__)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# =============================================================================
|
|
39
|
+
# Java Extractor
|
|
40
|
+
# =============================================================================
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class JavaExtractor:
|
|
44
|
+
"""
|
|
45
|
+
Walks a javalang CompilationUnit and extracts ParsedFile data.
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
def __init__(self, file_path: Path, source: str, tree: Any) -> None:
|
|
49
|
+
self.file_path = file_path
|
|
50
|
+
self.source = source
|
|
51
|
+
self.tree = tree # javalang CompilationUnit
|
|
52
|
+
self.lines = source.splitlines()
|
|
53
|
+
|
|
54
|
+
def extract(self) -> ParsedFile:
    """Run every extraction pass and assemble the resulting ParsedFile.

    Returns:
        A ParsedFile flagged ``success=True`` that carries the package name,
        the imports, the extracted classes, a flat list of those classes'
        methods, all call sites and assignments, the number of source lines
        and the elapsed extraction time in whole milliseconds.
    """
    started = time.perf_counter()

    package = self._get_package_name()
    parsed_imports = self._extract_imports()
    parsed_classes, found_calls, found_assigns = self._extract_types(package)

    # The methods of each extracted class double as the file's flat
    # "functions" list.
    flattened: list[ParsedFunction] = [
        method for parsed_class in parsed_classes for method in parsed_class.methods
    ]

    elapsed_ms = int((time.perf_counter() - started) * 1000)

    return ParsedFile(
        path=self.file_path,
        language=Language.JAVA,
        success=True,
        module_name=package,
        imports=parsed_imports,
        classes=parsed_classes,
        functions=flattened,
        call_sites=found_calls,
        assignments=found_assigns,
        line_count=len(self.lines),
        parse_time_ms=elapsed_ms,
    )
|
|
82
|
+
|
|
83
|
+
# -------------------------------------------------------------------------
|
|
84
|
+
# Package / module
|
|
85
|
+
# -------------------------------------------------------------------------
|
|
86
|
+
|
|
87
|
+
def _get_package_name(self) -> str | None:
|
|
88
|
+
pkg = getattr(self.tree, "package", None)
|
|
89
|
+
if pkg is None:
|
|
90
|
+
return None
|
|
91
|
+
name = getattr(pkg, "name", None)
|
|
92
|
+
return name
|
|
93
|
+
|
|
94
|
+
# -------------------------------------------------------------------------
|
|
95
|
+
# Imports
|
|
96
|
+
# -------------------------------------------------------------------------
|
|
97
|
+
|
|
98
|
+
def _extract_imports(self) -> list[ParsedImport]:
    """Translate the tree's import nodes into ParsedImport records.

    A wildcard import becomes a module with no names (a trailing ``.*`` is
    removed if present). Any other import is split at its last dot into a
    module and a single imported name; a path without a dot is kept whole
    as the module with no names.

    Returns:
        One ParsedImport per import node, in source order.
    """
    parsed: list[ParsedImport] = []
    for node in getattr(self.tree, "imports", None) or []:
        dotted: str = node.path or ""
        is_wildcard = bool(node.wildcard)
        where = self._make_location(getattr(node, "position", None))

        if is_wildcard:
            # removesuffix drops only an exact ".*" trailer and is a no-op
            # otherwise, unlike rstrip(".*") which strips characters.
            module, names = dotted.removesuffix(".*"), []
        else:
            package, separator, simple_name = dotted.rpartition(".")
            if separator:
                module, names = package, [simple_name]
            else:
                module, names = dotted, []

        parsed.append(ParsedImport(module=module, names=names, location=where))
    return parsed
|
|
137
|
+
|
|
138
|
+
# -------------------------------------------------------------------------
|
|
139
|
+
# Types (classes, enums, interfaces)
|
|
140
|
+
# -------------------------------------------------------------------------
|
|
141
|
+
|
|
142
|
+
def _extract_types(
    self, package_name: str | None
) -> tuple[list[ParsedClass], list[ParsedCallSite], list[ParsedAssignment]]:
    """Extract every top-level class, interface and enum in the tree.

    Args:
        package_name: Package of the compilation unit, or ``None``.

    Returns:
        ``(classes, call_sites, assignments)`` accumulated over all
        supported type declarations; other declaration kinds are ignored.
    """
    import javalang

    supported_kinds = (
        javalang.tree.ClassDeclaration,
        javalang.tree.InterfaceDeclaration,
        javalang.tree.EnumDeclaration,
    )

    classes: list[ParsedClass] = []
    call_sites: list[ParsedCallSite] = []
    assignments: list[ParsedAssignment] = []

    for declaration in getattr(self.tree, "types", None) or []:
        if not isinstance(declaration, supported_kinds):
            continue
        parsed_class, its_calls, its_assigns = self._extract_class(declaration, package_name)
        classes.append(parsed_class)
        call_sites += its_calls
        assignments += its_assigns

    return classes, call_sites, assignments
|
|
166
|
+
|
|
167
|
+
def _extract_class(
    self, class_decl: Any, package_name: str | None
) -> tuple[ParsedClass, list[ParsedCallSite], list[ParsedAssignment]]:
    """Convert one type declaration into a ParsedClass plus its calls/assignments.

    Args:
        class_decl: A javalang class, interface or enum declaration node.
        package_name: Dotted prefix for the type's qualified name. For a
            nested class this is the enclosing class's fully-qualified name.

    Returns:
        ``(parsed_class, call_sites, assignments)``. The call sites and
        assignments include those found in constructors and nested classes,
        but constructors and nested classes themselves are not added to the
        returned ParsedClass.
    """
    import javalang

    module = package_name or ""
    class_name: str = class_decl.name
    class_fqn = f"{module}.{class_name}" if module else class_name
    qname = QualifiedName(module=module, name=class_name)

    location = self._make_location(getattr(class_decl, "position", None))

    decorators = [
        self._extract_annotation(a) for a in (getattr(class_decl, "annotations", None) or [])
    ]

    # ``extends`` is a single type on a class but a list on an interface;
    # ``implements`` entries are appended after it.
    base_classes: list[str] = []
    ext = getattr(class_decl, "extends", None)
    if isinstance(ext, list):
        base_classes.extend(self._get_type_name(e) for e in ext)
    elif ext is not None:
        base_classes.append(self._get_type_name(ext))
    base_classes.extend(
        self._get_type_name(impl) for impl in (getattr(class_decl, "implements", None) or [])
    )

    is_enum = isinstance(class_decl, javalang.tree.EnumDeclaration)

    # javalang gives an EnumDeclaration an EnumBody node rather than a list,
    # with the member declarations under ``declarations``. Iterating that
    # node directly walks the tree as (path, node) tuples, which match none
    # of the isinstance checks below, so enum methods and fields were lost.
    body = getattr(class_decl, "body", None)
    if isinstance(body, (list, tuple)):
        members = body
    else:
        members = getattr(body, "declarations", None) or []

    methods: list[ParsedFunction] = []
    fields: list[ParsedField] = []
    all_calls: list[ParsedCallSite] = []
    all_assigns: list[ParsedAssignment] = []

    for member in members:
        if isinstance(member, javalang.tree.MethodDeclaration):
            method, calls, assigns = self._extract_method(member, class_fqn)
            methods.append(method)
            all_calls.extend(calls)
            all_assigns.extend(assigns)
        elif isinstance(member, javalang.tree.ConstructorDeclaration):
            # Constructors go through the method path so their call sites
            # and assignments (e.g. @Autowired constructor injection) are
            # captured; the constructor itself is not listed as a method.
            _, calls, assigns = self._extract_method(member, class_fqn)
            all_calls.extend(calls)
            all_assigns.extend(assigns)
        elif isinstance(member, javalang.tree.FieldDeclaration):
            field = self._extract_field(member)
            if field:
                fields.append(field)
        elif isinstance(member, javalang.tree.ClassDeclaration):
            # Nested class: keep its call sites and assignments, but do not
            # surface the nested ParsedClass itself.
            _, nested_calls, nested_assigns = self._extract_class(member, class_fqn)
            all_calls.extend(nested_calls)
            all_assigns.extend(nested_assigns)

    parsed_class = ParsedClass(
        name=class_name,
        qualified_name=qname,
        location=location,
        base_classes=base_classes,
        decorators=decorators,
        methods=methods,
        fields=fields,
        is_enum=is_enum,
    )

    return parsed_class, all_calls, all_assigns
|
|
244
|
+
|
|
245
|
+
# -------------------------------------------------------------------------
|
|
246
|
+
# Methods
|
|
247
|
+
# -------------------------------------------------------------------------
|
|
248
|
+
|
|
249
|
+
def _extract_method(
    self, method_decl: Any, class_fqn: str
) -> tuple[ParsedFunction, list[ParsedCallSite], list[ParsedAssignment]]:
    """Build a ParsedFunction for one method and collect what its body does.

    Args:
        method_decl: The declaration node to convert (constructor
            declarations are routed through here as well).
        class_fqn: Fully-qualified name of the owning class; used as the
            module part of the method's qualified name and as its owner type.

    Returns:
        ``(function, call_sites, assignments)`` for the declaration.
    """
    simple_name: str = method_decl.name
    where = self._make_location(getattr(method_decl, "position", None))
    qualified = QualifiedName(module=class_fqn, name=simple_name)

    annotations = []
    for annotation_node in getattr(method_decl, "annotations", None) or []:
        annotations.append(self._extract_annotation(annotation_node))

    params = []
    for param_node in getattr(method_decl, "parameters", None) or []:
        params.append(self._extract_parameter(param_node))

    returns = self._get_type_name(getattr(method_decl, "return_type", None))

    declared_modifiers: set[str] = getattr(method_decl, "modifiers", None) or set()
    if "static" in declared_modifiers:
        binding_kind = "static"
    else:
        binding_kind = "instance"

    # Body contents are attributed to the method's full qualified name.
    body_calls = self._extract_call_sites_from_body(method_decl, qualified.full)
    body_assigns = self._extract_assignments_from_body(method_decl, qualified.full)

    parsed_function = ParsedFunction(
        name=simple_name,
        qualified_name=qualified,
        location=where,
        parameters=params,
        return_type=returns,
        decorators=annotations,
        binding=binding_kind,
        owner_type=class_fqn,
        is_async=False,
    )
    return parsed_function, body_calls, body_assigns
|
|
287
|
+
|
|
288
|
+
# -------------------------------------------------------------------------
|
|
289
|
+
# Parameters
|
|
290
|
+
# -------------------------------------------------------------------------
|
|
291
|
+
|
|
292
|
+
def _extract_parameter(self, param: Any) -> ParsedParameter:
    """Convert a formal-parameter node into a ParsedParameter.

    Parameter annotations are folded into the ``metadata`` dict, keyed by
    annotation name:
        @Size(min=3, max=50)   -> {"Size": {"min": 3, "max": 50}}
        @RequestParam("name")  -> {"RequestParam": "name"}
        @NotNull               -> {"NotNull": None}

    Args:
        param: The parameter node to convert.

    Returns:
        A ParsedParameter with name, type, varargs flag, location and the
        annotation metadata described above.
    """
    param_name: str = getattr(param, "name", "")
    declared_type = self._get_type_name(getattr(param, "type", None))
    is_varargs = bool(getattr(param, "varargs", False))
    where = self._make_location(getattr(param, "position", None))

    annotation_meta: dict[str, Any] = {}
    for annotation_node in getattr(param, "annotations", None) or []:
        parsed = self._extract_annotation(annotation_node)
        if parsed.arguments:
            # Named attributes win over positional ones.
            annotation_meta[parsed.name] = parsed.arguments
            continue
        if not parsed.positional_args:
            annotation_meta[parsed.name] = None
            continue
        # A lone positional value is unwrapped; several stay as a list.
        if len(parsed.positional_args) == 1:
            annotation_meta[parsed.name] = parsed.positional_args[0]
        else:
            annotation_meta[parsed.name] = parsed.positional_args

    return ParsedParameter(
        name=param_name,
        type_annotation=declared_type,
        is_variadic=is_varargs,
        location=where,
        metadata=annotation_meta,
    )
|
|
324
|
+
|
|
325
|
+
# -------------------------------------------------------------------------
|
|
326
|
+
# Fields
|
|
327
|
+
# -------------------------------------------------------------------------
|
|
328
|
+
|
|
329
|
+
def _extract_field(self, field_decl: Any) -> ParsedField | None:
    """Convert a field declaration into a ParsedField.

    Only the first declarator that has a name is returned, so a declaration
    such as ``int a, b;`` yields a single field.

    Args:
        field_decl: The field declaration node.

    Returns:
        The ParsedField (with field-level annotations such as @Value,
        @Column or @NotNull as decorators), or ``None`` when no declarator
        has a name.
    """
    declared_type = self._get_type_name(getattr(field_decl, "type", None))
    annotations = []
    for annotation_node in getattr(field_decl, "annotations", None) or []:
        annotations.append(self._extract_annotation(annotation_node))

    for variable in getattr(field_decl, "declarators", None) or []:
        field_name = getattr(variable, "name", None)
        if not field_name:
            continue
        init_expr = getattr(variable, "initializer", None)
        rendered_default = None if init_expr is None else self._expr_to_str(init_expr)
        return ParsedField(
            name=field_name,
            type_annotation=declared_type,
            default_value=rendered_default,
            decorators=annotations,
        )
    return None
|
|
349
|
+
|
|
350
|
+
# -------------------------------------------------------------------------
|
|
351
|
+
# Annotations → ParsedDecorator
|
|
352
|
+
# -------------------------------------------------------------------------
|
|
353
|
+
|
|
354
|
+
def _extract_annotation(self, annotation: Any) -> ParsedDecorator:
    """Convert an annotation node into a ParsedDecorator.

    Args:
        annotation: The annotation node; its ``element`` may be absent, a
            list of items, or a single value node.

    Returns:
        A ParsedDecorator whose ``arguments`` holds the items that have a
        name and whose ``positional_args`` holds the unnamed values.
    """
    annotation_name: str = getattr(annotation, "name", "")
    payload = getattr(annotation, "element", None)

    named: dict[str, Any] = {}
    unnamed: list[Any] = []

    if isinstance(payload, list):
        for entry in payload:
            key = getattr(entry, "name", None)
            # Entries without a ``value`` attribute are themselves the value.
            value = self._extract_element_value(getattr(entry, "value", entry))
            if key:
                named[key] = value
            else:
                unnamed.append(value)
    elif payload is not None:
        # Single element (Literal, MemberReference, etc.).
        value = self._extract_element_value(payload)
        if value is not None:
            unnamed.append(value)

    return ParsedDecorator(name=annotation_name, arguments=named, positional_args=unnamed)
|
|
382
|
+
|
|
383
|
+
def _extract_element_value(self, element: Any) -> Any:
    """Reduce an annotation element (or literal node) to a plain Python value.

    Strings and string-valued literals lose one pair of matching surrounding
    quotes; member references and method invocations yield their ``member``;
    lists and array values are converted element by element; anything else
    with a ``value`` attribute is unwrapped recursively, and the final
    fallback is ``str(element)``.
    """
    import javalang

    def unquote(text: str) -> str:
        wrapped = len(text) >= 2 and text[0] == text[-1] and text[0] in ('"', "'")
        return text[1:-1] if wrapped else text

    if element is None:
        return None

    if isinstance(element, str):
        return unquote(element)

    if isinstance(element, javalang.tree.Literal):
        raw = element.value
        return unquote(raw) if isinstance(raw, str) else raw

    # e.g. RequestMethod.GET -> "GET"; invocations likewise yield the member.
    if isinstance(element, (javalang.tree.MemberReference, javalang.tree.MethodInvocation)):
        return getattr(element, "member", None)

    if isinstance(element, list):
        return [self._extract_element_value(item) for item in element]

    # Annotation array literal such as @GetMapping({"/v1/users", "/v2/users"}):
    # the node keeps its items under "values" (plural).
    array_items = getattr(element, "values", None)
    if isinstance(array_items, list):
        return [self._extract_element_value(item) for item in array_items]

    if hasattr(element, "value"):
        return self._extract_element_value(element.value)

    return str(element)
|
|
425
|
+
|
|
426
|
+
# -------------------------------------------------------------------------
|
|
427
|
+
# Call site extraction
|
|
428
|
+
# -------------------------------------------------------------------------
|
|
429
|
+
|
|
430
|
+
def _extract_call_sites_from_body(
    self, method_decl: Any, caller_fqn: str
) -> list[ParsedCallSite]:
    """Collect every method invocation found under a method declaration.

    Extraction is best-effort: if walking the declaration raises, the call
    sites gathered so far are returned and the failure is logged at debug
    level instead of being swallowed silently.

    Args:
        method_decl: The method (or constructor) declaration to walk.
        caller_fqn: Fully-qualified name of that method; split at its last
            dot into the caller's module and name.

    Returns:
        One ParsedCallSite per MethodInvocation node, in walk order. Empty
        when the declaration has no body.
    """
    import javalang

    calls: list[ParsedCallSite] = []
    if not getattr(method_decl, "body", None):
        return calls

    # Loop-invariant: every call site in this body has the same caller.
    caller_module, _, caller_name = caller_fqn.rpartition(".")

    try:
        for _path, node in method_decl:
            if not isinstance(node, javalang.tree.MethodInvocation):
                continue

            callee_name: str = node.member or ""
            qualifier = getattr(node, "qualifier", None) or ""
            if qualifier:
                callee_name = f"{qualifier}.{callee_name}"

            location = self._make_location(getattr(node, "position", None))

            args: list[ParsedArgument] = [
                self._extract_argument(arg_expr, i)
                for i, arg_expr in enumerate(getattr(node, "arguments", None) or [])
            ]

            calls.append(
                ParsedCallSite(
                    callee_name=callee_name,
                    location=location,
                    caller_function=QualifiedName(module=caller_module, name=caller_name),
                    arguments=args,
                    # A qualifier means the call has an explicit receiver.
                    is_method_call=bool(qualifier),
                    receiver_expression=qualifier or None,
                )
            )
    except Exception:
        # Best-effort: keep the partial result, but leave a trace so a
        # truncated call list can be diagnosed.
        logger.debug(
            "Call-site extraction aborted in %s (%s)",
            caller_fqn,
            self.file_path,
            exc_info=True,
        )

    return calls
|
|
476
|
+
|
|
477
|
+
def _extract_assignments_from_body(
    self, method_decl: Any, caller_fqn: str
) -> list[ParsedAssignment]:
    """Collect local-variable declarations found under a method declaration.

    Each declarator becomes a ParsedAssignment whose source is classified
    from its initializer: a literal, a call, a variable reference, a binary
    expression (treated as string interpolation) or a generic expression.
    A declarator without an initializer is recorded as a literal with no
    value.

    Extraction is best-effort: if walking the declaration raises, the
    assignments gathered so far are returned and the failure is logged at
    debug level instead of being swallowed silently.

    Args:
        method_decl: The method (or constructor) declaration to walk.
        caller_fqn: Fully-qualified name of that method, stored as
            ``in_function`` on every assignment.

    Returns:
        The assignments in walk order. Empty when the declaration has no body.
    """
    import javalang

    assigns: list[ParsedAssignment] = []
    if not getattr(method_decl, "body", None):
        return assigns

    try:
        for _path, node in method_decl:
            if not isinstance(node, javalang.tree.LocalVariableDeclaration):
                continue

            type_name = self._get_type_name(getattr(node, "type", None))
            location = self._make_location(getattr(node, "position", None))

            for declarator in getattr(node, "declarators", None) or []:
                target = getattr(declarator, "name", None)
                if not target:
                    continue

                initializer = getattr(declarator, "initializer", None)
                source_type = "literal"
                source_value = None
                source_call = None
                source_vars: list[str] = []
                is_string_interp = False

                if initializer is None:
                    pass
                elif isinstance(initializer, javalang.tree.Literal):
                    source_value = self._extract_element_value(initializer)
                elif isinstance(initializer, javalang.tree.MethodInvocation):
                    source_type = "call"
                    source_call = initializer.member
                elif isinstance(initializer, javalang.tree.MemberReference):
                    source_type = "variable"
                    source_value = initializer.member
                    source_vars = [initializer.member]
                elif isinstance(initializer, javalang.tree.BinaryOperation):
                    # String concatenation: a + b
                    source_type = "expression"
                    source_value = self._expr_to_str(initializer)
                    source_vars = self._collect_vars(initializer)
                    is_string_interp = True
                else:
                    source_type = "expression"
                    source_value = self._expr_to_str(initializer)

                assigns.append(
                    ParsedAssignment(
                        target=target,
                        location=location,
                        source_type=source_type,
                        # Compare against None, not truthiness, so the empty
                        # string literal "" is kept as "" rather than None.
                        source_value=None if source_value is None else str(source_value),
                        source_call=source_call,
                        in_function=caller_fqn,
                        type_annotation=type_name,
                        source_variables=source_vars,
                        is_string_interpolation=is_string_interp,
                    )
                )
    except Exception:
        # Best-effort: keep the partial result, but leave a trace so a
        # truncated assignment list can be diagnosed.
        logger.debug(
            "Assignment extraction aborted in %s (%s)",
            caller_fqn,
            self.file_path,
            exc_info=True,
        )

    return assigns
|
|
547
|
+
|
|
548
|
+
# -------------------------------------------------------------------------
|
|
549
|
+
# Argument extraction
|
|
550
|
+
# -------------------------------------------------------------------------
|
|
551
|
+
|
|
552
|
+
def _extract_argument(self, expr: Any, position: int) -> ParsedArgument:
    """Describe one call argument as a ParsedArgument.

    Args:
        expr: The argument's expression node.
        position: Zero-based index of the argument in the call.

    Returns:
        A ParsedArgument flagged as a literal, a variable, a call result or
        an expression, depending on the node type. Binary operations are
        additionally flagged as concatenations and carry the variables they
        reference.
    """
    import javalang

    details: dict[str, Any]
    if isinstance(expr, javalang.tree.Literal):
        unquoted = self._extract_element_value(expr)
        details = {
            "is_literal": True,
            "literal_value": unquoted,
            "literal_type": self._infer_literal_type(expr.value),
        }
    elif isinstance(expr, javalang.tree.MemberReference):
        referenced = expr.member
        details = {
            "is_variable": True,
            "variable_name": referenced,
            "source_variables": [referenced],
        }
    elif isinstance(expr, javalang.tree.MethodInvocation):
        details = {"is_call_result": True, "called_function": expr.member}
    elif isinstance(expr, javalang.tree.BinaryOperation):
        # String concatenation
        referenced_vars = self._collect_vars(expr)
        details = {
            "is_expression": True,
            "expression_text": self._expr_to_str(expr),
            "is_concatenation": True,
            "source_variables": referenced_vars,
        }
    else:
        # Generic fallback
        details = {"is_expression": True, "expression_text": self._expr_to_str(expr)}

    return ParsedArgument(position=position, **details)
|
|
597
|
+
|
|
598
|
+
# -------------------------------------------------------------------------
|
|
599
|
+
# Type name helpers
|
|
600
|
+
# -------------------------------------------------------------------------
|
|
601
|
+
|
|
602
|
+
def _get_type_name(self, type_node: Any) -> str | None:
|
|
603
|
+
"""Convert a javalang type node to a simple string name."""
|
|
604
|
+
if type_node is None:
|
|
605
|
+
return None
|
|
606
|
+
|
|
607
|
+
import javalang
|
|
608
|
+
|
|
609
|
+
if isinstance(type_node, str):
|
|
610
|
+
return type_node
|
|
611
|
+
|
|
612
|
+
if isinstance(type_node, javalang.tree.BasicType):
|
|
613
|
+
return type_node.name
|
|
614
|
+
|
|
615
|
+
if isinstance(type_node, javalang.tree.ReferenceType):
|
|
616
|
+
name = type_node.name
|
|
617
|
+
# Handle generics: List<String> → List<String>
|
|
618
|
+
args = getattr(type_node, "arguments", None)
|
|
619
|
+
if args:
|
|
620
|
+
inner = ", ".join(
|
|
621
|
+
self._get_type_name(a.type) or "?" for a in args if hasattr(a, "type")
|
|
622
|
+
)
|
|
623
|
+
return f"{name}<{inner}>"
|
|
624
|
+
return name
|
|
625
|
+
|
|
626
|
+
if isinstance(type_node, javalang.tree.VoidReturn):
|
|
627
|
+
return "void"
|
|
628
|
+
|
|
629
|
+
# Fallback
|
|
630
|
+
name = getattr(type_node, "name", None)
|
|
631
|
+
return name
|
|
632
|
+
|
|
633
|
+
def _infer_literal_type(self, raw: Any) -> str | None:
|
|
634
|
+
if isinstance(raw, str):
|
|
635
|
+
s = raw.strip()
|
|
636
|
+
if s.startswith('"') or s.startswith("'"):
|
|
637
|
+
return "str"
|
|
638
|
+
if s in ("true", "false"):
|
|
639
|
+
return "bool"
|
|
640
|
+
if s == "null":
|
|
641
|
+
return "None"
|
|
642
|
+
try:
|
|
643
|
+
int(s)
|
|
644
|
+
return "int"
|
|
645
|
+
except ValueError:
|
|
646
|
+
pass
|
|
647
|
+
try:
|
|
648
|
+
float(s.rstrip("fFdD"))
|
|
649
|
+
return "float"
|
|
650
|
+
except ValueError:
|
|
651
|
+
pass
|
|
652
|
+
return None
|
|
653
|
+
|
|
654
|
+
# -------------------------------------------------------------------------
|
|
655
|
+
# Expression helpers
|
|
656
|
+
# -------------------------------------------------------------------------
|
|
657
|
+
|
|
658
|
+
def _expr_to_str(self, expr: Any) -> str:
    """Render an expression node as readable text on a best-effort basis.

    Literals render as their raw value, member references as
    ``qualifier.member``, method invocations as ``callee(args)``, binary
    operations as ``left op right`` and class creators as ``new Type(...)``.
    ``None`` renders as an empty string and any other node falls back to
    ``repr``.
    """
    import javalang

    def qualified_member(node: Any) -> str:
        prefix = getattr(node, "qualifier", "") or ""
        tail = node.member or ""
        return f"{prefix}.{tail}" if prefix else tail

    if expr is None:
        return ""

    if isinstance(expr, javalang.tree.Literal):
        return str(expr.value)

    if isinstance(expr, javalang.tree.MemberReference):
        return qualified_member(expr)

    if isinstance(expr, javalang.tree.MethodInvocation):
        callee = qualified_member(expr)
        # Literal arguments are kept verbatim so callers can inspect them
        # (e.g. System.getenv("JWT_SECRET") stays readable downstream);
        # every other argument is elided.
        rendered_args = [
            str(argument.value) if isinstance(argument, javalang.tree.Literal) else "..."
            for argument in getattr(expr, "arguments", None) or []
        ]
        inside = ", ".join(rendered_args) if rendered_args else "..."
        return f"{callee}({inside})"

    if isinstance(expr, javalang.tree.BinaryOperation):
        lhs = self._expr_to_str(expr.operandl)
        rhs = self._expr_to_str(expr.operandr)
        operator_text = getattr(expr, "operator", "+")
        return f"{lhs} {operator_text} {rhs}"

    if isinstance(expr, javalang.tree.ClassCreator):
        created_type = self._get_type_name(expr.type)
        return f"new {created_type}(...)"

    return repr(expr)
|
|
693
|
+
|
|
694
|
+
def _collect_vars(self, expr: Any) -> list[str]:
    """Gather member names referenced by an expression, left to right.

    Only ``MemberReference`` nodes and the operands of ``BinaryOperation``
    nodes are inspected; any other node kind contributes nothing.
    """
    import javalang

    if isinstance(expr, javalang.tree.MemberReference):
        return [expr.member]

    if isinstance(expr, javalang.tree.BinaryOperation):
        left_names = self._collect_vars(expr.operandl)
        right_names = self._collect_vars(expr.operandr)
        return [*left_names, *right_names]

    return []
|
|
705
|
+
|
|
706
|
+
# -------------------------------------------------------------------------
|
|
707
|
+
# Location helper
|
|
708
|
+
# -------------------------------------------------------------------------
|
|
709
|
+
|
|
710
|
+
def _make_location(self, pos: Any) -> CodeLocation:
    """Build a ``CodeLocation`` in this file from a position object.

    With no position the location is reported at line 0 and no column is
    passed. Otherwise ``pos.line`` (0 when missing or falsy) and
    ``pos.column`` (``None`` when missing) are used.
    """
    if pos is not None:
        return CodeLocation(
            file=self.file_path,
            line=getattr(pos, "line", 0) or 0,
            column=getattr(pos, "column", None),
        )
    return CodeLocation(file=self.file_path, line=0)
|
|
716
|
+
|
|
717
|
+
|
|
718
|
+
# =============================================================================
|
|
719
|
+
# JavaParser
|
|
720
|
+
# =============================================================================
|
|
721
|
+
|
|
722
|
+
|
|
723
|
+
class JavaParser(BaseParser):
    """
    Parser for Java source files.

    Uses the javalang library to build an AST and hands it to
    ``JavaExtractor`` to extract:
    - Classes, methods, fields
    - Imports
    - Call sites (method invocations)
    - Assignments (local variable declarations)

    Failures are never raised to the caller: read, parse and extraction
    errors are all reported through a ``ParsedFile`` with ``success=False``.
    """

    LANGUAGE: Language = Language.JAVA
    SUPPORTED_EXTENSIONS: frozenset[str] = frozenset({".java"})

    def parse_file(self, file_path: Path) -> ParsedFile:
        """Read ``file_path`` as UTF-8 and parse its contents.

        Undecodable bytes are replaced rather than rejected. An ``OSError``
        while reading yields a failed ``ParsedFile`` instead of propagating.
        """
        try:
            source = file_path.read_text(encoding="utf-8", errors="replace")
        except OSError as e:
            return ParsedFile(
                path=file_path,
                language=Language.JAVA,
                success=False,
                error=ParseError(f"Could not read file: {e}", file_path),
            )
        return self.parse_source(source, file_path)

    def parse_source(self, source: str, file_path: Path | None = None) -> ParsedFile:
        """Parse Java source text and extract its structure.

        Args:
            source: Java source code.
            file_path: Origin of the source; when omitted the result is
                attributed to ``Path("<unknown>")``.

        Returns:
            The extractor's ``ParsedFile`` on success, otherwise a
            ``ParsedFile`` with ``success=False`` and a ``ParseError``.
        """
        resolved_path = file_path or Path("<unknown>")
        try:
            # Imported lazily; a missing javalang is reported like any other
            # parse failure by the handler below.
            import javalang

            tree = javalang.parse.parse(source)
        except Exception as e:
            # javalang's JavaSyntaxError stringifies to "" and keeps its
            # text in ``description`` / ``at``; fall back to those so the
            # reported error is never blank.
            detail = str(e)
            if not detail:
                detail = str(getattr(e, "description", None) or type(e).__name__)
                at = getattr(e, "at", None)
                if at is not None:
                    detail = f"{detail} (at {at})"
            logger.debug("Failed to parse Java file %s: %s", resolved_path, detail)
            return ParsedFile(
                path=resolved_path,
                language=Language.JAVA,
                success=False,
                error=ParseError(detail, file_path),
            )

        try:
            extractor = JavaExtractor(resolved_path, source, tree)
            return extractor.extract()
        except Exception as e:
            # Best-effort boundary: one file failing extraction is reported
            # as a failed ParsedFile rather than raised.
            logger.warning("Extraction failed for %s: %s", resolved_path, e)
            return ParsedFile(
                path=resolved_path,
                language=Language.JAVA,
                success=False,
                error=ParseError(f"Extraction error: {e}", file_path),
            )
|