apisec-code-bolt 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. apisec_code_bolt/__init__.py +42 -0
  2. apisec_code_bolt/__main__.py +11 -0
  3. apisec_code_bolt/analysis/__init__.py +96 -0
  4. apisec_code_bolt/analysis/analyzer.py +2309 -0
  5. apisec_code_bolt/analysis/binding_tracker.py +341 -0
  6. apisec_code_bolt/analysis/call_graph.py +1197 -0
  7. apisec_code_bolt/analysis/call_graph_types.py +332 -0
  8. apisec_code_bolt/analysis/call_resolver.py +988 -0
  9. apisec_code_bolt/analysis/capability_tagger.py +322 -0
  10. apisec_code_bolt/analysis/config_scanner.py +197 -0
  11. apisec_code_bolt/analysis/data_flow.py +1883 -0
  12. apisec_code_bolt/analysis/dependency_extractor.py +959 -0
  13. apisec_code_bolt/analysis/flow_analysis.py +1406 -0
  14. apisec_code_bolt/analysis/hof_catalog.py +61 -0
  15. apisec_code_bolt/analysis/integration_detector.py +1399 -0
  16. apisec_code_bolt/analysis/literal_scanner.py +300 -0
  17. apisec_code_bolt/analysis/path_normalizer.py +55 -0
  18. apisec_code_bolt/analysis/read_site_detector.py +310 -0
  19. apisec_code_bolt/analysis/request_patterns.py +162 -0
  20. apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
  21. apisec_code_bolt/analysis/sink_evidence.py +333 -0
  22. apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
  23. apisec_code_bolt/cli/__init__.py +5 -0
  24. apisec_code_bolt/cli/exit_codes.py +17 -0
  25. apisec_code_bolt/cli/main.py +1069 -0
  26. apisec_code_bolt/cloud/__init__.py +1 -0
  27. apisec_code_bolt/cloud/apisec_client.py +118 -0
  28. apisec_code_bolt/cloud/client.py +255 -0
  29. apisec_code_bolt/core/__init__.py +75 -0
  30. apisec_code_bolt/core/config.py +528 -0
  31. apisec_code_bolt/core/credentials.py +65 -0
  32. apisec_code_bolt/core/discovery.py +433 -0
  33. apisec_code_bolt/core/log_format.py +115 -0
  34. apisec_code_bolt/core/manifest.py +1009 -0
  35. apisec_code_bolt/core/repo.py +280 -0
  36. apisec_code_bolt/core/state.py +59 -0
  37. apisec_code_bolt/core/telemetry.py +451 -0
  38. apisec_code_bolt/core/types.py +587 -0
  39. apisec_code_bolt/fingerprinting/__init__.py +1 -0
  40. apisec_code_bolt/frameworks/__init__.py +29 -0
  41. apisec_code_bolt/frameworks/_jwt_common.py +50 -0
  42. apisec_code_bolt/frameworks/auth_helpers.py +437 -0
  43. apisec_code_bolt/frameworks/base.py +608 -0
  44. apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
  45. apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
  46. apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
  47. apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
  48. apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
  49. apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
  50. apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
  51. apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
  52. apisec_code_bolt/frameworks/java/__init__.py +6 -0
  53. apisec_code_bolt/frameworks/java/_annotations.py +167 -0
  54. apisec_code_bolt/frameworks/java/_constraints.py +128 -0
  55. apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
  56. apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
  57. apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
  58. apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
  59. apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
  60. apisec_code_bolt/frameworks/js/__init__.py +8 -0
  61. apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
  62. apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
  63. apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
  64. apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
  65. apisec_code_bolt/frameworks/python/__init__.py +19 -0
  66. apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
  67. apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
  68. apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
  69. apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
  70. apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
  71. apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
  72. apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
  73. apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
  74. apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
  75. apisec_code_bolt/parsing/__init__.py +62 -0
  76. apisec_code_bolt/parsing/base.py +554 -0
  77. apisec_code_bolt/parsing/csharp/__init__.py +5 -0
  78. apisec_code_bolt/parsing/csharp/language_services.py +203 -0
  79. apisec_code_bolt/parsing/csharp/literals.py +72 -0
  80. apisec_code_bolt/parsing/csharp/parser.py +1158 -0
  81. apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
  82. apisec_code_bolt/parsing/js/__init__.py +5 -0
  83. apisec_code_bolt/parsing/js/language_services.py +118 -0
  84. apisec_code_bolt/parsing/js/parser.py +622 -0
  85. apisec_code_bolt/parsing/jvm/__init__.py +7 -0
  86. apisec_code_bolt/parsing/jvm/language_services.py +270 -0
  87. apisec_code_bolt/parsing/jvm/parser.py +774 -0
  88. apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
  89. apisec_code_bolt/parsing/python/__init__.py +150 -0
  90. apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
  91. apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
  92. apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
  93. apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
  94. apisec_code_bolt/parsing/python/expression_utils.py +221 -0
  95. apisec_code_bolt/parsing/python/extraction_types.py +271 -0
  96. apisec_code_bolt/parsing/python/language_services.py +487 -0
  97. apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
  98. apisec_code_bolt/parsing/python/parser.py +719 -0
  99. apisec_code_bolt/parsing/python/path_resolver.py +576 -0
  100. apisec_code_bolt/parsing/python/router_registry.py +806 -0
  101. apisec_code_bolt/parsing/python/type_resolver.py +730 -0
  102. apisec_code_bolt/parsing/python/visitors.py +1544 -0
  103. apisec_code_bolt/parsing/services.py +544 -0
  104. apisec_code_bolt/query/__init__.py +1 -0
  105. apisec_code_bolt/query/ast_cache.py +182 -0
  106. apisec_code_bolt/query/executor.py +283 -0
  107. apisec_code_bolt/query/handlers.py +832 -0
  108. apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
  109. apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
  110. apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
  111. apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,774 @@
1
+ """
2
+ Java parser using the javalang library.
3
+
4
+ Parses .java source files into the language-agnostic ParsedFile dataclass.
5
+ Extracts classes, methods, fields, imports, call sites, and assignments.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ import time
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ from ...core.types import (
16
+ CodeLocation,
17
+ Language,
18
+ ParseError,
19
+ QualifiedName,
20
+ )
21
+ from ..base import (
22
+ BaseParser,
23
+ ParsedArgument,
24
+ ParsedAssignment,
25
+ ParsedCallSite,
26
+ ParsedClass,
27
+ ParsedDecorator,
28
+ ParsedField,
29
+ ParsedFile,
30
+ ParsedFunction,
31
+ ParsedImport,
32
+ ParsedParameter,
33
+ )
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+
38
+ # =============================================================================
39
+ # Java Extractor
40
+ # =============================================================================
41
+
42
+
43
+ class JavaExtractor:
44
+ """
45
+ Walks a javalang CompilationUnit and extracts ParsedFile data.
46
+ """
47
+
48
+ def __init__(self, file_path: Path, source: str, tree: Any) -> None:
49
+ self.file_path = file_path
50
+ self.source = source
51
+ self.tree = tree # javalang CompilationUnit
52
+ self.lines = source.splitlines()
53
+
54
def extract(self) -> ParsedFile:
    """Build the ParsedFile for the compilation unit held by this extractor.

    Returns:
        A successful ParsedFile carrying the package name, imports, classes,
        the methods of those classes flattened into ``functions``, call
        sites, assignments, the line count and the elapsed extraction time
        in milliseconds.
    """
    started_at = time.perf_counter()

    package = self._get_package_name()
    parsed_imports = self._extract_imports()
    parsed_classes, parsed_calls, parsed_assignments = self._extract_types(package)

    # Every method of every returned class is also exposed as a flat
    # "function" list on the ParsedFile.
    flattened: list[ParsedFunction] = [
        method for parsed in parsed_classes for method in parsed.methods
    ]

    elapsed_ms = int((time.perf_counter() - started_at) * 1000)

    return ParsedFile(
        path=self.file_path,
        language=Language.JAVA,
        success=True,
        module_name=package,
        imports=parsed_imports,
        classes=parsed_classes,
        functions=flattened,
        call_sites=parsed_calls,
        assignments=parsed_assignments,
        line_count=len(self.lines),
        parse_time_ms=elapsed_ms,
    )
82
+
83
+ # -------------------------------------------------------------------------
84
+ # Package / module
85
+ # -------------------------------------------------------------------------
86
+
87
+ def _get_package_name(self) -> str | None:
88
+ pkg = getattr(self.tree, "package", None)
89
+ if pkg is None:
90
+ return None
91
+ name = getattr(pkg, "name", None)
92
+ return name
93
+
94
+ # -------------------------------------------------------------------------
95
+ # Imports
96
+ # -------------------------------------------------------------------------
97
+
98
def _extract_imports(self) -> list[ParsedImport]:
    """Convert the tree's import declarations into ParsedImport records.

    Wildcard imports become a module with no names. Other imports are split
    at the last dot into module and a single imported name; a path without
    any dot is kept whole as the module with no names.

    Returns:
        One ParsedImport per entry in ``self.tree.imports``, in order.
    """
    collected: list[ParsedImport] = []
    for declaration in getattr(self.tree, "imports", None) or []:
        path: str = declaration.path or ""
        is_wildcard: bool = bool(declaration.wildcard)
        location = self._make_location(getattr(declaration, "position", None))

        names: list[str] = []
        if is_wildcard:
            # removesuffix drops only an exact ".*" trailer (and is a no-op
            # otherwise), unlike rstrip(".*") which strips characters.
            module = path.removesuffix(".*")
        else:
            parent, separator, leaf = path.rpartition(".")
            if separator:
                module = parent
                names = [leaf]
            else:
                module = path

        collected.append(ParsedImport(module=module, names=names, location=location))
    return collected
137
+
138
+ # -------------------------------------------------------------------------
139
+ # Types (classes, enums, interfaces)
140
+ # -------------------------------------------------------------------------
141
+
142
def _extract_types(
    self, package_name: str | None
) -> tuple[list[ParsedClass], list[ParsedCallSite], list[ParsedAssignment]]:
    """Extract every top-level class, interface and enum of the tree.

    Args:
        package_name: Package of the compilation unit, or None.

    Returns:
        A tuple of (classes, call sites, assignments) accumulated over all
        supported top-level type declarations; other declaration kinds are
        skipped.
    """
    import javalang

    supported_kinds = (
        javalang.tree.ClassDeclaration,
        javalang.tree.InterfaceDeclaration,
        javalang.tree.EnumDeclaration,
    )

    classes: list[ParsedClass] = []
    call_sites: list[ParsedCallSite] = []
    assignments: list[ParsedAssignment] = []

    top_level = getattr(self.tree, "types", None) or []
    for declaration in top_level:
        if not isinstance(declaration, supported_kinds):
            continue
        parsed, found_calls, found_assigns = self._extract_class(declaration, package_name)
        classes.append(parsed)
        call_sites.extend(found_calls)
        assignments.extend(found_assigns)

    return classes, call_sites, assignments
166
+
167
def _extract_class(
    self, class_decl: Any, package_name: str | None
) -> tuple[ParsedClass, list[ParsedCallSite], list[ParsedAssignment]]:
    """Extract one class, interface or enum declaration.

    Args:
        class_decl: A javalang ClassDeclaration, InterfaceDeclaration or
            EnumDeclaration node.
        package_name: Name of the enclosing scope: the package for a
            top-level type, or the outer type's qualified name for a nested
            one. None or empty means no enclosing scope.

    Returns:
        A tuple of (parsed class, call sites, assignments). The call sites
        and assignments include those found in constructors and in nested
        types; nested types themselves are not returned as classes, and
        constructors are not added to the class's method list.
    """
    import javalang

    tree = javalang.tree

    module = package_name or ""
    class_name: str = class_decl.name
    class_fqn = f"{module}.{class_name}" if module else class_name
    qname = QualifiedName(module=module, name=class_name)

    location = self._make_location(getattr(class_decl, "position", None))

    decorators = [
        self._extract_annotation(a) for a in (getattr(class_decl, "annotations", None) or [])
    ]

    # Base classes. ClassDeclaration.extends is a single type while
    # InterfaceDeclaration.extends is a list, so normalise to a list first.
    base_classes: list[str] = []
    ext = getattr(class_decl, "extends", None)
    if ext is not None:
        parents = ext if isinstance(ext, list) else [ext]
        base_classes.extend(self._get_type_name(parent) for parent in parents)
    base_classes.extend(
        self._get_type_name(impl) for impl in (getattr(class_decl, "implements", None) or [])
    )

    is_enum = isinstance(class_decl, tree.EnumDeclaration)

    # Classes and interfaces expose their members as a plain list in
    # ``body``. Enum declarations instead carry a body node whose members
    # live under ``declarations`` (per javalang's tree module); iterating
    # that node directly yields (path, node) tuples, so no member would
    # ever match the isinstance checks below.
    body = getattr(class_decl, "body", None)
    members = body if isinstance(body, list) else getattr(body, "declarations", None)

    nested_kinds = (
        tree.ClassDeclaration,
        tree.InterfaceDeclaration,
        tree.EnumDeclaration,
    )

    methods: list[ParsedFunction] = []
    fields: list[ParsedField] = []
    all_calls: list[ParsedCallSite] = []
    all_assigns: list[ParsedAssignment] = []

    for member in members or []:
        if isinstance(member, tree.MethodDeclaration):
            method, calls, assigns = self._extract_method(member, class_fqn)
            methods.append(method)
            all_calls.extend(calls)
            all_assigns.extend(assigns)
        elif isinstance(member, tree.ConstructorDeclaration):
            # Constructors go through the method path so their call sites
            # and assignments (e.g. @Autowired constructor injection) are
            # captured; the resulting function record itself is dropped.
            _, calls, assigns = self._extract_method(member, class_fqn)
            all_calls.extend(calls)
            all_assigns.extend(assigns)
        elif isinstance(member, tree.FieldDeclaration):
            field = self._extract_field(member)
            if field:
                fields.append(field)
        elif isinstance(member, nested_kinds):
            # Nested type: recurse for its call sites and assignments but
            # do not surface it as a top-level class.
            _nested, nested_calls, nested_assigns = self._extract_class(member, class_fqn)
            all_calls.extend(nested_calls)
            all_assigns.extend(nested_assigns)

    parsed_class = ParsedClass(
        name=class_name,
        qualified_name=qname,
        location=location,
        base_classes=base_classes,
        decorators=decorators,
        methods=methods,
        fields=fields,
        is_enum=is_enum,
    )

    return parsed_class, all_calls, all_assigns
244
+
245
+ # -------------------------------------------------------------------------
246
+ # Methods
247
+ # -------------------------------------------------------------------------
248
+
249
def _extract_method(
    self, method_decl: Any, class_fqn: str
) -> tuple[ParsedFunction, list[ParsedCallSite], list[ParsedAssignment]]:
    """Extract a method (or constructor) declaration.

    Args:
        method_decl: javalang declaration node for the method.
        class_fqn: Qualified name of the owning type; used as the module
            part of the method's qualified name and as ``owner_type``.

    Returns:
        A tuple of (parsed function, call sites in its body, assignments in
        its body). ``binding`` is "static" when the declaration carries the
        ``static`` modifier and "instance" otherwise.
    """
    method_name: str = method_decl.name
    qname = QualifiedName(module=class_fqn, name=method_name)
    location = self._make_location(getattr(method_decl, "position", None))

    annotation_nodes = getattr(method_decl, "annotations", None) or []
    decorators = [self._extract_annotation(node) for node in annotation_nodes]

    parameter_nodes = getattr(method_decl, "parameters", None) or []
    parameters = [self._extract_parameter(node) for node in parameter_nodes]

    return_type = self._get_type_name(getattr(method_decl, "return_type", None))

    modifiers: set[str] = getattr(method_decl, "modifiers", None) or set()
    if "static" in modifiers:
        binding = "static"
    else:
        binding = "instance"

    # Walk the body twice: once for invocations, once for local declarations.
    owner = qname.full
    calls = self._extract_call_sites_from_body(method_decl, owner)
    assigns = self._extract_assignments_from_body(method_decl, owner)

    parsed = ParsedFunction(
        name=method_name,
        qualified_name=qname,
        location=location,
        parameters=parameters,
        return_type=return_type,
        decorators=decorators,
        binding=binding,
        owner_type=class_fqn,
        is_async=False,
    )
    return parsed, calls, assigns
287
+
288
+ # -------------------------------------------------------------------------
289
+ # Parameters
290
+ # -------------------------------------------------------------------------
291
+
292
def _extract_parameter(self, param: Any) -> ParsedParameter:
    """Extract one formal parameter, including its annotation metadata.

    Each annotation contributes one ``metadata`` entry keyed by its name:
        @Size(min=3, max=50)   -> {"Size": {"min": 3, "max": 50}}
        @RequestParam("name")  -> {"RequestParam": "name"}
        @NotNull               -> {"NotNull": None}
    Named attributes win over positional ones; a single positional value is
    unwrapped, several are kept as a list.

    Args:
        param: javalang formal-parameter node.

    Returns:
        The ParsedParameter for ``param``.
    """
    metadata: dict[str, Any] = {}
    for annotation in getattr(param, "annotations", None) or []:
        parsed = self._extract_annotation(annotation)
        if parsed.arguments:
            value: Any = parsed.arguments
        elif parsed.positional_args:
            positional = parsed.positional_args
            value = positional[0] if len(positional) == 1 else positional
        else:
            value = None
        metadata[parsed.name] = value

    return ParsedParameter(
        name=getattr(param, "name", ""),
        type_annotation=self._get_type_name(getattr(param, "type", None)),
        is_variadic=bool(getattr(param, "varargs", False)),
        location=self._make_location(getattr(param, "position", None)),
        metadata=metadata,
    )
324
+
325
+ # -------------------------------------------------------------------------
326
+ # Fields
327
+ # -------------------------------------------------------------------------
328
+
329
def _extract_field(self, field_decl: Any) -> ParsedField | None:
    """Extract a field declaration as a single ParsedField.

    Only the first declarator that has a name is returned; any further
    declarators on the same declaration are not visited.

    Args:
        field_decl: javalang FieldDeclaration node.

    Returns:
        The ParsedField (with field-level annotations such as @Value,
        @Column, @NotNull as decorators), or None when no declarator has a
        name.
    """
    declared_type = self._get_type_name(getattr(field_decl, "type", None))
    annotation_nodes = getattr(field_decl, "annotations", None) or []
    decorators = [self._extract_annotation(node) for node in annotation_nodes]

    for declarator in getattr(field_decl, "declarators", None) or []:
        field_name = getattr(declarator, "name", None)
        if not field_name:
            continue
        initializer = getattr(declarator, "initializer", None)
        default_text = None if initializer is None else self._expr_to_str(initializer)
        return ParsedField(
            name=field_name,
            type_annotation=declared_type,
            default_value=default_text,
            decorators=decorators,
        )
    return None
349
+
350
+ # -------------------------------------------------------------------------
351
+ # Annotations → ParsedDecorator
352
+ # -------------------------------------------------------------------------
353
+
354
def _extract_annotation(self, annotation: Any) -> ParsedDecorator:
    """Convert a javalang annotation node into a ParsedDecorator.

    When the annotation's ``element`` is a list, each item with a name
    becomes a keyword argument and each item without one becomes a
    positional argument. A single non-list element becomes one positional
    argument unless it reduces to None.

    Args:
        annotation: javalang annotation node.

    Returns:
        ParsedDecorator with the annotation name and extracted arguments.
    """
    keyword_args: dict[str, Any] = {}
    positional_args: list[Any] = []

    element = getattr(annotation, "element", None)
    if isinstance(element, list):
        for entry in element:
            key = getattr(entry, "name", None)
            # Entries without a ``value`` attribute are reduced as-is.
            reduced = self._extract_element_value(getattr(entry, "value", entry))
            if key:
                keyword_args[key] = reduced
            else:
                positional_args.append(reduced)
    elif element is not None:
        # Single element (Literal, MemberReference, etc.)
        reduced = self._extract_element_value(element)
        if reduced is not None:
            positional_args.append(reduced)

    return ParsedDecorator(
        name=getattr(annotation, "name", ""),
        arguments=keyword_args,
        positional_args=positional_args,
    )
382
+
383
def _extract_element_value(self, element: Any) -> Any:
    """Reduce a javalang annotation element to plain Python data.

    Handling, in order:
        - None stays None.
        - Literal nodes are reduced to their ``value``; string values (and
          plain strings) lose one pair of matching surrounding quotes.
        - MemberReference / MethodInvocation nodes yield their ``member``
          (e.g. RequestMethod.GET -> "GET").
        - Lists, and nodes with a list-valued ``values`` attribute (array
          literals such as @GetMapping({"/v1/users", "/v2/users"})), are
          reduced element by element.
        - Nodes with a ``value`` attribute are reduced through it.
        - Anything else is returned as ``str(element)``.
    """
    import javalang

    tree = javalang.tree

    if element is None:
        return None

    if isinstance(element, tree.Literal):
        literal_text = element.value
        if not isinstance(literal_text, str):
            return literal_text
        # Fall through to the shared quote-stripping below.
        element = literal_text

    if isinstance(element, str):
        is_quoted = (
            len(element) >= 2 and element[0] == element[-1] and element[0] in ('"', "'")
        )
        return element[1:-1] if is_quoted else element

    if isinstance(element, (tree.MemberReference, tree.MethodInvocation)):
        return getattr(element, "member", None)

    if isinstance(element, list):
        return [self._extract_element_value(item) for item in element]

    # Array-valued element: note the attribute is "values" (plural).
    nested = getattr(element, "values", None)
    if isinstance(nested, list):
        return [self._extract_element_value(item) for item in nested]

    if hasattr(element, "value"):
        return self._extract_element_value(element.value)

    return str(element)
425
+
426
+ # -------------------------------------------------------------------------
427
+ # Call site extraction
428
+ # -------------------------------------------------------------------------
429
+
430
def _extract_call_sites_from_body(
    self, method_decl: Any, caller_fqn: str
) -> list[ParsedCallSite]:
    """Collect every method invocation found under a method declaration.

    Extraction is best-effort: if walking the tree raises, the call sites
    gathered so far are returned and the failure is logged at debug level
    instead of being dropped silently.

    Args:
        method_decl: javalang method or constructor declaration node.
        caller_fqn: Qualified name of the enclosing method; split at its
            last dot into the module and name of ``caller_function``.

    Returns:
        One ParsedCallSite per MethodInvocation node, in traversal order.
        Empty when the declaration has no body.
    """
    import javalang

    calls: list[ParsedCallSite] = []
    if not getattr(method_decl, "body", None):
        return calls

    # The caller is the same for every call site, so split it only once.
    caller_module, _, caller_name = caller_fqn.rpartition(".")

    try:
        for _path, node in method_decl:
            if not isinstance(node, javalang.tree.MethodInvocation):
                continue

            callee_name: str = node.member or ""
            qualifier = getattr(node, "qualifier", None) or ""
            if qualifier:
                callee_name = f"{qualifier}.{callee_name}"

            location = self._make_location(getattr(node, "position", None))

            args: list[ParsedArgument] = [
                self._extract_argument(arg_expr, index)
                for index, arg_expr in enumerate(getattr(node, "arguments", None) or [])
            ]

            calls.append(
                ParsedCallSite(
                    callee_name=callee_name,
                    location=location,
                    caller_function=QualifiedName(module=caller_module, name=caller_name),
                    arguments=args,
                    is_method_call=bool(qualifier),
                    receiver_expression=qualifier or None,
                )
            )
    except Exception:
        # Still best-effort, but leave a trace so lost call sites can be diagnosed.
        logger.debug("Call-site extraction aborted for %s", caller_fqn, exc_info=True)

    return calls
476
+
477
def _extract_assignments_from_body(
    self, method_decl: Any, caller_fqn: str
) -> list[ParsedAssignment]:
    """Collect local variable declarations found under a method declaration.

    Each declarator with a name yields one ParsedAssignment whose source is
    classified from its initializer:
        - Literal           -> "literal", value is the unquoted literal
        - MethodInvocation  -> "call", ``source_call`` is the method name
        - MemberReference   -> "variable", value and source variable are
                               the referenced member
        - BinaryOperation   -> "expression", flagged as string interpolation
        - anything else     -> "expression"
    A declarator without an initializer keeps the default "literal" type
    with no value.

    Extraction is best-effort: if walking the tree raises, the assignments
    gathered so far are returned and the failure is logged at debug level.

    Args:
        method_decl: javalang method or constructor declaration node.
        caller_fqn: Qualified name of the enclosing method, recorded as
            ``in_function``.

    Returns:
        The assignments in traversal order. Empty when there is no body.
    """
    import javalang

    tree = javalang.tree

    assigns: list[ParsedAssignment] = []
    if not getattr(method_decl, "body", None):
        return assigns

    try:
        for _path, node in method_decl:
            if not isinstance(node, tree.LocalVariableDeclaration):
                continue

            type_name = self._get_type_name(getattr(node, "type", None))
            location = self._make_location(getattr(node, "position", None))

            for declarator in getattr(node, "declarators", None) or []:
                target = getattr(declarator, "name", None)
                if not target:
                    continue

                initializer = getattr(declarator, "initializer", None)
                source_type = "literal"
                source_value = None
                source_call = None
                source_vars: list[str] = []
                is_string_interp = False

                if initializer is not None:
                    if isinstance(initializer, tree.Literal):
                        source_type = "literal"
                        source_value = self._extract_element_value(initializer)
                    elif isinstance(initializer, tree.MethodInvocation):
                        source_type = "call"
                        source_call = initializer.member
                    elif isinstance(initializer, tree.MemberReference):
                        source_type = "variable"
                        var_name = initializer.member
                        source_value = var_name
                        source_vars = [var_name]
                    elif isinstance(initializer, tree.BinaryOperation):
                        # String concatenation: a + b
                        source_type = "expression"
                        source_value = self._expr_to_str(initializer)
                        source_vars = self._collect_vars(initializer)
                        is_string_interp = True
                    else:
                        source_type = "expression"
                        source_value = self._expr_to_str(initializer)

                assigns.append(
                    ParsedAssignment(
                        target=target,
                        location=location,
                        source_type=source_type,
                        # Compare against None rather than truthiness so an
                        # empty-string literal (String s = "";) is kept as
                        # "" instead of being reported as "no value".
                        source_value=None if source_value is None else str(source_value),
                        source_call=source_call,
                        in_function=caller_fqn,
                        type_annotation=type_name,
                        source_variables=source_vars,
                        is_string_interpolation=is_string_interp,
                    )
                )
    except Exception:
        # Still best-effort, but leave a trace so lost assignments can be diagnosed.
        logger.debug("Assignment extraction aborted for %s", caller_fqn, exc_info=True)

    return assigns
547
+
548
+ # -------------------------------------------------------------------------
549
+ # Argument extraction
550
+ # -------------------------------------------------------------------------
551
+
552
def _extract_argument(self, expr: Any, position: int) -> ParsedArgument:
    """Classify one call argument expression.

    Args:
        expr: javalang expression node for the argument.
        position: Zero-based index of the argument in the call.

    Returns:
        A ParsedArgument flagged as a literal, a variable, a call result, a
        concatenation expression (BinaryOperation), or a generic expression
        for any other node type.
    """
    import javalang

    tree = javalang.tree

    if isinstance(expr, tree.Literal):
        return ParsedArgument(
            position=position,
            is_literal=True,
            literal_value=self._extract_element_value(expr),
            literal_type=self._infer_literal_type(expr.value),
        )

    if isinstance(expr, tree.MemberReference):
        referenced = expr.member
        return ParsedArgument(
            position=position,
            is_variable=True,
            variable_name=referenced,
            source_variables=[referenced],
        )

    if isinstance(expr, tree.MethodInvocation):
        return ParsedArgument(
            position=position,
            is_call_result=True,
            called_function=expr.member,
        )

    if isinstance(expr, tree.BinaryOperation):
        # Treated as string concatenation; record the variables involved.
        operands = self._collect_vars(expr)
        return ParsedArgument(
            position=position,
            is_expression=True,
            expression_text=self._expr_to_str(expr),
            is_concatenation=True,
            source_variables=operands,
        )

    # Anything else is kept only as its textual rendering.
    return ParsedArgument(
        position=position,
        is_expression=True,
        expression_text=self._expr_to_str(expr),
    )
597
+
598
+ # -------------------------------------------------------------------------
599
+ # Type name helpers
600
+ # -------------------------------------------------------------------------
601
+
602
+ def _get_type_name(self, type_node: Any) -> str | None:
603
+ """Convert a javalang type node to a simple string name."""
604
+ if type_node is None:
605
+ return None
606
+
607
+ import javalang
608
+
609
+ if isinstance(type_node, str):
610
+ return type_node
611
+
612
+ if isinstance(type_node, javalang.tree.BasicType):
613
+ return type_node.name
614
+
615
+ if isinstance(type_node, javalang.tree.ReferenceType):
616
+ name = type_node.name
617
+ # Handle generics: List<String> → List<String>
618
+ args = getattr(type_node, "arguments", None)
619
+ if args:
620
+ inner = ", ".join(
621
+ self._get_type_name(a.type) or "?" for a in args if hasattr(a, "type")
622
+ )
623
+ return f"{name}<{inner}>"
624
+ return name
625
+
626
+ if isinstance(type_node, javalang.tree.VoidReturn):
627
+ return "void"
628
+
629
+ # Fallback
630
+ name = getattr(type_node, "name", None)
631
+ return name
632
+
633
+ def _infer_literal_type(self, raw: Any) -> str | None:
634
+ if isinstance(raw, str):
635
+ s = raw.strip()
636
+ if s.startswith('"') or s.startswith("'"):
637
+ return "str"
638
+ if s in ("true", "false"):
639
+ return "bool"
640
+ if s == "null":
641
+ return "None"
642
+ try:
643
+ int(s)
644
+ return "int"
645
+ except ValueError:
646
+ pass
647
+ try:
648
+ float(s.rstrip("fFdD"))
649
+ return "float"
650
+ except ValueError:
651
+ pass
652
+ return None
653
+
654
+ # -------------------------------------------------------------------------
655
+ # Expression helpers
656
+ # -------------------------------------------------------------------------
657
+
658
def _expr_to_str(self, expr: Any) -> str:
    """Render an expression node as a best-effort string.

    Literals render as their raw value, member references and method
    invocations as ``qualifier.member`` (qualifier omitted when empty),
    binary operations as ``left op right`` and class creators as
    ``new Type(...)``. Unrecognised nodes fall back to ``repr``; None gives
    an empty string.
    """
    import javalang

    tree = javalang.tree

    if expr is None:
        return ""

    if isinstance(expr, tree.Literal):
        return str(expr.value)

    if isinstance(expr, tree.MemberReference):
        owner = getattr(expr, "qualifier", "") or ""
        member = expr.member or ""
        if owner:
            return f"{owner}.{member}"
        return member

    if isinstance(expr, tree.MethodInvocation):
        owner = getattr(expr, "qualifier", "") or ""
        member = expr.member or ""
        # Literal arguments are kept verbatim so callers can inspect them
        # (e.g. System.getenv("JWT_SECRET")); everything else is elided.
        rendered_args = [
            str(argument.value) if isinstance(argument, tree.Literal) else "..."
            for argument in (getattr(expr, "arguments", None) or [])
        ]
        inner = ", ".join(rendered_args) if rendered_args else "..."
        callee = f"{owner}.{member}" if owner else member
        return f"{callee}({inner})"

    if isinstance(expr, tree.BinaryOperation):
        lhs = self._expr_to_str(expr.operandl)
        rhs = self._expr_to_str(expr.operandr)
        operator = getattr(expr, "operator", "+")
        return f"{lhs} {operator} {rhs}"

    if isinstance(expr, tree.ClassCreator):
        return f"new {self._get_type_name(expr.type)}(...)"

    return repr(expr)
693
+
694
def _collect_vars(self, expr: Any) -> list[str]:
    """Collect MemberReference names from an expression tree.

    Only MemberReference leaves and BinaryOperation nodes are followed;
    any other node type contributes nothing.

    Returns:
        Member names in left-to-right order (duplicates preserved).
    """
    import javalang

    if isinstance(expr, javalang.tree.MemberReference):
        return [expr.member]
    if isinstance(expr, javalang.tree.BinaryOperation):
        left_names = self._collect_vars(expr.operandl)
        right_names = self._collect_vars(expr.operandr)
        return left_names + right_names
    return []
705
+
706
+ # -------------------------------------------------------------------------
707
+ # Location helper
708
+ # -------------------------------------------------------------------------
709
+
710
def _make_location(self, pos: Any) -> CodeLocation:
    """Build a CodeLocation in this file from a javalang position.

    Args:
        pos: Position object exposing ``line`` / ``column``, or None.

    Returns:
        A location at line 0 (no column given) when ``pos`` is None;
        otherwise the position's line (0 if missing or falsy) and its
        column (None if missing).
    """
    if pos is None:
        return CodeLocation(file=self.file_path, line=0)
    return CodeLocation(
        file=self.file_path,
        line=getattr(pos, "line", 0) or 0,
        column=getattr(pos, "column", None),
    )
716
+
717
+
718
+ # =============================================================================
719
+ # JavaParser
720
+ # =============================================================================
721
+
722
+
723
+ class JavaParser(BaseParser):
724
+ """
725
+ Parser for Java source files.
726
+
727
+ Uses the javalang library to build a CST and extract:
728
+ - Classes, methods, fields
729
+ - Imports
730
+ - Call sites (method invocations)
731
+ - Assignments (local variable declarations)
732
+ """
733
+
734
+ LANGUAGE: Language = Language.JAVA
735
+ SUPPORTED_EXTENSIONS: frozenset[str] = frozenset({".java"})
736
+
737
def parse_file(self, file_path: Path) -> ParsedFile:
    """Read a Java file from disk and parse it.

    The file is decoded as UTF-8 with undecodable bytes replaced.

    Args:
        file_path: Path of the .java file to read.

    Returns:
        The result of ``parse_source`` for the file's text, or an
        unsuccessful ParsedFile carrying a ParseError when the file cannot
        be read (OSError).
    """
    try:
        text = file_path.read_text(encoding="utf-8", errors="replace")
    except OSError as exc:
        read_failure = ParseError(f"Could not read file: {exc}", file_path)
        return ParsedFile(
            path=file_path,
            language=Language.JAVA,
            success=False,
            error=read_failure,
        )
    else:
        return self.parse_source(text, file_path)
748
+
749
def parse_source(self, source: str, file_path: Path | None = None) -> ParsedFile:
    """Parse Java source text into a ParsedFile.

    Args:
        source: Java source text.
        file_path: Origin of the text; ``Path("<unknown>")`` is used as the
            ParsedFile path when omitted.

    Returns:
        The extracted ParsedFile, or an unsuccessful ParsedFile carrying a
        ParseError when either javalang parsing (including importing
        javalang) or the extraction step raises.
    """
    target_path = file_path or Path("<unknown>")

    def failed(message: str) -> ParsedFile:
        # Both failure paths share the same ParsedFile shape.
        return ParsedFile(
            path=target_path,
            language=Language.JAVA,
            success=False,
            error=ParseError(message, file_path),
        )

    # Stage 1: build the javalang tree.
    try:
        import javalang

        tree = javalang.parse.parse(source)
    except Exception as exc:
        logger.debug("Failed to parse Java file %s: %s", file_path, exc)
        return failed(str(exc))

    # Stage 2: walk the tree into the language-agnostic structures.
    try:
        return JavaExtractor(target_path, source, tree).extract()
    except Exception as exc:
        logger.warning("Extraction failed for %s: %s", file_path, exc)
        return failed(f"Extraction error: {exc}")