apisec-code-bolt 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. apisec_code_bolt/__init__.py +42 -0
  2. apisec_code_bolt/__main__.py +11 -0
  3. apisec_code_bolt/analysis/__init__.py +96 -0
  4. apisec_code_bolt/analysis/analyzer.py +2309 -0
  5. apisec_code_bolt/analysis/binding_tracker.py +341 -0
  6. apisec_code_bolt/analysis/call_graph.py +1197 -0
  7. apisec_code_bolt/analysis/call_graph_types.py +332 -0
  8. apisec_code_bolt/analysis/call_resolver.py +988 -0
  9. apisec_code_bolt/analysis/capability_tagger.py +322 -0
  10. apisec_code_bolt/analysis/config_scanner.py +197 -0
  11. apisec_code_bolt/analysis/data_flow.py +1883 -0
  12. apisec_code_bolt/analysis/dependency_extractor.py +959 -0
  13. apisec_code_bolt/analysis/flow_analysis.py +1406 -0
  14. apisec_code_bolt/analysis/hof_catalog.py +61 -0
  15. apisec_code_bolt/analysis/integration_detector.py +1399 -0
  16. apisec_code_bolt/analysis/literal_scanner.py +300 -0
  17. apisec_code_bolt/analysis/path_normalizer.py +55 -0
  18. apisec_code_bolt/analysis/read_site_detector.py +310 -0
  19. apisec_code_bolt/analysis/request_patterns.py +162 -0
  20. apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
  21. apisec_code_bolt/analysis/sink_evidence.py +333 -0
  22. apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
  23. apisec_code_bolt/cli/__init__.py +5 -0
  24. apisec_code_bolt/cli/exit_codes.py +17 -0
  25. apisec_code_bolt/cli/main.py +1069 -0
  26. apisec_code_bolt/cloud/__init__.py +1 -0
  27. apisec_code_bolt/cloud/apisec_client.py +118 -0
  28. apisec_code_bolt/cloud/client.py +255 -0
  29. apisec_code_bolt/core/__init__.py +75 -0
  30. apisec_code_bolt/core/config.py +528 -0
  31. apisec_code_bolt/core/credentials.py +65 -0
  32. apisec_code_bolt/core/discovery.py +433 -0
  33. apisec_code_bolt/core/log_format.py +115 -0
  34. apisec_code_bolt/core/manifest.py +1009 -0
  35. apisec_code_bolt/core/repo.py +280 -0
  36. apisec_code_bolt/core/state.py +59 -0
  37. apisec_code_bolt/core/telemetry.py +451 -0
  38. apisec_code_bolt/core/types.py +587 -0
  39. apisec_code_bolt/fingerprinting/__init__.py +1 -0
  40. apisec_code_bolt/frameworks/__init__.py +29 -0
  41. apisec_code_bolt/frameworks/_jwt_common.py +50 -0
  42. apisec_code_bolt/frameworks/auth_helpers.py +437 -0
  43. apisec_code_bolt/frameworks/base.py +608 -0
  44. apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
  45. apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
  46. apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
  47. apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
  48. apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
  49. apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
  50. apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
  51. apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
  52. apisec_code_bolt/frameworks/java/__init__.py +6 -0
  53. apisec_code_bolt/frameworks/java/_annotations.py +167 -0
  54. apisec_code_bolt/frameworks/java/_constraints.py +128 -0
  55. apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
  56. apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
  57. apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
  58. apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
  59. apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
  60. apisec_code_bolt/frameworks/js/__init__.py +8 -0
  61. apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
  62. apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
  63. apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
  64. apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
  65. apisec_code_bolt/frameworks/python/__init__.py +19 -0
  66. apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
  67. apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
  68. apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
  69. apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
  70. apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
  71. apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
  72. apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
  73. apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
  74. apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
  75. apisec_code_bolt/parsing/__init__.py +62 -0
  76. apisec_code_bolt/parsing/base.py +554 -0
  77. apisec_code_bolt/parsing/csharp/__init__.py +5 -0
  78. apisec_code_bolt/parsing/csharp/language_services.py +203 -0
  79. apisec_code_bolt/parsing/csharp/literals.py +72 -0
  80. apisec_code_bolt/parsing/csharp/parser.py +1158 -0
  81. apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
  82. apisec_code_bolt/parsing/js/__init__.py +5 -0
  83. apisec_code_bolt/parsing/js/language_services.py +118 -0
  84. apisec_code_bolt/parsing/js/parser.py +622 -0
  85. apisec_code_bolt/parsing/jvm/__init__.py +7 -0
  86. apisec_code_bolt/parsing/jvm/language_services.py +270 -0
  87. apisec_code_bolt/parsing/jvm/parser.py +774 -0
  88. apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
  89. apisec_code_bolt/parsing/python/__init__.py +150 -0
  90. apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
  91. apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
  92. apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
  93. apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
  94. apisec_code_bolt/parsing/python/expression_utils.py +221 -0
  95. apisec_code_bolt/parsing/python/extraction_types.py +271 -0
  96. apisec_code_bolt/parsing/python/language_services.py +487 -0
  97. apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
  98. apisec_code_bolt/parsing/python/parser.py +719 -0
  99. apisec_code_bolt/parsing/python/path_resolver.py +576 -0
  100. apisec_code_bolt/parsing/python/router_registry.py +806 -0
  101. apisec_code_bolt/parsing/python/type_resolver.py +730 -0
  102. apisec_code_bolt/parsing/python/visitors.py +1544 -0
  103. apisec_code_bolt/parsing/services.py +544 -0
  104. apisec_code_bolt/query/__init__.py +1 -0
  105. apisec_code_bolt/query/ast_cache.py +182 -0
  106. apisec_code_bolt/query/executor.py +283 -0
  107. apisec_code_bolt/query/handlers.py +832 -0
  108. apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
  109. apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
  110. apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
  111. apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,300 @@
1
+ """
2
+ Literal-pattern scanner.
3
+
4
+ Sweeps all string literals in parsed files and detects security-relevant
5
+ patterns:
6
+
7
+ - **URLs**: External endpoints the service communicates with.
8
+ - **SQL fragments**: Query structures, even those not in tracked data flows.
9
+ - **Connection strings**: Database/cache/broker URLs with known schemes.
10
+
11
+ Results populate the ``LiteralsModel`` section of the manifest.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import re
17
+ from dataclasses import dataclass
18
+ from typing import TYPE_CHECKING
19
+
20
+ if TYPE_CHECKING:
21
+ from ..core.manifest import LiteralPatternModel
22
+ from ..parsing.base import ParsedFile
23
+
24
+
25
+ # ============================================================================
26
+ # Pattern rules
27
+ # ============================================================================
28
+
29
# Plain HTTP(S) URLs.  The match stops at whitespace, quotes and the
# bracket/punctuation characters listed in the negated class.
_URL_RE = re.compile(
    r'https?://[^\s\'"<>,;)\]}{]+',
)

# Database / cache / broker URLs with a known scheme.
#
# An optional "+driver" suffix is accepted after the scheme so that
# SQLAlchemy-style URLs such as "postgresql+psycopg2://..." and
# "mysql+pymysql://..." are detected; this also covers "mongodb+srv://".
# "rediss://" (Redis over TLS) is recognised alongside "redis://".
_CONNECTION_STRING_RE = re.compile(
    r"(?:postgres(?:ql)?|mysql|sqlite|mongodb|rediss?|amqps?|nats|kafka)"
    r"(?:\+[\w.-]+)?://"
    r'[^\s\'"<>,;)\]}{]+',
    re.IGNORECASE,
)

# Leading keywords of common SQL statements (case-insensitive).
_SQL_FRAGMENT_RE = re.compile(
    r"\b(?:SELECT\s+.+?\s+FROM|INSERT\s+INTO|UPDATE\s+\w+\s+SET|DELETE\s+FROM|"
    r"CREATE\s+TABLE|ALTER\s+TABLE|DROP\s+TABLE|EXEC(?:UTE)?)\b",
    re.IGNORECASE,
)

# Substrings that make a variable or field name look like it holds a secret.
_SECRET_NAME_RE = re.compile(
    r"(?:SECRET|TOKEN|PASSWORD|PASSWD|API_KEY|APIKEY|PRIVATE_KEY|"
    r"ACCESS_KEY|AUTH_KEY|ENCRYPTION_KEY|SIGNING_KEY|CLIENT_SECRET|"
    r"CREDENTIAL|DB_PASS|DATABASE_PASSWORD)",
    re.IGNORECASE,
)
51
+
52
+
53
+ # ============================================================================
54
+ # Detected patterns
55
+ # ============================================================================
56
+
57
+
58
@dataclass
class DetectedLiteral:
    """One security-relevant literal found by the scanner.

    Field notes:
      * ``pattern_type`` -- the scanner in this module emits "url",
        "sql_pattern", "connection_string" and "secret".
      * ``value`` -- the matched text as reported (redacted and/or
        truncated by the scanner before it is stored here).
      * ``file`` / ``line`` -- where the literal was found.
      * ``confidence`` -- defaults to "HIGH".
      * ``notes`` -- optional free-text detail about the finding.
    """

    pattern_type: str
    value: str
    file: str
    line: int
    confidence: str = "HIGH"
    notes: str | None = None
68
+
69
+
70
+ # ============================================================================
71
+ # LiteralScanner
72
+ # ============================================================================
73
+
74
+
75
class LiteralScanner:
    """
    Scans all string literals in parsed files for security-relevant patterns.

    Three sources are inspected per file, in this order: assignments,
    class-level field defaults, and string-literal call arguments.  Findings
    are de-duplicated through a shared ``seen`` set for the whole scan.
    """

    def __init__(self, parsed_files: list[ParsedFile]) -> None:
        # Only files that parsed successfully are scanned.
        self._parsed_files = [pf for pf in parsed_files if pf.success]

    def scan(self) -> list[DetectedLiteral]:
        """Run the full scan and return detected literals."""
        results: list[DetectedLiteral] = []
        # Keys are ("secret", file, name) or (pattern_type, matched_text).
        seen: set[tuple[str, ...]] = set()

        for pf in self._parsed_files:
            file_str = str(pf.path)
            self._scan_assignments(pf, file_str, results, seen)
            self._scan_class_fields(pf, file_str, results, seen)
            self._scan_call_arguments(pf, file_str, results, seen)

        return results

    def _scan_assignments(
        self,
        pf: ParsedFile,
        file_str: str,
        results: list[DetectedLiteral],
        seen: set[tuple[str, ...]],
    ) -> None:
        """Check assignment values for hardcoded secrets and URL/SQL patterns."""
        for assign in pf.assignments:
            line = assign.location.line if assign.location else 0

            # Normalise the candidate value for scanning.  Covers both plain
            # literals and string-interpolated/concatenated expressions.
            is_interp = getattr(assign, "is_string_interpolation", False)
            if assign.source_type == "literal" and isinstance(assign.source_value, str):
                scan_val: str | None = assign.source_value or None
            elif is_interp and assign.source_value:
                scan_val = str(assign.source_value)
            else:
                scan_val = None
            if not scan_val:
                continue

            # Hardcoded secret: the target name looks sensitive and the value
            # is a non-empty string.
            if _SECRET_NAME_RE.search(assign.target):
                self._record_secret(
                    assign.target, scan_val, "variable", file_str, line, results, seen
                )

            self._scan_string(scan_val, file_str, line, results, seen)

    def _scan_class_fields(
        self,
        pf: ParsedFile,
        file_str: str,
        results: list[DetectedLiteral],
        seen: set[tuple[str, ...]],
    ) -> None:
        """Check class-level field defaults for hardcoded secrets.

        Java constants (e.g. ``private static final String JWT_SECRET = "..."``)
        are field objects, not local-variable assignments.
        """
        for cls in pf.classes:
            for field in cls.fields:
                val = field.default_value
                if not val or not isinstance(val, str):
                    continue
                if not _SECRET_NAME_RE.search(field.name):
                    continue
                # The field carries no line number here; the class location is
                # used as a best-effort pointer to the enclosing declaration.
                field_line = cls.location.line if cls.location else 0
                self._record_secret(
                    field.name, val, "field", file_str, field_line, results, seen
                )

    def _scan_call_arguments(
        self,
        pf: ParsedFile,
        file_str: str,
        results: list[DetectedLiteral],
        seen: set[tuple[str, ...]],
    ) -> None:
        """Scan string-literal call arguments for URL/SQL/connection patterns."""
        for cs in pf.call_sites:
            # Guarded like the assignment path: a missing location maps to 0.
            line = cs.location.line if cs.location else 0
            for arg in cs.arguments:
                if not arg.is_literal or arg.literal_type != "str":
                    continue
                val = arg.literal_value
                if not val or not isinstance(val, str):
                    continue
                self._scan_string(val, file_str, line, results, seen)

    def _record_secret(
        self,
        name: str,
        value: str,
        holder: str,
        file: str,
        line: int,
        results: list[DetectedLiteral],
        seen: set[tuple[str, ...]],
    ) -> None:
        """Append one redacted "secret" finding, once per (file, name)."""
        key = ("secret", file, name)
        if key in seen:
            return
        seen.add(key)
        results.append(
            DetectedLiteral(
                pattern_type="secret",
                value=self._redact_secret(name, value),
                file=file,
                line=line,
                notes=f"Hardcoded value in {holder} '{name}'",
            )
        )

    def _scan_string(
        self,
        value: str,
        file: str,
        line: int,
        results: list[DetectedLiteral],
        seen: set[tuple[str, ...]],
    ) -> None:
        """Scan a single string for all pattern types."""
        # Connection strings (check before generic URLs to get specific type)
        for m in _CONNECTION_STRING_RE.finditer(value):
            matched = m.group()
            key = ("connection_string", matched)
            if key in seen:
                continue
            seen.add(key)
            # Redact credentials: scheme://user:pass@host -> scheme://***@host
            sanitized = self._redact_credentials(matched)
            results.append(
                DetectedLiteral(
                    pattern_type="connection_string",
                    value=sanitized[:200],
                    file=file,
                    line=line,
                    notes=self._classify_connection_scheme(matched),
                )
            )

        # Generic URLs
        for m in _URL_RE.finditer(value):
            matched = m.group()
            key = ("url", matched)
            if key in seen:
                continue
            seen.add(key)
            results.append(
                DetectedLiteral(
                    pattern_type="url",
                    # Embedded "user:token@" must not reach the report.
                    value=self._redact_url_userinfo(matched)[:200],
                    file=file,
                    line=line,
                )
            )

        # SQL fragments
        for m in _SQL_FRAGMENT_RE.finditer(value):
            matched = m.group()
            # Deduplicate by normalized form
            norm = matched.strip().upper()[:80]
            key = ("sql_pattern", norm)
            if key in seen:
                continue
            seen.add(key)
            results.append(
                DetectedLiteral(
                    pattern_type="sql_pattern",
                    value=matched.strip()[:200],
                    file=file,
                    line=line,
                )
            )

    @staticmethod
    def _redact_credentials(url: str) -> str:
        """Replace the user:password part of a connection URL with ``***``.

        Everything between ``://`` and the *last* ``@`` that precedes any
        ``?`` or ``#`` is replaced.  Using the last ``@`` keeps a password
        that itself contains ``@`` from leaking its tail; stopping at the
        query/fragment leaves credential-free URLs such as
        ``scheme://host/db?email=a@b.com`` untouched.
        """
        return re.sub(r"://[^?#]*@", "://***@", url, count=1)

    @staticmethod
    def _redact_url_userinfo(url: str) -> str:
        """Mask ``user:password@`` in the authority of an http(s) URL.

        Only the authority (text before the first ``/``, ``?`` or ``#``) is
        considered, so an ``@`` in a path such as ``/@scope/pkg`` is kept.
        """
        return re.sub(r"^(https?://)[^/?#]*@", r"\1***@", url)

    @staticmethod
    def _redact_secret(var_name: str, value: str) -> str:
        """Return a redacted representation: variable name + masked value.

        Values shorter than 8 characters are masked entirely, because showing
        two characters at each end would reveal half or more of the secret.
        Longer values keep their first and last two characters around at most
        16 asterisks.
        """
        if len(value) < 8:
            masked = "***"
        else:
            masked = value[:2] + "*" * min(len(value) - 4, 16) + value[-2:]
        return f'{var_name} = "{masked}"'

    @staticmethod
    def _classify_connection_scheme(url: str) -> str | None:
        """Return a human-readable type for a connection string scheme."""
        lower = url.lower()
        # Prefix checks, so "postgresql", "amqps", "rediss" and "+driver"
        # variants fall under their base scheme.
        labels = (
            ("postgres", "PostgreSQL connection"),
            ("mysql", "MySQL connection"),
            ("sqlite", "SQLite connection"),
            ("mongodb", "MongoDB connection"),
            ("redis", "Redis connection"),
            ("amqp", "RabbitMQ connection"),
            ("nats", "NATS connection"),
            ("kafka", "Kafka connection"),
        )
        for prefix, label in labels:
            if lower.startswith(prefix):
                return label
        return None

    # ------------------------------------------------------------------
    # Conversion to manifest models
    # ------------------------------------------------------------------

    def to_manifest_models(
        self,
        detections: list[DetectedLiteral] | None = None,
    ) -> LiteralsModel:  # noqa: F821
        """Convert detections to manifest LiteralsModel.

        When ``detections`` is None a fresh ``scan()`` is run.  Connection
        strings are reported in the ``urls`` list together with plain URLs;
        detections of any other, unknown pattern type are dropped.
        """
        from ..core.manifest import LiteralPatternModel, LiteralsModel, LocationModel

        if detections is None:
            detections = self.scan()

        urls: list[LiteralPatternModel] = []
        sql_patterns: list[LiteralPatternModel] = []
        secret_patterns: list[LiteralPatternModel] = []
        buckets = {
            "url": urls,
            "connection_string": urls,
            "sql_pattern": sql_patterns,
            "secret": secret_patterns,
        }

        for det in detections:
            model = LiteralPatternModel(
                type=det.pattern_type,
                value=det.value,
                location=LocationModel(file=det.file, line=det.line),
                confidence=det.confidence,
                notes=det.notes,
            )
            bucket = buckets.get(det.pattern_type)
            if bucket is not None:
                bucket.append(model)

        return LiteralsModel(
            urls=urls,
            sql_patterns=sql_patterns,
            secret_patterns=secret_patterns,
        )
@@ -0,0 +1,55 @@
1
+ """
2
+ Path normalization for route parameter syntax across frameworks.
3
+
4
+ Converts framework-specific path param syntax to a canonical form
5
+ for the manifest: {name} or {name:type}.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import re
11
+
12
# Flask/Django angle-bracket params: <id>, <int:id>, <uuid:pk>, <path:name>.
# A converter may carry an argument list, e.g. <string(length=2):lang> or
# <any(a,b):page>; the arguments are matched but not captured, so group 1 is
# still the converter type and group 2 the parameter name.
FLASK_DJANGO_PARAM = re.compile(r"<(?:(?P<type>\w+)(?:\([^)]*\))?:)?(?P<name>\w+)>")

# FastAPI/Starlette: {id}, {id:int}
FASTAPI_PARAM = re.compile(r"\{(\w+)(?::[^}]*)?\}")
17
+
18
+
19
def normalize_path(path: str, framework: str = "fastapi") -> str:
    """
    Rewrite route parameters in ``path`` to the canonical ``{name}`` form.

    Args:
        path: Route path (e.g. /users/<int:id> or /users/{id}).
        framework: Source framework name; compared case-insensitively.

    Returns:
        The path with converter/type information removed from each
        parameter.  A falsy ``path`` is returned as-is.
    """
    if not path:
        return path

    flavour = framework.lower()

    if flavour in ("flask", "django"):
        # Angle-bracket syntax only.
        rewrites = ((FLASK_DJANGO_PARAM, r"{\2}"),)
    elif flavour in ("fastapi", "starlette"):
        # Already brace syntax; only the optional ":type" suffix is dropped.
        rewrites = ((FASTAPI_PARAM, r"{\1}"),)
    else:
        # Unknown framework: apply both rewrites, angle brackets first.
        rewrites = ((FLASK_DJANGO_PARAM, r"{\2}"), (FASTAPI_PARAM, r"{\1}"))

    normalized = path
    for pattern, replacement in rewrites:
        normalized = pattern.sub(replacement, normalized)
    return normalized
46
+
47
+
48
def extract_path_param_names(path: str) -> list[str]:
    """Return the parameter names found in a route path.

    Angle-bracket (Flask/Django) names come first, followed by brace
    (FastAPI/Starlette) names; within each group names keep path order.
    """
    angle_names = [hit.group("name") for hit in FLASK_DJANGO_PARAM.finditer(path)]
    brace_names = [hit.group(1) for hit in FASTAPI_PARAM.finditer(path)]
    return angle_names + brace_names
@@ -0,0 +1,310 @@
1
+ """
2
+ Read-site origin detection for request and environment variable access patterns.
3
+
4
+ Identifies data origins when handlers read from the request object
5
+ directly (request.args.get("id"), request.path_params["id"], etc.)
6
+ or from environment variables (os.environ["KEY"], os.getenv("KEY"), etc.)
7
+ rather than via function parameters. Used for Flask, Django, and
8
+ any framework where request data is accessed in the handler body.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import re
14
+ from dataclasses import dataclass
15
+ from pathlib import Path
16
+ from typing import TYPE_CHECKING
17
+
18
+ from .request_patterns import get_request_pattern_registry
19
+
20
+ if TYPE_CHECKING:
21
+ from ..parsing.base import ParsedFile, ParsedFunction
22
+
23
+
24
# Regex patterns for request access in assignment RHS.
# Each entry is (regex, origin_type, param_group): ``param_group`` is the
# regex group holding the parameter/header/cookie name, or None when the
# access has no name (e.g. a whole-body read).  Consumers take the first
# entry that matches, so list order is significant.
REQUEST_ACCESS_PATTERNS: list[tuple[re.Pattern, str, int | None]] = [
    # request.path_params["id"] or request.path_params['id']
    (re.compile(r"request\.path_params\s*\[\s*['\"]([^'\"]+)['\"]\s*\]"), "HTTP_PATH_PARAM", 1),
    # request.path_params.get("id")
    (re.compile(r"request\.path_params\.get\s*\(\s*['\"]([^'\"]+)['\"]"), "HTTP_PATH_PARAM", 1),
    # request.query_params["page"]
    (re.compile(r"request\.query_params\s*\[\s*['\"]([^'\"]+)['\"]\s*\]"), "HTTP_QUERY_PARAM", 1),
    # request.query_params.get("page")
    (re.compile(r"request\.query_params\.get\s*\(\s*['\"]([^'\"]+)['\"]"), "HTTP_QUERY_PARAM", 1),
    # request.args.get("id") - Flask
    (re.compile(r"request\.args\.get\s*\(\s*['\"]([^'\"]+)['\"]"), "HTTP_QUERY_PARAM", 1),
    # request.args["id"]
    (re.compile(r"request\.args\s*\[\s*['\"]([^'\"]+)['\"]\s*\]"), "HTTP_QUERY_PARAM", 1),
    # request.GET.get("id") - Django
    (re.compile(r"request\.GET\.get\s*\(\s*['\"]([^'\"]+)['\"]"), "HTTP_QUERY_PARAM", 1),
    # request.POST.get("id") - Django
    (re.compile(r"request\.POST\.get\s*\(\s*['\"]([^'\"]+)['\"]"), "HTTP_FORM", 1),
    # request.form.get("id") - Flask
    (re.compile(r"request\.form\.get\s*\(\s*['\"]([^'\"]+)['\"]"), "HTTP_FORM", 1),
    # request.form["id"]
    (re.compile(r"request\.form\s*\[\s*['\"]([^'\"]+)['\"]\s*\]"), "HTTP_FORM", 1),
    # request.headers.get("X-API-Key") - group 1 is the header name
    (re.compile(r"request\.headers\.get\s*\(\s*['\"]([^'\"]+)['\"]"), "HTTP_HEADER", 1),
    # request.headers["X-API-Key"]
    (re.compile(r"request\.headers\s*\[\s*['\"]([^'\"]+)['\"]\s*\]"), "HTTP_HEADER", 1),
    # request.cookies.get("session")
    (re.compile(r"request\.cookies\.get\s*\(\s*['\"]([^'\"]+)['\"]"), "HTTP_COOKIE", 1),
    # request.json() - body, no param name
    (re.compile(r"request\.json\s*\(\s*\)"), "HTTP_BODY", None),
    (re.compile(r"request\.get_json\s*\(\s*\)"), "HTTP_BODY", None),
    # request.body - Django/Starlette
    (re.compile(r"request\.body\b"), "HTTP_BODY", None),
    # request.data - Flask/Django DRF
    (re.compile(r"request\.data\b"), "HTTP_BODY", None),
    # Subscript / Django forms that had no counterpart above.  They are
    # appended last so every source that already matched an earlier entry
    # keeps producing exactly the same origin.
    # request.GET["id"] - Django
    (re.compile(r"request\.GET\s*\[\s*['\"]([^'\"]+)['\"]\s*\]"), "HTTP_QUERY_PARAM", 1),
    # request.POST["id"] - Django
    (re.compile(r"request\.POST\s*\[\s*['\"]([^'\"]+)['\"]\s*\]"), "HTTP_FORM", 1),
    # request.cookies["session"] - Flask / request.COOKIES["session"] - Django
    (
        re.compile(r"request\.(?:cookies|COOKIES)\s*\[\s*['\"]([^'\"]+)['\"]\s*\]"),
        "HTTP_COOKIE",
        1,
    ),
    # request.COOKIES.get("session") - Django
    (re.compile(r"request\.COOKIES\.get\s*\(\s*['\"]([^'\"]+)['\"]"), "HTTP_COOKIE", 1),
]

# Java / Spring Boot read-site patterns (HttpServletRequest, etc.)
JAVA_REQUEST_ACCESS_PATTERNS: list[tuple[re.Pattern, str, int | None]] = [
    # request.getParameter("name") or httpRequest.getParameter("name")
    (re.compile(r"(?:\w+\.)?getParameter\s*\(\s*\"([^\"]+)\""), "HTTP_QUERY_PARAM", 1),
    # request.getPathVariable("id") / UriComponentsBuilder pattern
    (re.compile(r"(?:\w+\.)?getPathVariable\s*\(\s*\"([^\"]+)\""), "HTTP_PATH_PARAM", 1),
    # uriVariables.get("id") (Spring PathVariables map)
    (
        re.compile(r"(?:uriVariables|pathVariables|pathParams)\s*\.get\s*\(\s*\"([^\"]+)\""),
        "HTTP_PATH_PARAM",
        1,
    ),
    # request.getHeader("X-API-Key")
    (re.compile(r"(?:\w+\.)?getHeader\s*\(\s*\"([^\"]+)\""), "HTTP_HEADER", 1),
    # request.getCookies() - no param name
    (re.compile(r"(?:\w+\.)?getCookies\s*\("), "HTTP_COOKIE", None),
    # request.getInputStream() / getReader() - body
    (re.compile(r"(?:\w+\.)?(?:getInputStream|getReader)\s*\("), "HTTP_BODY", None),
    # httpExchange.getRequestBody() (Java HttpServer)
    (re.compile(r"(?:\w+\.)?getRequestBody\s*\("), "HTTP_BODY", None),
    # System.getenv("KEY")
    (re.compile(r"System\.getenv\s*\(\s*\"([^\"]+)\""), "ENVIRONMENT_VAR", 1),
    # System.getProperty("key")
    (re.compile(r"System\.getProperty\s*\(\s*\"([^\"]+)\""), "ENVIRONMENT_VAR", 1),
]

# .NET / ASP.NET Core read-site patterns
DOTNET_REQUEST_ACCESS_PATTERNS: list[tuple[re.Pattern, str, int | None]] = [
    # HttpContext.Request.Query["key"] or Request.Query["key"]
    (re.compile(r"(?:\w+\.)?Request\.Query\s*\[\s*\"([^\"]+)\""), "HTTP_QUERY_PARAM", 1),
    # Request.Query.TryGetValue("key", ...) or Request.Query.ContainsKey("key")
    (
        re.compile(r"(?:\w+\.)?Request\.Query(?:\.TryGetValue|\.ContainsKey)\s*\(\s*\"([^\"]+)\""),
        "HTTP_QUERY_PARAM",
        1,
    ),
    # Request.RouteValues["key"]
    (re.compile(r"(?:\w+\.)?Request\.RouteValues\s*\[\s*\"([^\"]+)\""), "HTTP_PATH_PARAM", 1),
    # routeValues["key"] (from RouteData.Values)
    (
        re.compile(r"(?:routeValues|routeData\.Values|RouteData\.Values)\s*\[\s*\"([^\"]+)\""),
        "HTTP_PATH_PARAM",
        1,
    ),
    # Request.Headers["X-API-Key"]
    (re.compile(r"(?:\w+\.)?Request\.Headers\s*\[\s*\"([^\"]+)\""), "HTTP_HEADER", 1),
    # Request.Headers.TryGetValue("X-API-Key", ...)
    (
        re.compile(r"(?:\w+\.)?Request\.Headers\s*\.\s*TryGetValue\s*\(\s*\"([^\"]+)\""),
        "HTTP_HEADER",
        1,
    ),
    # Request.Cookies["session"]
    (re.compile(r"(?:\w+\.)?Request\.Cookies\s*\[\s*\"([^\"]+)\""), "HTTP_COOKIE", 1),
    # Request.Form["field"]
    (re.compile(r"(?:\w+\.)?Request\.Form\s*\[\s*\"([^\"]+)\""), "HTTP_FORM", 1),
    # Request.Body - no param name
    (re.compile(r"(?:\w+\.)?Request\.Body\b"), "HTTP_BODY", None),
    # await Request.ReadFromJsonAsync<T>() or ReadAsStringAsync()
    (
        re.compile(
            r"(?:\w+\.)?Request\.(?:ReadFromJsonAsync|ReadAsStringAsync|GetFromJsonAsync)\b"
        ),
        "HTTP_BODY",
        None,
    ),
    # Environment.GetEnvironmentVariable("KEY")
    (re.compile(r"Environment\.GetEnvironmentVariable\s*\(\s*\"([^\"]+)\""), "ENVIRONMENT_VAR", 1),
    # config["key"] or configuration["key"] (IConfiguration)
    (re.compile(r"(?:_?config(?:uration)?)\s*\[\s*\"([^\"]+)\""), "ENVIRONMENT_VAR", 1),
]

# Regex patterns for environment variable access in assignment RHS
ENV_ACCESS_PATTERNS: list[tuple[re.Pattern, str, int | None]] = [
    # os.environ["KEY"] or os.environ['KEY']
    (re.compile(r"(?:os\.)?environ\s*\[\s*['\"]([^'\"]+)['\"]\s*\]"), "ENVIRONMENT_VAR", 1),
    # os.environ.get("KEY") or os.environ.get("KEY", default)
    (re.compile(r"(?:os\.)?environ\.get\s*\(\s*['\"]([^'\"]+)['\"]"), "ENVIRONMENT_VAR", 1),
    # os.getenv("KEY") or os.getenv("KEY", default)
    (re.compile(r"(?:os\.)?getenv\s*\(\s*['\"]([^'\"]+)['\"]"), "ENVIRONMENT_VAR", 1),
    # Java: System.getenv("KEY")
    (re.compile(r"System\.getenv\s*\(\s*\"([^\"]+)\""), "ENVIRONMENT_VAR", 1),
    # Java: System.getProperty("key")
    (re.compile(r"System\.getProperty\s*\(\s*\"([^\"]+)\""), "ENVIRONMENT_VAR", 1),
    # .NET: Environment.GetEnvironmentVariable("KEY")
    (re.compile(r"Environment\.GetEnvironmentVariable\s*\(\s*\"([^\"]+)\""), "ENVIRONMENT_VAR", 1),
]
148
+
149
+
150
@dataclass
class ReadSiteOrigin:
    """A data origin found where an assignment reads from the request or env.

    Field notes:
      * ``origin_type`` -- origin type name, e.g. "HTTP_PATH_PARAM".
      * ``param_name`` -- name of the parameter/header/key that was read.
      * ``target_variable`` -- the variable the value was assigned to.
      * ``line`` / ``file_path`` -- location of the assignment.
      * ``framework`` -- framework name recorded by the detector (default "").
    """

    origin_type: str
    param_name: str | None
    target_variable: str
    line: int
    file_path: Path
    framework: str = ""
160
+
161
+
162
class ReadSiteDetector:
    """
    Detects data origins from request access in assignment RHS.

    Scans handler bodies for assignments like:
        user_id = request.path_params["user_id"]
        page = request.args.get("page", 1)
    and registers them as taint origins.
    """

    # Lower-cased substrings used as a cheap pre-filter before the regex
    # tables are consulted.  Each table pattern needs a keyword here to be
    # reachable: "getcookies" and "pathparams" (Java) and "getfromjsonasync",
    # "routevalues", "routedata.values" (.NET) cover patterns that exist in
    # the tables but previously could never be selected.
    # NOTE(review): the .NET ``config["key"]`` pattern still has no keyword of
    # its own; enabling it would also fire on non-.NET sources - confirm intent.
    _JAVA_KEYWORDS = (
        "getparameter",
        "getheader",
        "getpathvariable",
        "getinputstream",
        "getreader",
        "getrequestbody",
        "getcookies",
        "urivariables",
        "pathvariables",
        "pathparams",
    )
    _DOTNET_KEYWORDS = (
        "request.query",
        "request.routevalues",
        "request.headers",
        "request.cookies",
        "request.form",
        "request.body",
        "readfromjsonasync",
        "readasstringasync",
        "getfromjsonasync",
        "routevalues",
        "routedata.values",
    )
    _ENV_KEYWORDS = ("environ", "getenv", "getproperty", "getenvironmentvariable")

    def __init__(self, framework: str = "fastapi"):
        self._framework = framework.lower()
        self._registry = get_request_pattern_registry()

    def detect_origins(
        self,
        func: ParsedFunction,
        parsed_file: ParsedFile,
    ) -> list[ReadSiteOrigin]:
        """
        Detect read-site origins from assignments in a function.

        Args:
            func: The function whose assignments are inspected.
            parsed_file: The parsed file holding the assignments.

        Returns:
            One origin per matching assignment, in assignment order.  At most
            one origin is produced per assignment (first matching pattern).
        """
        origins: list[ReadSiteOrigin] = []
        qname = func.qualified_name
        func_qname = qname.full if hasattr(qname, "full") else str(qname)

        if not hasattr(parsed_file, "assignments"):
            return origins

        for assign in parsed_file.assignments:
            if not self._belongs_to(assign, func, func_qname):
                continue
            if assign.source_type not in ("call", "expression", "variable"):
                continue

            source = assign.source_value or assign.value_source or ""
            if not source:
                continue

            has_request, has_java, has_dotnet, has_env = self._source_flags(source)
            if not (has_request or has_java or has_dotnet or has_env):
                continue

            hit = None
            if has_request or has_java or has_dotnet:
                # .NET and Java tables are tried first, and only when their
                # keywords are present; the generic table is always tried.
                candidates = (
                    (DOTNET_REQUEST_ACCESS_PATTERNS if has_dotnet else [])
                    + (JAVA_REQUEST_ACCESS_PATTERNS if has_java else [])
                    + REQUEST_ACCESS_PATTERNS
                )
                hit = self._first_match(source, candidates)
            if hit is None and has_env:
                hit = self._first_match(source, ENV_ACCESS_PATTERNS)
            if hit is None:
                continue

            origin_type_name, param_name = hit
            location = assign.location
            origins.append(
                ReadSiteOrigin(
                    origin_type=origin_type_name,
                    # Fall back to the assigned variable when the pattern
                    # captured no name.
                    param_name=param_name or assign.target,
                    target_variable=assign.target,
                    # A missing location maps to line 0 and the file's path.
                    line=location.line if location else 0,
                    file_path=Path(str(location.file)) if location else parsed_file.path,
                    framework=self._framework,
                )
            )

        return origins

    @staticmethod
    def _belongs_to(assign, func: ParsedFunction, func_qname: str) -> bool:
        """Return True when ``assign`` was recorded inside ``func``.

        The assignment's function name may equal the qualified name, equal the
        bare function name, or be a dotted suffix of the qualified name.
        """
        raw = assign.in_function
        owner = raw.full if hasattr(raw, "full") else str(raw or "")
        if not owner:
            return False
        return owner in (func_qname, func.name) or func_qname.endswith(f".{owner}")

    @classmethod
    def _source_flags(cls, source: str) -> tuple[bool, bool, bool, bool]:
        """Return (has_request, has_java, has_dotnet, has_env) for ``source``.

        Checks are case-insensitive substring tests on the expression text.
        """
        lowered = source.lower()
        return (
            "request" in lowered,
            any(kw in lowered for kw in cls._JAVA_KEYWORDS),
            any(kw in lowered for kw in cls._DOTNET_KEYWORDS),
            any(kw in lowered for kw in cls._ENV_KEYWORDS),
        )

    @staticmethod
    def _first_match(
        source: str,
        patterns: list[tuple[re.Pattern, str, int | None]],
    ) -> tuple[str, str | None] | None:
        """Return (origin_type, param_name) for the first matching pattern.

        ``param_name`` is None when the entry has no parameter group.  Returns
        None when no pattern matches.
        """
        for pattern, origin_type_name, param_group in patterns:
            match = pattern.search(source)
            if match is None:
                continue
            param_name = None
            if param_group is not None and param_group <= len(match.groups()):
                param_name = match.group(param_group)
            return origin_type_name, param_name
        return None