apisec-code-bolt 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apisec_code_bolt/__init__.py +42 -0
- apisec_code_bolt/__main__.py +11 -0
- apisec_code_bolt/analysis/__init__.py +96 -0
- apisec_code_bolt/analysis/analyzer.py +2309 -0
- apisec_code_bolt/analysis/binding_tracker.py +341 -0
- apisec_code_bolt/analysis/call_graph.py +1197 -0
- apisec_code_bolt/analysis/call_graph_types.py +332 -0
- apisec_code_bolt/analysis/call_resolver.py +988 -0
- apisec_code_bolt/analysis/capability_tagger.py +322 -0
- apisec_code_bolt/analysis/config_scanner.py +197 -0
- apisec_code_bolt/analysis/data_flow.py +1883 -0
- apisec_code_bolt/analysis/dependency_extractor.py +959 -0
- apisec_code_bolt/analysis/flow_analysis.py +1406 -0
- apisec_code_bolt/analysis/hof_catalog.py +61 -0
- apisec_code_bolt/analysis/integration_detector.py +1399 -0
- apisec_code_bolt/analysis/literal_scanner.py +300 -0
- apisec_code_bolt/analysis/path_normalizer.py +55 -0
- apisec_code_bolt/analysis/read_site_detector.py +310 -0
- apisec_code_bolt/analysis/request_patterns.py +162 -0
- apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
- apisec_code_bolt/analysis/sink_evidence.py +333 -0
- apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
- apisec_code_bolt/cli/__init__.py +5 -0
- apisec_code_bolt/cli/exit_codes.py +17 -0
- apisec_code_bolt/cli/main.py +1069 -0
- apisec_code_bolt/cloud/__init__.py +1 -0
- apisec_code_bolt/cloud/apisec_client.py +118 -0
- apisec_code_bolt/cloud/client.py +255 -0
- apisec_code_bolt/core/__init__.py +75 -0
- apisec_code_bolt/core/config.py +528 -0
- apisec_code_bolt/core/credentials.py +65 -0
- apisec_code_bolt/core/discovery.py +433 -0
- apisec_code_bolt/core/log_format.py +115 -0
- apisec_code_bolt/core/manifest.py +1009 -0
- apisec_code_bolt/core/repo.py +280 -0
- apisec_code_bolt/core/state.py +59 -0
- apisec_code_bolt/core/telemetry.py +451 -0
- apisec_code_bolt/core/types.py +587 -0
- apisec_code_bolt/fingerprinting/__init__.py +1 -0
- apisec_code_bolt/frameworks/__init__.py +29 -0
- apisec_code_bolt/frameworks/_jwt_common.py +50 -0
- apisec_code_bolt/frameworks/auth_helpers.py +437 -0
- apisec_code_bolt/frameworks/base.py +608 -0
- apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
- apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
- apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
- apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
- apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
- apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
- apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
- apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
- apisec_code_bolt/frameworks/java/__init__.py +6 -0
- apisec_code_bolt/frameworks/java/_annotations.py +167 -0
- apisec_code_bolt/frameworks/java/_constraints.py +128 -0
- apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
- apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
- apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
- apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
- apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
- apisec_code_bolt/frameworks/js/__init__.py +8 -0
- apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
- apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
- apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
- apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
- apisec_code_bolt/frameworks/python/__init__.py +19 -0
- apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
- apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
- apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
- apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
- apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
- apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
- apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
- apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
- apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
- apisec_code_bolt/parsing/__init__.py +62 -0
- apisec_code_bolt/parsing/base.py +554 -0
- apisec_code_bolt/parsing/csharp/__init__.py +5 -0
- apisec_code_bolt/parsing/csharp/language_services.py +203 -0
- apisec_code_bolt/parsing/csharp/literals.py +72 -0
- apisec_code_bolt/parsing/csharp/parser.py +1158 -0
- apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
- apisec_code_bolt/parsing/js/__init__.py +5 -0
- apisec_code_bolt/parsing/js/language_services.py +118 -0
- apisec_code_bolt/parsing/js/parser.py +622 -0
- apisec_code_bolt/parsing/jvm/__init__.py +7 -0
- apisec_code_bolt/parsing/jvm/language_services.py +270 -0
- apisec_code_bolt/parsing/jvm/parser.py +774 -0
- apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
- apisec_code_bolt/parsing/python/__init__.py +150 -0
- apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
- apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
- apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
- apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
- apisec_code_bolt/parsing/python/expression_utils.py +221 -0
- apisec_code_bolt/parsing/python/extraction_types.py +271 -0
- apisec_code_bolt/parsing/python/language_services.py +487 -0
- apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
- apisec_code_bolt/parsing/python/parser.py +719 -0
- apisec_code_bolt/parsing/python/path_resolver.py +576 -0
- apisec_code_bolt/parsing/python/router_registry.py +806 -0
- apisec_code_bolt/parsing/python/type_resolver.py +730 -0
- apisec_code_bolt/parsing/python/visitors.py +1544 -0
- apisec_code_bolt/parsing/services.py +544 -0
- apisec_code_bolt/query/__init__.py +1 -0
- apisec_code_bolt/query/ast_cache.py +182 -0
- apisec_code_bolt/query/executor.py +283 -0
- apisec_code_bolt/query/handlers.py +832 -0
- apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
- apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
- apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
- apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Literal-pattern scanner.
|
|
3
|
+
|
|
4
|
+
Sweeps all string literals in parsed files and detects security-relevant
|
|
5
|
+
patterns:
|
|
6
|
+
|
|
7
|
+
- **URLs**: External endpoints the service communicates with.
|
|
8
|
+
- **SQL fragments**: Query structures, even those not in tracked data flows.
|
|
9
|
+
- **Connection strings**: Database/cache/broker URLs with known schemes.
- **Hardcoded secrets**: String values assigned to names that look sensitive (reported redacted).
|
|
10
|
+
|
|
11
|
+
Results populate the ``LiteralsModel`` section of the manifest.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import re
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
from typing import TYPE_CHECKING
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from ..core.manifest import LiteralPatternModel
|
|
22
|
+
from ..parsing.base import ParsedFile
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# ============================================================================
|
|
26
|
+
# Pattern rules
|
|
27
|
+
# ============================================================================
|
|
28
|
+
|
|
29
|
+
# Generic HTTP(S) URL: the scheme followed by a run of characters that stops at
# whitespace, a quote, or one of the delimiters < > , ; ) ] } {.
_URL_RE = re.compile(
    r'https?://[^\s\'"<>,;)\]}{]+',
)

# Connection URL for one of the listed database / cache / broker schemes.
# Matching is case-insensitive; the remainder of the URL uses the same stop
# characters as _URL_RE.
_CONNECTION_STRING_RE = re.compile(
    r"(?:postgres(?:ql)?|mysql|sqlite|mongodb(?:\+srv)?|redis|amqp|amqps|nats|kafka)://"
    r'[^\s\'"<>,;)\]}{]+',
    re.IGNORECASE,
)

# Leading keywords of common SQL statements (case-insensitive).
# re.DOTALL is not set, so in the "SELECT ... FROM" alternative the text
# between the two keywords cannot itself span a newline.  A bare EXEC or
# EXECUTE word also counts as a match.
_SQL_FRAGMENT_RE = re.compile(
    r"\b(?:SELECT\s+.+?\s+FROM|INSERT\s+INTO|UPDATE\s+\w+\s+SET|DELETE\s+FROM|"
    r"CREATE\s+TABLE|ALTER\s+TABLE|DROP\s+TABLE|EXEC(?:UTE)?)\b",
    re.IGNORECASE,
)

# Substrings that mark a variable or field name as likely holding a secret.
# The pattern is unanchored and case-insensitive, so it matches anywhere
# inside a longer identifier.
_SECRET_NAME_RE = re.compile(
    r"(?:SECRET|TOKEN|PASSWORD|PASSWD|API_KEY|APIKEY|PRIVATE_KEY|"
    r"ACCESS_KEY|AUTH_KEY|ENCRYPTION_KEY|SIGNING_KEY|CLIENT_SECRET|"
    r"CREDENTIAL|DB_PASS|DATABASE_PASSWORD)",
    re.IGNORECASE,
)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# ============================================================================
|
|
54
|
+
# Detected patterns
|
|
55
|
+
# ============================================================================
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass
class DetectedLiteral:
    """
    One security-relevant pattern found by the literal scanner.

    Attributes:
        pattern_type: Kind of detection: "url", "sql_pattern",
            "connection_string" or "secret".
        value: The matched text as reported (truncated, and redacted for
            connection strings and secrets).
        file: Path of the file the literal was found in, as a string.
        line: Line number of the enclosing assignment, class or call;
            0 when no location is available.
        confidence: Confidence label; defaults to "HIGH".
        notes: Optional human-readable detail about the detection.
    """

    pattern_type: str
    value: str
    file: str
    line: int
    confidence: str = "HIGH"
    notes: str | None = None
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# ============================================================================
|
|
71
|
+
# LiteralScanner
|
|
72
|
+
# ============================================================================
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class LiteralScanner:
    """
    Scans all string literals in parsed files for security-relevant patterns.

    For each successfully parsed file three sources are inspected, in this
    order: assignments, class-level field defaults, and string-literal call
    arguments.  URL, connection-string and SQL detections are de-duplicated
    by matched text across the whole scan; secret detections are
    de-duplicated per (file, name).
    """

    # Values this short are masked completely: revealing two characters at
    # each end would expose half or more of the secret.
    _FULL_MASK_MAX_LEN = 8

    # (lower-case scheme prefix, label) pairs, checked in order.
    _SCHEME_LABELS = (
        ("postgres", "PostgreSQL connection"),
        ("mysql", "MySQL connection"),
        ("sqlite", "SQLite connection"),
        ("mongodb", "MongoDB connection"),
        ("redis", "Redis connection"),
        ("amqp", "RabbitMQ connection"),
        ("nats", "NATS connection"),
        ("kafka", "Kafka connection"),
    )

    def __init__(self, parsed_files: list[ParsedFile]) -> None:
        # Files that failed to parse are dropped up front.
        self._parsed_files = [pf for pf in parsed_files if pf.success]

    def scan(self) -> list[DetectedLiteral]:
        """Run the full scan and return detected literals."""
        results: list[DetectedLiteral] = []
        # Keys are ("secret", file, name) or (pattern_type, matched_text).
        seen: set[tuple[str, ...]] = set()

        for pf in self._parsed_files:
            file_str = str(pf.path)
            self._scan_assignments(pf, file_str, results, seen)
            self._scan_class_fields(pf, file_str, results, seen)
            self._scan_call_arguments(pf, file_str, results, seen)

        return results

    def _scan_assignments(
        self,
        pf: ParsedFile,
        file_str: str,
        results: list[DetectedLiteral],
        seen: set[tuple[str, ...]],
    ) -> None:
        """Scan assignment right-hand sides for secrets and literal patterns."""
        for assign in pf.assignments:
            line = assign.location.line if assign.location else 0

            # Normalise the candidate value for scanning.  Covers both plain
            # literals and string-interpolated/concatenated expressions
            # (Java SQL patterns, f-strings, etc.).
            is_interp = getattr(assign, "is_string_interpolation", False)
            if assign.source_type == "literal" and isinstance(assign.source_value, str):
                scan_val: str | None = assign.source_value or None
            elif is_interp and assign.source_value:
                scan_val = str(assign.source_value)
            else:
                scan_val = None

            if not scan_val:
                continue

            # Hardcoded secret: the target name looks sensitive and the
            # value is a non-empty string.
            if _SECRET_NAME_RE.search(assign.target):
                self._record_secret(
                    assign.target,
                    scan_val,
                    file_str,
                    line,
                    f"Hardcoded value in variable '{assign.target}'",
                    results,
                    seen,
                )

            # URL / SQL / connection-string scanning
            self._scan_string(scan_val, file_str, line, results, seen)

    def _scan_class_fields(
        self,
        pf: ParsedFile,
        file_str: str,
        results: list[DetectedLiteral],
        seen: set[tuple[str, ...]],
    ) -> None:
        """Scan class-level field defaults for hardcoded secrets.

        Java constants (e.g. private static final String JWT_SECRET = "...")
        are ParsedField objects, not local-variable assignments.
        """
        for cls in pf.classes:
            for field in cls.fields:
                val = field.default_value
                if not val or not isinstance(val, str):
                    continue
                if not _SECRET_NAME_RE.search(field.name):
                    continue
                # ParsedField has no line number; use the class location
                # as a best-effort pointer to the enclosing declaration.
                field_line = cls.location.line if cls.location else 0
                self._record_secret(
                    field.name,
                    val,
                    file_str,
                    field_line,
                    f"Hardcoded value in field '{field.name}'",
                    results,
                    seen,
                )

    def _scan_call_arguments(
        self,
        pf: ParsedFile,
        file_str: str,
        results: list[DetectedLiteral],
        seen: set[tuple[str, ...]],
    ) -> None:
        """Scan string-literal call arguments for URL/SQL/connection patterns."""
        for cs in pf.call_sites:
            # Guarded like assignment and class locations: fall back to
            # line 0 rather than raising when the call site has no location.
            line = cs.location.line if cs.location else 0
            for arg in cs.arguments:
                if not arg.is_literal or arg.literal_type != "str":
                    continue
                val = arg.literal_value
                if not val or not isinstance(val, str):
                    continue
                self._scan_string(val, file_str, line, results, seen)

    def _record_secret(
        self,
        name: str,
        value: str,
        file_str: str,
        line: int,
        notes: str,
        results: list[DetectedLiteral],
        seen: set[tuple[str, ...]],
    ) -> None:
        """Append one redacted secret detection unless (file, name) was already reported."""
        key = ("secret", file_str, name)
        if key in seen:
            return
        seen.add(key)
        results.append(
            DetectedLiteral(
                pattern_type="secret",
                value=self._redact_secret(name, value),
                file=file_str,
                line=line,
                notes=notes,
            )
        )

    def _scan_string(
        self,
        value: str,
        file: str,
        line: int,
        results: list[DetectedLiteral],
        seen: set[tuple[str, ...]],
    ) -> None:
        """Scan a single string for all pattern types.

        Each distinct match is appended to ``results`` once; ``seen`` carries
        the de-duplication keys across calls.  Reported values are cut to 200
        characters.
        """
        # Connection strings: credentials are redacted and the scheme is
        # classified in the notes.
        for m in _CONNECTION_STRING_RE.finditer(value):
            matched = m.group()
            key = ("connection_string", matched)
            if key in seen:
                continue
            seen.add(key)
            # scheme://user:pass@host -> scheme://***@host
            sanitized = self._redact_credentials(matched)
            results.append(
                DetectedLiteral(
                    pattern_type="connection_string",
                    value=sanitized[:200],
                    file=file,
                    line=line,
                    notes=self._classify_connection_scheme(matched),
                )
            )

        # Generic URLs
        for m in _URL_RE.finditer(value):
            matched = m.group()
            key = ("url", matched)
            if key in seen:
                continue
            seen.add(key)
            results.append(
                DetectedLiteral(
                    pattern_type="url",
                    value=matched[:200],
                    file=file,
                    line=line,
                )
            )

        # SQL fragments, de-duplicated by upper-cased, truncated form
        for m in _SQL_FRAGMENT_RE.finditer(value):
            matched = m.group()
            norm = matched.strip().upper()[:80]
            key = ("sql_pattern", norm)
            if key in seen:
                continue
            seen.add(key)
            results.append(
                DetectedLiteral(
                    pattern_type="sql_pattern",
                    value=matched.strip()[:200],
                    file=file,
                    line=line,
                )
            )

    @staticmethod
    def _redact_credentials(url: str) -> str:
        """Replace the user:password part of a URL with ``***``.

        The first pass redacts everything up to the LAST ``@`` of the
        authority (the text before the first ``/``, ``?`` or ``#``), so a
        password that itself contains ``@`` is not partially leaked.  The
        second pass is the original first-``@`` rule; it still covers URLs
        whose password contains ``/`` and is a no-op on already redacted text.
        """
        authority_redacted = re.sub(r"://[^/?#]*@", "://***@", url)
        return re.sub(r"://[^@]+@", "://***@", authority_redacted)

    @staticmethod
    def _redact_secret(var_name: str, value: str) -> str:
        """Return a redacted representation: variable name + masked value.

        Short values are masked entirely.  Longer values keep their first
        and last two characters around at most 16 asterisks.
        """
        if len(value) <= LiteralScanner._FULL_MASK_MAX_LEN:
            masked = "***"
        else:
            masked = value[:2] + "*" * min(len(value) - 4, 16) + value[-2:]
        return f'{var_name} = "{masked}"'

    @staticmethod
    def _classify_connection_scheme(url: str) -> str | None:
        """Return a human-readable type for a connection string scheme, or None."""
        lower = url.lower()
        for prefix, label in LiteralScanner._SCHEME_LABELS:
            if lower.startswith(prefix):
                return label
        return None

    # ------------------------------------------------------------------
    # Conversion to manifest models
    # ------------------------------------------------------------------

    def to_manifest_models(
        self,
        detections: list[DetectedLiteral] | None = None,
    ) -> LiteralsModel:  # noqa: F821
        """Convert detections to manifest LiteralsModel.

        Args:
            detections: Detections to convert; when None, ``scan()`` is run.

        Returns:
            A LiteralsModel whose ``urls`` list holds both URL and
            connection-string detections, alongside ``sql_patterns`` and
            ``secret_patterns``.
        """
        from ..core.manifest import LiteralPatternModel, LiteralsModel, LocationModel

        if detections is None:
            detections = self.scan()

        urls: list[LiteralPatternModel] = []
        sql_patterns: list[LiteralPatternModel] = []
        secret_patterns: list[LiteralPatternModel] = []

        # Connection strings share the "urls" bucket with plain URLs.
        buckets = {
            "url": urls,
            "connection_string": urls,
            "sql_pattern": sql_patterns,
            "secret": secret_patterns,
        }

        for det in detections:
            model = LiteralPatternModel(
                type=det.pattern_type,
                value=det.value,
                location=LocationModel(file=det.file, line=det.line),
                confidence=det.confidence,
                notes=det.notes,
            )
            bucket = buckets.get(det.pattern_type)
            if bucket is not None:
                bucket.append(model)

        return LiteralsModel(
            urls=urls,
            sql_patterns=sql_patterns,
            secret_patterns=secret_patterns,
        )
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Path normalization for route parameter syntax across frameworks.
|
|
3
|
+
|
|
4
|
+
Converts framework-specific path param syntax to a canonical form
|
|
5
|
+
for the manifest: {name} (any type/converter annotation is dropped).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
|
|
12
|
+
# Flask/Django: <int:id>, <uuid:pk>, <path:name>, plus Werkzeug converters
# that take arguments, e.g. <string(length=2):lang> or <int(min=1):id>.
# Group "type" is the optional converter (including any argument list),
# group "name" is the parameter name.
FLASK_DJANGO_PARAM = re.compile(r"<(?:(?P<type>\w+(?:\([^)>]*\))?):)?(?P<name>\w+)>")

# FastAPI/Starlette: {id}, {id:int}
FASTAPI_PARAM = re.compile(r"\{(\w+)(?::[^}]*)?\}")


def normalize_path(path: str, framework: str = "fastapi") -> str:
    """
    Normalize path parameter syntax to canonical {name} form.

    Converter and type annotations are dropped in every branch.

    Args:
        path: Route path (e.g. /users/<int:id> or /users/{id})
        framework: Source framework for format detection (case-insensitive).
            "flask"/"django" rewrite only <...> params,
            "fastapi"/"starlette" rewrite only {...} params; any other
            value applies both rewrites.

    Returns:
        Path with params as {name}; an empty path is returned unchanged.
    """
    if not path:
        return path

    framework = framework.lower()

    if framework in ("flask", "django"):
        return FLASK_DJANGO_PARAM.sub(r"{\g<name>}", path)

    # FastAPI/Starlette already use {name}; strip any ":type" suffix
    if framework in ("fastapi", "starlette"):
        return FASTAPI_PARAM.sub(r"{\1}", path)

    # Generic: angle-bracket params first, then strip types from brace params
    result = FLASK_DJANGO_PARAM.sub(r"{\g<name>}", path)
    return FASTAPI_PARAM.sub(r"{\1}", result)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def extract_path_param_names(path: str) -> list[str]:
    """
    Collect the parameter names that appear in a route path.

    Angle-bracket (Flask/Django style) parameters are listed first, followed
    by brace (FastAPI/Starlette style) parameters; within each group names
    keep their order of appearance.  Duplicates are not removed.
    """
    angle_names = [m.group("name") for m in FLASK_DJANGO_PARAM.finditer(path)]
    brace_names = [m.group(1) for m in FASTAPI_PARAM.finditer(path)]
    return angle_names + brace_names
|
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Read-site origin detection for request and environment variable access patterns.
|
|
3
|
+
|
|
4
|
+
Identifies data origins when handlers read from the request object
|
|
5
|
+
directly (request.args.get("id"), request.path_params["id"], etc.)
|
|
6
|
+
or from environment variables (os.environ["KEY"], os.getenv("KEY"), etc.)
|
|
7
|
+
rather than via function parameters. Used for Flask, Django, and
|
|
8
|
+
any framework where request data is accessed in the handler body.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import re
|
|
14
|
+
from dataclasses import dataclass
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import TYPE_CHECKING
|
|
17
|
+
|
|
18
|
+
from .request_patterns import get_request_pattern_registry
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from ..parsing.base import ParsedFile, ParsedFunction
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Regex patterns for request access in assignment RHS.
# Entry format: (regex, origin_type, param_group) where param_group is the
# index of the capture group holding the parameter name, or None when the
# access has no parameter name (e.g. the request body).
# Entries are tried in order and the first match wins, so keyed accesses are
# listed before the whole-body patterns.
REQUEST_ACCESS_PATTERNS: list[tuple[re.Pattern, str, int | None]] = [
    # request.path_params["id"] or request.path_params['id']
    (re.compile(r"request\.path_params\s*\[\s*['\"]([^'\"]+)['\"]\s*\]"), "HTTP_PATH_PARAM", 1),
    # request.path_params.get("id")
    (re.compile(r"request\.path_params\.get\s*\(\s*['\"]([^'\"]+)['\"]"), "HTTP_PATH_PARAM", 1),
    # request.query_params["page"]
    (re.compile(r"request\.query_params\s*\[\s*['\"]([^'\"]+)['\"]\s*\]"), "HTTP_QUERY_PARAM", 1),
    # request.query_params.get("page")
    (re.compile(r"request\.query_params\.get\s*\(\s*['\"]([^'\"]+)['\"]"), "HTTP_QUERY_PARAM", 1),
    # request.args.get("id") - Flask
    (re.compile(r"request\.args\.get\s*\(\s*['\"]([^'\"]+)['\"]"), "HTTP_QUERY_PARAM", 1),
    # request.args["id"]
    (re.compile(r"request\.args\s*\[\s*['\"]([^'\"]+)['\"]\s*\]"), "HTTP_QUERY_PARAM", 1),
    # request.GET.get("id") - Django
    (re.compile(r"request\.GET\.get\s*\(\s*['\"]([^'\"]+)['\"]"), "HTTP_QUERY_PARAM", 1),
    # request.GET["id"] - Django subscript form
    (re.compile(r"request\.GET\s*\[\s*['\"]([^'\"]+)['\"]\s*\]"), "HTTP_QUERY_PARAM", 1),
    # request.POST.get("id") - Django
    (re.compile(r"request\.POST\.get\s*\(\s*['\"]([^'\"]+)['\"]"), "HTTP_FORM", 1),
    # request.POST["id"] - Django subscript form
    (re.compile(r"request\.POST\s*\[\s*['\"]([^'\"]+)['\"]\s*\]"), "HTTP_FORM", 1),
    # request.form.get("id") - Flask
    (re.compile(r"request\.form\.get\s*\(\s*['\"]([^'\"]+)['\"]"), "HTTP_FORM", 1),
    # request.form["id"]
    (re.compile(r"request\.form\s*\[\s*['\"]([^'\"]+)['\"]\s*\]"), "HTTP_FORM", 1),
    # request.headers.get("X-API-Key") / request.headers["X-API-Key"]
    # - param group 1 is the header name
    (re.compile(r"request\.headers\.get\s*\(\s*['\"]([^'\"]+)['\"]"), "HTTP_HEADER", 1),
    (re.compile(r"request\.headers\s*\[\s*['\"]([^'\"]+)['\"]\s*\]"), "HTTP_HEADER", 1),
    # request.cookies.get("session")
    (re.compile(r"request\.cookies\.get\s*\(\s*['\"]([^'\"]+)['\"]"), "HTTP_COOKIE", 1),
    # request.cookies["session"]
    (re.compile(r"request\.cookies\s*\[\s*['\"]([^'\"]+)['\"]\s*\]"), "HTTP_COOKIE", 1),
    # request.json() (call) or request.json (Flask property) - body, no
    # param name.  \b keeps longer attribute names such as request.json_x
    # from matching.
    (re.compile(r"request\.json\b"), "HTTP_BODY", None),
    # request.get_json() with or without arguments, e.g. get_json(force=True)
    (re.compile(r"request\.get_json\s*\("), "HTTP_BODY", None),
    # request.body - Django/Starlette
    (re.compile(r"request\.body\b"), "HTTP_BODY", None),
    # request.data - Flask/Django DRF
    (re.compile(r"request\.data\b"), "HTTP_BODY", None),
]
|
|
61
|
+
|
|
62
|
+
# Java / Spring Boot read-site patterns (HttpServletRequest, etc.)
# Entry format: (regex, origin_type, param_group); param_group is the capture
# group holding the parameter name, or None when there is none.
# Only double-quoted string arguments are recognised, and the optional
# "(?:\w+\.)?" prefix lets a pattern match with or without a receiver.
JAVA_REQUEST_ACCESS_PATTERNS: list[tuple[re.Pattern, str, int | None]] = [
    # request.getParameter("name") or httpRequest.getParameter("name")
    (re.compile(r"(?:\w+\.)?getParameter\s*\(\s*\"([^\"]+)\""), "HTTP_QUERY_PARAM", 1),
    # request.getPathVariable("id") / UriComponentsBuilder pattern
    (re.compile(r"(?:\w+\.)?getPathVariable\s*\(\s*\"([^\"]+)\""), "HTTP_PATH_PARAM", 1),
    # uriVariables.get("id") (Spring PathVariables map); also matches the
    # variable names pathVariables and pathParams
    (
        re.compile(r"(?:uriVariables|pathVariables|pathParams)\s*\.get\s*\(\s*\"([^\"]+)\""),
        "HTTP_PATH_PARAM",
        1,
    ),
    # request.getHeader("X-API-Key")
    (re.compile(r"(?:\w+\.)?getHeader\s*\(\s*\"([^\"]+)\""), "HTTP_HEADER", 1),
    # request.getCookies() — no param name
    (re.compile(r"(?:\w+\.)?getCookies\s*\("), "HTTP_COOKIE", None),
    # request.getInputStream() / getReader() — body
    (re.compile(r"(?:\w+\.)?(?:getInputStream|getReader)\s*\("), "HTTP_BODY", None),
    # httpExchange.getRequestBody() (Java HttpServer)
    (re.compile(r"(?:\w+\.)?getRequestBody\s*\("), "HTTP_BODY", None),
    # System.getenv("KEY")
    (re.compile(r"System\.getenv\s*\(\s*\"([^\"]+)\""), "ENVIRONMENT_VAR", 1),
    # System.getProperty("key")
    (re.compile(r"System\.getProperty\s*\(\s*\"([^\"]+)\""), "ENVIRONMENT_VAR", 1),
]
|
|
87
|
+
|
|
88
|
+
# .NET / ASP.NET Core read-site patterns
# Entry format: (regex, origin_type, param_group); param_group is the capture
# group holding the key name, or None when there is none.
# These patterns are case-sensitive and only recognise double-quoted keys.
DOTNET_REQUEST_ACCESS_PATTERNS: list[tuple[re.Pattern, str, int | None]] = [
    # HttpContext.Request.Query["key"] or Request.Query["key"]
    (re.compile(r"(?:\w+\.)?Request\.Query\s*\[\s*\"([^\"]+)\""), "HTTP_QUERY_PARAM", 1),
    # Request.Query.TryGetValue("key", ...) or Request.Query.ContainsKey("key")
    (
        re.compile(r"(?:\w+\.)?Request\.Query(?:\.TryGetValue|\.ContainsKey)\s*\(\s*\"([^\"]+)\""),
        "HTTP_QUERY_PARAM",
        1,
    ),
    # Request.RouteValues["key"]
    (re.compile(r"(?:\w+\.)?Request\.RouteValues\s*\[\s*\"([^\"]+)\""), "HTTP_PATH_PARAM", 1),
    # routeValues["key"] (from RouteData.Values)
    (
        re.compile(r"(?:routeValues|routeData\.Values|RouteData\.Values)\s*\[\s*\"([^\"]+)\""),
        "HTTP_PATH_PARAM",
        1,
    ),
    # Request.Headers["X-API-Key"]
    (re.compile(r"(?:\w+\.)?Request\.Headers\s*\[\s*\"([^\"]+)\""), "HTTP_HEADER", 1),
    # Request.Headers.TryGetValue("X-API-Key", ...)
    (
        re.compile(r"(?:\w+\.)?Request\.Headers\s*\.\s*TryGetValue\s*\(\s*\"([^\"]+)\""),
        "HTTP_HEADER",
        1,
    ),
    # Request.Cookies["session"]
    (re.compile(r"(?:\w+\.)?Request\.Cookies\s*\[\s*\"([^\"]+)\""), "HTTP_COOKIE", 1),
    # Request.Form["field"]
    (re.compile(r"(?:\w+\.)?Request\.Form\s*\[\s*\"([^\"]+)\""), "HTTP_FORM", 1),
    # Request.Body — no param name
    (re.compile(r"(?:\w+\.)?Request\.Body\b"), "HTTP_BODY", None),
    # await Request.ReadFromJsonAsync<T>(), ReadAsStringAsync() or
    # GetFromJsonAsync() — body, no param name
    (
        re.compile(
            r"(?:\w+\.)?Request\.(?:ReadFromJsonAsync|ReadAsStringAsync|GetFromJsonAsync)\b"
        ),
        "HTTP_BODY",
        None,
    ),
    # Environment.GetEnvironmentVariable("KEY")
    (re.compile(r"Environment\.GetEnvironmentVariable\s*\(\s*\"([^\"]+)\""), "ENVIRONMENT_VAR", 1),
    # config["key"] or configuration["key"] (IConfiguration); an optional
    # leading underscore is accepted, and the lookup is reported as
    # ENVIRONMENT_VAR
    (re.compile(r"(?:_?config(?:uration)?)\s*\[\s*\"([^\"]+)\""), "ENVIRONMENT_VAR", 1),
]
|
|
132
|
+
|
|
133
|
+
# Regex patterns for environment variable access in assignment RHS
# Entry format: (regex, origin_type, param_group); group 1 always captures
# the variable/property name.  The Python patterns accept single or double
# quotes and an optional "os." prefix; the Java and .NET patterns require
# double quotes.
ENV_ACCESS_PATTERNS: list[tuple[re.Pattern, str, int | None]] = [
    # os.environ["KEY"] or os.environ['KEY']
    (re.compile(r"(?:os\.)?environ\s*\[\s*['\"]([^'\"]+)['\"]\s*\]"), "ENVIRONMENT_VAR", 1),
    # os.environ.get("KEY") or os.environ.get("KEY", default)
    (re.compile(r"(?:os\.)?environ\.get\s*\(\s*['\"]([^'\"]+)['\"]"), "ENVIRONMENT_VAR", 1),
    # os.getenv("KEY") or os.getenv("KEY", default)
    (re.compile(r"(?:os\.)?getenv\s*\(\s*['\"]([^'\"]+)['\"]"), "ENVIRONMENT_VAR", 1),
    # Java: System.getenv("KEY")
    (re.compile(r"System\.getenv\s*\(\s*\"([^\"]+)\""), "ENVIRONMENT_VAR", 1),
    # Java: System.getProperty("key")
    (re.compile(r"System\.getProperty\s*\(\s*\"([^\"]+)\""), "ENVIRONMENT_VAR", 1),
    # .NET: Environment.GetEnvironmentVariable("KEY")
    (re.compile(r"Environment\.GetEnvironmentVariable\s*\(\s*\"([^\"]+)\""), "ENVIRONMENT_VAR", 1),
]
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@dataclass
class ReadSiteOrigin:
    """
    A data origin found where a handler assigns from a request or the environment.

    Attributes:
        origin_type: OriginType name, e.g. "HTTP_PATH_PARAM".
        param_name: Name of the parameter, header, cookie or variable read;
            annotated as optional.
        target_variable: Variable the value is assigned to.
        line: Line number of the assignment.
        file_path: File containing the assignment.
        framework: Framework label attached by the detector; empty by default.
    """

    origin_type: str
    param_name: str | None
    target_variable: str
    line: int
    file_path: Path
    framework: str = ""
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class ReadSiteDetector:
    """
    Detects data origins from request access in assignment RHS.

    Scans handler bodies for assignments like:
        user_id = request.path_params["user_id"]
        page = request.args.get("page", 1)
    and registers them as taint origins.
    """

    # Lower-cased substrings used as a cheap pre-filter before any regex is
    # run.  Every entry of the Java / .NET pattern tables needs at least one
    # hint here, otherwise that entry can never be reached on its own.
    _JAVA_HINTS = (
        "getparameter",
        "getheader",
        "getcookies",
        "getpathvariable",
        "getinputstream",
        "getreader",
        "getrequestbody",
        "urivariables",
        "pathvariables",
        "pathparams",
    )
    # NOTE(review): the IConfiguration entry (config["key"]) of the .NET table
    # is deliberately not given its own hint; a bare "config" substring would
    # also fire on unrelated code.  Confirm whether it should be gated here.
    _DOTNET_HINTS = (
        "request.query",
        "routevalues",
        "routedata.values",
        "request.headers",
        "request.cookies",
        "request.form",
        "request.body",
        "readfromjsonasync",
        "readasstringasync",
        "getfromjsonasync",
    )
    _ENV_HINTS = ("environ", "getenv", "getproperty", "getenvironmentvariable")

    def __init__(self, framework: str = "fastapi"):
        self._framework = framework.lower()
        self._registry = get_request_pattern_registry()

    def detect_origins(
        self,
        func: ParsedFunction,
        parsed_file: ParsedFile,
    ) -> list[ReadSiteOrigin]:
        """
        Detect read-site origins from assignments in a function.

        Args:
            func: Function whose body is inspected.
            parsed_file: File providing the ``assignments`` list; only
                assignments attributed to ``func`` are considered.

        Returns:
            List of origins found (assignments from request.xxx or from
            environment lookups), at most one per assignment.
        """
        origins: list[ReadSiteOrigin] = []
        func_qname = (
            func.qualified_name.full
            if hasattr(func.qualified_name, "full")
            else str(func.qualified_name)
        )

        if not hasattr(parsed_file, "assignments"):
            return origins

        for assign in parsed_file.assignments:
            if not self._belongs_to(assign, func_qname, func.name):
                continue
            if assign.source_type not in ("call", "expression", "variable"):
                continue

            source = assign.source_value or assign.value_source or ""
            if not source:
                continue
            if not isinstance(source, str):
                # Defensive: the regexes and .lower() below need text.
                source = str(source)

            source_lower = source.lower()
            has_request = "request" in source_lower
            has_java_request = any(kw in source_lower for kw in self._JAVA_HINTS)
            has_dotnet_request = any(kw in source_lower for kw in self._DOTNET_HINTS)
            has_env = any(kw in source_lower for kw in self._ENV_HINTS)

            # Request patterns: .NET first, then Java, then the generic
            # Python table; each language table only when its hints fired.
            candidates: list[tuple[re.Pattern, str, int | None]] = []
            if has_dotnet_request:
                candidates += DOTNET_REQUEST_ACCESS_PATTERNS
            if has_java_request:
                candidates += JAVA_REQUEST_ACCESS_PATTERNS
            if has_request or has_java_request or has_dotnet_request:
                candidates += REQUEST_ACCESS_PATTERNS

            hit = self._first_match(source, candidates)
            # Environment lookups are only consulted when no request pattern matched.
            if hit is None and has_env:
                hit = self._first_match(source, ENV_ACCESS_PATTERNS)
            if hit is None:
                continue

            origin_type_name, param_name = hit
            location = assign.location
            origins.append(
                ReadSiteOrigin(
                    origin_type=origin_type_name,
                    # Fall back to the target name when the pattern has no key.
                    param_name=param_name or assign.target,
                    target_variable=assign.target,
                    # Guarded like file_path: a missing location yields line 0.
                    line=location.line if location else 0,
                    file_path=Path(str(location.file)) if location else parsed_file.path,
                    framework=self._framework,
                )
            )

        return origins

    @staticmethod
    def _belongs_to(assign, func_qname: str, func_name: str) -> bool:
        """Return True when the assignment's enclosing function is the given one.

        Accepts an exact match on the qualified or short name, or a qualified
        name that ends with ".<assign function>".
        """
        raw = assign.in_function
        assign_func = raw.full if hasattr(raw, "full") else str(raw or "")
        if not assign_func:
            return False
        if assign_func in (func_qname, func_name):
            return True
        return func_qname.endswith(f".{assign_func}")

    @staticmethod
    def _first_match(source: str, patterns) -> tuple[str, str | None] | None:
        """Return (origin_type, param_name) for the first matching pattern, else None."""
        for pattern, origin_type_name, param_group in patterns:
            match = pattern.search(source)
            if not match:
                continue
            param_name = None
            if param_group is not None and param_group <= len(match.groups()):
                param_name = match.group(param_group)
            return origin_type_name, param_name
        return None
|