agent-security-scanner-mcp 3.17.2 → 3.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/analyzer.py +26 -2
- package/cross_file_analyzer.py +478 -5
- package/package.json +3 -2
- package/python_taint_fallback.py +688 -0
- package/rules/__init__.py +42 -3
- package/rules/prompt-injection.security.yaml +4 -4
- package/rules/semantic-security.yaml +679 -0
- package/src/fix-patterns.js +9 -9
- package/src/history.js +1 -1
- package/src/tools/check-package.js +15 -0
- package/src/tools/scan-prompt.js +44 -31
- package/src/tools/scan-security.js +33 -4
- package/src/tools/scan-skill.js +54 -22
|
@@ -0,0 +1,688 @@
|
|
|
1
|
+
"""Lightweight Python taint analysis without tree-sitter.
|
|
2
|
+
|
|
3
|
+
Uses the stdlib ``ast`` module so Python taint findings are still available
|
|
4
|
+
when the tree-sitter engine is not installed. The implementation is purposely
|
|
5
|
+
conservative and targets the high-signal flows exercised by the test suite:
|
|
6
|
+
|
|
7
|
+
- Flask/Django-style request sources
|
|
8
|
+
- input()-derived taint
|
|
9
|
+
- intra-procedural propagation through assignments and expressions
|
|
10
|
+
- inter-procedural propagation through simple function summaries
|
|
11
|
+
- internal sinks reached inside callees
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import ast
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from typing import Dict, Iterable, List, Optional, Set, Tuple
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Pseudo parameter index used inside dependency sets to mean "this value
# comes straight from a taint source" rather than from a parameter.
SUMMARY_SOURCE = -1

# Modules defining more top-level functions than this get no
# inter-procedural summaries at all.
MAX_INTERPROCEDURAL_FUNCTIONS = 500

# The request object may be referenced bare or through the flask module.
_REQUEST_ROOTS = ("request", "flask.request")
_REQUEST_FIELDS = ("args", "form", "values", "cookies", "headers", "view_args", "json")

# Attribute chains whose value (or any subscript of it) is user-controlled.
SOURCE_ATTRIBUTES = {
    f"{root}.{name}" for root in _REQUEST_ROOTS for name in _REQUEST_FIELDS
}

# Calls whose return value is user-controlled: ``<attribute>.get(...)`` for
# every attribute above, plus the ``input`` builtin.
SOURCE_CALLS = {f"{attribute}.get" for attribute in SOURCE_ATTRIBUTES} | {"input"}

# Calls whose return value is treated as no longer tainted.
SANITIZER_CALLS = {
    "shlex.quote",
    "quote",
    "html.escape",
    "markupsafe.escape",
    "urllib.parse.quote",
}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@dataclass
class TaintInfo:
    """Provenance of a tainted value tracked by the analyzer."""

    # Qualified name of the source expression the taint started from.
    source_pattern: str
    # Line number (as reported by ``ast``) where that source appears.
    source_line: int
    # Human-readable propagation steps, in the order they were recorded.
    propagation_path: List[str] = field(default_factory=list)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@dataclass
class InternalSink:
    """A sink inside a function body that some of its parameters can reach."""

    # Rule identifier reported when the sink is hit (e.g. "sql-injection").
    rule_id: str
    # Base message describing the sink.
    message: str
    # Indices of the function's parameters that flow into the sink.
    param_indices: Set[int]
    # Line number of the sink call inside the function.
    line: int
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@dataclass
class FunctionSummary:
    """Inter-procedural taint summary of a single function."""

    # Function name as written in its definition.
    name: str
    # Parameter names in index order.
    params: List[str]
    # Indices of parameters whose taint can reach the return value.
    returns_taint_from: Set[int] = field(default_factory=set)
    # True when the function can return data taken directly from a source.
    returns_source: bool = False
    # Source pattern reported for ``returns_source`` (None when unknown).
    source_pattern: Optional[str] = None
    # Sinks inside the body that are reachable from parameters.
    internal_sinks: List[InternalSink] = field(default_factory=list)
    # True when a sanitizer call was seen while evaluating the body.
    has_sanitizer: bool = False
    # Line number of the function definition (0 when unavailable).
    line: int = 0
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _get_qualified_name(node: ast.AST) -> Optional[str]:
|
|
96
|
+
if isinstance(node, ast.Name):
|
|
97
|
+
return node.id
|
|
98
|
+
if isinstance(node, ast.Attribute):
|
|
99
|
+
parent = _get_qualified_name(node.value)
|
|
100
|
+
return f"{parent}.{node.attr}" if parent else node.attr
|
|
101
|
+
return None
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _source_pattern(node: ast.AST) -> Optional[str]:
    """Return the matching source pattern when *node* is a taint source.

    Calls are checked against ``SOURCE_CALLS``; subscripts (by their
    subscripted value) and attribute chains are checked against
    ``SOURCE_ATTRIBUTES``.  Returns ``None`` for everything else.
    """
    if isinstance(node, ast.Call):
        candidate, known = _get_qualified_name(node.func), SOURCE_CALLS
    elif isinstance(node, ast.Subscript):
        candidate, known = _get_qualified_name(node.value), SOURCE_ATTRIBUTES
    elif isinstance(node, ast.Attribute):
        candidate, known = _get_qualified_name(node), SOURCE_ATTRIBUTES
    else:
        return None
    return candidate if candidate in known else None
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _is_sanitizer_call(node: ast.AST) -> bool:
    """Return True when *node* is a call to one of ``SANITIZER_CALLS``."""
    if not isinstance(node, ast.Call):
        return False
    return _get_qualified_name(node.func) in SANITIZER_CALLS
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _extract_target_names(target: ast.AST) -> List[str]:
|
|
125
|
+
if isinstance(target, ast.Name):
|
|
126
|
+
return [target.id]
|
|
127
|
+
if isinstance(target, (ast.Tuple, ast.List)):
|
|
128
|
+
names: List[str] = []
|
|
129
|
+
for elt in target.elts:
|
|
130
|
+
names.extend(_extract_target_names(elt))
|
|
131
|
+
return names
|
|
132
|
+
return []
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _merge_dependency_maps(*maps: Dict[str, Set[int]]) -> Dict[str, Set[int]]:
|
|
136
|
+
merged: Dict[str, Set[int]] = {}
|
|
137
|
+
for mapping in maps:
|
|
138
|
+
for key, value in mapping.items():
|
|
139
|
+
merged[key] = merged.get(key, set()) | set(value)
|
|
140
|
+
return merged
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _merge_taint_envs(*envs: Dict[str, TaintInfo]) -> Dict[str, TaintInfo]:
|
|
144
|
+
merged: Dict[str, TaintInfo] = {}
|
|
145
|
+
for env in envs:
|
|
146
|
+
for key, value in env.items():
|
|
147
|
+
if key not in merged:
|
|
148
|
+
merged[key] = value
|
|
149
|
+
return merged
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _node_column(node: ast.AST) -> int:
|
|
153
|
+
return getattr(node, "col_offset", 0)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _find_first_taint(node: ast.AST, env: Dict[str, TaintInfo]) -> Optional[Tuple[str, TaintInfo]]:
|
|
157
|
+
if isinstance(node, ast.Name) and node.id in env:
|
|
158
|
+
return node.id, env[node.id]
|
|
159
|
+
if isinstance(node, ast.Attribute):
|
|
160
|
+
return _find_first_taint(node.value, env)
|
|
161
|
+
if isinstance(node, ast.Subscript):
|
|
162
|
+
found = _find_first_taint(node.value, env)
|
|
163
|
+
if found:
|
|
164
|
+
return found
|
|
165
|
+
return _find_first_taint(node.slice, env)
|
|
166
|
+
for child in ast.iter_child_nodes(node):
|
|
167
|
+
found = _find_first_taint(child, env)
|
|
168
|
+
if found:
|
|
169
|
+
return found
|
|
170
|
+
return None
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _match_sink(node: ast.Call) -> Optional[Tuple[str, str, List[ast.AST]]]:
    """Classify a call as a dangerous sink.

    Returns ``(rule_id, message, relevant_args)`` where *relevant_args* are
    the argument nodes that must not be tainted, or ``None`` when the call is
    not a recognised sink.  Only the first positional argument is inspected,
    so calls without positional arguments never match.
    """
    qname = _get_qualified_name(node.func)
    if not qname or not node.args:
        return None

    payload: List[ast.AST] = [node.args[0]]

    # Match on the method name itself rather than on the dotted name: for a
    # chained receiver such as ``conn.cursor().execute(q)`` the dotted name
    # degrades to plain "execute" and an ``endswith(".execute")`` test would
    # miss it.  A bare ``execute(q)`` function call is still not a sink.
    if isinstance(node.func, ast.Attribute) and node.func.attr == "execute":
        return (
            "sql-injection",
            "User-controlled data flows to SQL execution.",
            payload,
        )

    if qname == "os.system":
        return (
            "command-injection",
            "User-controlled data flows to os.system().",
            payload,
        )

    if qname.startswith("subprocess."):
        # Only a literal ``shell=True`` keyword makes the call a shell sink.
        shell_true = any(
            kw.arg == "shell" and isinstance(kw.value, ast.Constant) and kw.value.value is True
            for kw in node.keywords
        )
        if shell_true:
            return (
                "command-injection",
                "User-controlled data flows to subprocess with shell=True.",
                payload,
            )

    if qname == "open":
        return (
            "path-traversal",
            "User-controlled data flows to file open().",
            payload,
        )

    if qname.endswith("render_template_string"):
        return (
            "xss",
            "User-controlled data flows to render_template_string().",
            payload,
        )

    if qname in {"eval", "exec"}:
        return (
            "code-injection",
            "User-controlled data flows to dynamic code execution.",
            payload,
        )

    return None
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
class _SummaryBuilder:
|
|
229
|
+
def __init__(self, tree: ast.AST):
|
|
230
|
+
self.functions: Dict[str, ast.AST] = {}
|
|
231
|
+
for node in getattr(tree, "body", []):
|
|
232
|
+
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
233
|
+
self.functions[node.name] = node
|
|
234
|
+
|
|
235
|
+
def build(self) -> Dict[str, FunctionSummary]:
|
|
236
|
+
if len(self.functions) > MAX_INTERPROCEDURAL_FUNCTIONS:
|
|
237
|
+
return {}
|
|
238
|
+
|
|
239
|
+
summaries = {
|
|
240
|
+
name: FunctionSummary(
|
|
241
|
+
name=name,
|
|
242
|
+
params=self._params(func),
|
|
243
|
+
line=getattr(func, "lineno", 0),
|
|
244
|
+
)
|
|
245
|
+
for name, func in self.functions.items()
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
for _ in range(8):
|
|
249
|
+
changed = False
|
|
250
|
+
for name, func in self.functions.items():
|
|
251
|
+
updated = self._compute_summary(func, summaries)
|
|
252
|
+
if updated != summaries[name]:
|
|
253
|
+
summaries[name] = updated
|
|
254
|
+
changed = True
|
|
255
|
+
if not changed:
|
|
256
|
+
break
|
|
257
|
+
|
|
258
|
+
return summaries
|
|
259
|
+
|
|
260
|
+
@staticmethod
|
|
261
|
+
def _params(func: ast.AST) -> List[str]:
|
|
262
|
+
params: List[str] = []
|
|
263
|
+
for arg in getattr(func.args, "args", []):
|
|
264
|
+
if arg.arg not in {"self", "cls"}:
|
|
265
|
+
params.append(arg.arg)
|
|
266
|
+
return params
|
|
267
|
+
|
|
268
|
+
def _compute_summary(
|
|
269
|
+
self,
|
|
270
|
+
func: ast.AST,
|
|
271
|
+
summaries: Dict[str, FunctionSummary],
|
|
272
|
+
) -> FunctionSummary:
|
|
273
|
+
params = self._params(func)
|
|
274
|
+
env: Dict[str, Set[int]] = {name: {idx} for idx, name in enumerate(params)}
|
|
275
|
+
returns_taint_from: Set[int] = set()
|
|
276
|
+
returns_source = False
|
|
277
|
+
source_pattern: Optional[str] = None
|
|
278
|
+
internal_sinks: List[InternalSink] = []
|
|
279
|
+
has_sanitizer = False
|
|
280
|
+
|
|
281
|
+
def expr_deps(node: Optional[ast.AST], local_env: Dict[str, Set[int]]) -> Set[int]:
|
|
282
|
+
nonlocal has_sanitizer
|
|
283
|
+
if node is None:
|
|
284
|
+
return set()
|
|
285
|
+
|
|
286
|
+
pattern = _source_pattern(node)
|
|
287
|
+
if pattern:
|
|
288
|
+
return {SUMMARY_SOURCE}
|
|
289
|
+
|
|
290
|
+
if isinstance(node, ast.Name):
|
|
291
|
+
return set(local_env.get(node.id, set()))
|
|
292
|
+
|
|
293
|
+
if isinstance(node, ast.Attribute):
|
|
294
|
+
return expr_deps(node.value, local_env)
|
|
295
|
+
|
|
296
|
+
if isinstance(node, ast.Subscript):
|
|
297
|
+
return expr_deps(node.value, local_env) | expr_deps(node.slice, local_env)
|
|
298
|
+
|
|
299
|
+
if isinstance(node, ast.Call):
|
|
300
|
+
if _is_sanitizer_call(node):
|
|
301
|
+
has_sanitizer = True
|
|
302
|
+
return set()
|
|
303
|
+
|
|
304
|
+
qname = _get_qualified_name(node.func)
|
|
305
|
+
if qname and qname in summaries:
|
|
306
|
+
summary = summaries[qname]
|
|
307
|
+
deps: Set[int] = set()
|
|
308
|
+
if summary.returns_source:
|
|
309
|
+
deps.add(SUMMARY_SOURCE)
|
|
310
|
+
for idx in summary.returns_taint_from:
|
|
311
|
+
if idx < len(node.args):
|
|
312
|
+
deps |= expr_deps(node.args[idx], local_env)
|
|
313
|
+
if summary.has_sanitizer and not deps:
|
|
314
|
+
has_sanitizer = True
|
|
315
|
+
return deps
|
|
316
|
+
|
|
317
|
+
deps = expr_deps(getattr(node.func, "value", None), local_env)
|
|
318
|
+
for arg in node.args:
|
|
319
|
+
deps |= expr_deps(arg, local_env)
|
|
320
|
+
for kw in node.keywords:
|
|
321
|
+
deps |= expr_deps(kw.value, local_env)
|
|
322
|
+
return deps
|
|
323
|
+
|
|
324
|
+
deps: Set[int] = set()
|
|
325
|
+
for child in ast.iter_child_nodes(node):
|
|
326
|
+
deps |= expr_deps(child, local_env)
|
|
327
|
+
return deps
|
|
328
|
+
|
|
329
|
+
def process_statements(
|
|
330
|
+
statements: Iterable[ast.stmt],
|
|
331
|
+
local_env: Dict[str, Set[int]],
|
|
332
|
+
) -> Dict[str, Set[int]]:
|
|
333
|
+
nonlocal returns_source, source_pattern, internal_sinks, returns_taint_from
|
|
334
|
+
|
|
335
|
+
for stmt in statements:
|
|
336
|
+
if isinstance(stmt, ast.Assign):
|
|
337
|
+
deps = expr_deps(stmt.value, local_env)
|
|
338
|
+
for target in stmt.targets:
|
|
339
|
+
for name in _extract_target_names(target):
|
|
340
|
+
local_env[name] = set(deps)
|
|
341
|
+
|
|
342
|
+
elif isinstance(stmt, ast.AnnAssign):
|
|
343
|
+
deps = expr_deps(stmt.value, local_env)
|
|
344
|
+
for name in _extract_target_names(stmt.target):
|
|
345
|
+
local_env[name] = set(deps)
|
|
346
|
+
|
|
347
|
+
elif isinstance(stmt, ast.AugAssign):
|
|
348
|
+
deps = expr_deps(stmt.value, local_env) | expr_deps(stmt.target, local_env)
|
|
349
|
+
for name in _extract_target_names(stmt.target):
|
|
350
|
+
local_env[name] = set(deps)
|
|
351
|
+
|
|
352
|
+
elif isinstance(stmt, ast.Return):
|
|
353
|
+
deps = expr_deps(stmt.value, local_env)
|
|
354
|
+
if SUMMARY_SOURCE in deps:
|
|
355
|
+
returns_source = True
|
|
356
|
+
source_pattern = source_pattern or _source_pattern(stmt.value) or "request.args.get"
|
|
357
|
+
returns_taint_from |= {d for d in deps if d != SUMMARY_SOURCE}
|
|
358
|
+
|
|
359
|
+
elif isinstance(stmt, ast.Expr) and isinstance(stmt.value, ast.Call):
|
|
360
|
+
sink = _match_sink(stmt.value)
|
|
361
|
+
if sink:
|
|
362
|
+
rule_id, message, relevant_args = sink
|
|
363
|
+
param_indices: Set[int] = set()
|
|
364
|
+
for arg in relevant_args:
|
|
365
|
+
deps = expr_deps(arg, local_env)
|
|
366
|
+
param_indices |= {d for d in deps if d != SUMMARY_SOURCE}
|
|
367
|
+
if param_indices:
|
|
368
|
+
internal_sinks.append(
|
|
369
|
+
InternalSink(
|
|
370
|
+
rule_id=rule_id,
|
|
371
|
+
message=message,
|
|
372
|
+
param_indices=param_indices,
|
|
373
|
+
line=getattr(stmt.value, "lineno", getattr(stmt, "lineno", 1)),
|
|
374
|
+
)
|
|
375
|
+
)
|
|
376
|
+
|
|
377
|
+
qname = _get_qualified_name(stmt.value.func)
|
|
378
|
+
if qname and qname in summaries:
|
|
379
|
+
summary = summaries[qname]
|
|
380
|
+
for isink in summary.internal_sinks:
|
|
381
|
+
param_indices: Set[int] = set()
|
|
382
|
+
for idx in isink.param_indices:
|
|
383
|
+
if idx < len(stmt.value.args):
|
|
384
|
+
deps = expr_deps(stmt.value.args[idx], local_env)
|
|
385
|
+
param_indices |= {d for d in deps if d != SUMMARY_SOURCE}
|
|
386
|
+
if param_indices:
|
|
387
|
+
internal_sinks.append(
|
|
388
|
+
InternalSink(
|
|
389
|
+
rule_id=isink.rule_id,
|
|
390
|
+
message=isink.message,
|
|
391
|
+
param_indices=param_indices,
|
|
392
|
+
line=getattr(stmt.value, "lineno", getattr(stmt, "lineno", 1)),
|
|
393
|
+
)
|
|
394
|
+
)
|
|
395
|
+
|
|
396
|
+
elif isinstance(stmt, ast.If):
|
|
397
|
+
before = dict(local_env)
|
|
398
|
+
body_env = process_statements(stmt.body, dict(local_env))
|
|
399
|
+
else_env = process_statements(stmt.orelse, dict(local_env))
|
|
400
|
+
local_env = _merge_dependency_maps(before, body_env, else_env)
|
|
401
|
+
|
|
402
|
+
elif isinstance(stmt, (ast.For, ast.AsyncFor, ast.While, ast.With, ast.AsyncWith)):
|
|
403
|
+
body_env = process_statements(stmt.body, dict(local_env))
|
|
404
|
+
else_env = process_statements(getattr(stmt, "orelse", []), dict(local_env))
|
|
405
|
+
local_env = _merge_dependency_maps(local_env, body_env, else_env)
|
|
406
|
+
|
|
407
|
+
elif isinstance(stmt, ast.Try):
|
|
408
|
+
branch_envs = [dict(local_env)]
|
|
409
|
+
branch_envs.append(process_statements(stmt.body, dict(local_env)))
|
|
410
|
+
for handler in stmt.handlers:
|
|
411
|
+
branch_envs.append(process_statements(handler.body, dict(local_env)))
|
|
412
|
+
branch_envs.append(process_statements(stmt.orelse, dict(local_env)))
|
|
413
|
+
branch_envs.append(process_statements(stmt.finalbody, dict(local_env)))
|
|
414
|
+
local_env = _merge_dependency_maps(*branch_envs)
|
|
415
|
+
|
|
416
|
+
return local_env
|
|
417
|
+
|
|
418
|
+
process_statements(getattr(func, "body", []), env)
|
|
419
|
+
return FunctionSummary(
|
|
420
|
+
name=func.name,
|
|
421
|
+
params=params,
|
|
422
|
+
returns_taint_from=returns_taint_from,
|
|
423
|
+
returns_source=returns_source,
|
|
424
|
+
source_pattern=source_pattern,
|
|
425
|
+
internal_sinks=internal_sinks,
|
|
426
|
+
has_sanitizer=has_sanitizer,
|
|
427
|
+
line=getattr(func, "lineno", 0),
|
|
428
|
+
)
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
class _PythonTaintAnalyzer:
    """Flow-sensitive taint analysis of a single Python module.

    Taint starts at the expressions recognised by ``_source_pattern`` (and at
    calls to functions whose summary returns a source), follows assignments,
    and is reported wherever it reaches a call matched by ``_match_sink`` or
    a call to a summarised function with an internal sink.
    """

    def __init__(self, source: str, file_path: str):
        """Parse *source* and build function summaries.

        Raises:
            SyntaxError: propagated from ``ast.parse`` for invalid source.
        """
        self.source = source
        self.file_path = file_path
        self.tree = ast.parse(source, filename=file_path)
        self.summaries = _SummaryBuilder(self.tree).build()

    def analyze(self) -> List[dict]:
        """Return the de-duplicated findings for the whole module.

        The module body is analysed first, then the body of every function
        definition found anywhere in the tree (including class methods and
        nested functions), each starting from an empty taint environment.
        """
        findings: List[dict] = list(self._analyze_statements(getattr(self.tree, "body", []), {}))

        # ast.walk visits the module's direct children first, so top-level
        # functions keep their position ahead of methods and nested functions.
        for node in ast.walk(self.tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                findings.extend(self._analyze_statements(node.body, {}))

        unique: List[dict] = []
        seen = set()
        for finding in findings:
            key = (
                finding["ruleId"],
                finding["line"],
                finding["column"],
                finding["metadata"].get("tainted_variable"),
            )
            if key not in seen:
                seen.add(key)
                unique.append(finding)
        return unique

    def _analyze_statements(
        self,
        statements: Iterable[ast.stmt],
        env: Dict[str, TaintInfo],
    ) -> List[dict]:
        """Return the findings for *statements* analysed from *env*.

        *env* itself is not modified.
        """
        findings, _ = self._walk_statements(statements, env)
        return findings

    def _walk_statements(
        self,
        statements: Iterable[ast.stmt],
        env: Dict[str, TaintInfo],
    ) -> Tuple[List[dict], Dict[str, TaintInfo]]:
        """Analyse *statements* and return ``(findings, resulting environment)``.

        Works on a copy of *env*.  Environments leaving the branches of a
        compound statement are merged back (first entry wins), so a variable
        tainted on any path stays tainted afterwards; a variable that was
        tainted before the statement is never cleared by a branch.
        """
        findings: List[dict] = []
        local_env = dict(env)

        for stmt in statements:
            if isinstance(stmt, (ast.Assign, ast.AnnAssign, ast.AugAssign)):
                # The right-hand side runs before the targets are rebound, so
                # its sinks are judged with the pre-assignment environment.
                findings.extend(self._statement_findings(stmt, local_env))
                self._bind_assignment(stmt, local_env)

            elif isinstance(stmt, ast.If):
                findings.extend(self._statement_findings(stmt.test, local_env))
                body_findings, body_env = self._walk_statements(stmt.body, local_env)
                else_findings, else_env = self._walk_statements(stmt.orelse, local_env)
                findings.extend(body_findings)
                findings.extend(else_findings)
                local_env = _merge_taint_envs(local_env, body_env, else_env)

            elif isinstance(stmt, (ast.For, ast.AsyncFor, ast.While, ast.With, ast.AsyncWith)):
                # Scan only the header here; the body is analysed below with
                # proper flow, so a sanitizer applied inside the body is
                # honoured instead of being bypassed by a whole-statement scan.
                for header in self._header_expressions(stmt):
                    findings.extend(self._statement_findings(header, local_env))
                body_findings, body_env = self._walk_statements(stmt.body, local_env)
                findings.extend(body_findings)
                after_body = _merge_taint_envs(local_env, body_env)
                else_findings, else_env = self._walk_statements(getattr(stmt, "orelse", []), after_body)
                findings.extend(else_findings)
                local_env = _merge_taint_envs(after_body, else_env)

            elif isinstance(stmt, ast.Try):
                body_findings, body_env = self._walk_statements(stmt.body, local_env)
                findings.extend(body_findings)
                # An exception may interrupt the body at any point, so the
                # handlers see everything tainted before or inside the body.
                after_body = _merge_taint_envs(local_env, body_env)
                branch_envs = [after_body]
                for handler in stmt.handlers:
                    handler_findings, handler_env = self._walk_statements(handler.body, after_body)
                    findings.extend(handler_findings)
                    branch_envs.append(handler_env)
                else_findings, else_env = self._walk_statements(stmt.orelse, after_body)
                findings.extend(else_findings)
                branch_envs.append(else_env)
                merged = _merge_taint_envs(*branch_envs)
                final_findings, final_env = self._walk_statements(stmt.finalbody, merged)
                findings.extend(final_findings)
                local_env = _merge_taint_envs(merged, final_env)

            else:
                findings.extend(self._statement_findings(stmt, local_env))

        return findings, local_env

    @staticmethod
    def _header_expressions(stmt: ast.stmt) -> List[ast.AST]:
        """Return the expressions evaluated by a loop/``with`` header."""
        if isinstance(stmt, (ast.For, ast.AsyncFor)):
            return [stmt.iter]
        if isinstance(stmt, ast.While):
            return [stmt.test]
        return [item.context_expr for item in getattr(stmt, "items", [])]

    def _bind_assignment(self, stmt: ast.stmt, env: Dict[str, TaintInfo]) -> None:
        """Update *env* in place for an Assign/AnnAssign/AugAssign statement.

        A tainted value taints every plain name it is bound to.  A clean
        value clears the name for plain and annotated assignments; an
        augmented assignment never clears, and a tainted target keeps its
        taint.
        """
        if isinstance(stmt, ast.Assign):
            targets = list(stmt.targets)
            taint = self._expr_taint(stmt.value, env)
            clears = True
        elif isinstance(stmt, ast.AnnAssign):
            if stmt.value is None:
                # A bare annotation (``x: str``) assigns nothing, so it must
                # not clear existing taint either.
                return
            targets = [stmt.target]
            taint = self._expr_taint(stmt.value, env)
            clears = True
        else:
            targets = [stmt.target]
            taint = self._expr_taint(stmt.value, env) or self._expr_taint(stmt.target, env)
            clears = False

        line = getattr(stmt, "lineno", 1)
        for target in targets:
            for name in _extract_target_names(target):
                if taint:
                    env[name] = TaintInfo(
                        source_pattern=taint.source_pattern,
                        source_line=taint.source_line,
                        propagation_path=taint.propagation_path + [f"Line {line}: {name} = ..."],
                    )
                elif clears:
                    env.pop(name, None)

    def _expr_taint(self, node: Optional[ast.AST], env: Dict[str, TaintInfo]) -> Optional[TaintInfo]:
        """Return the taint carried by an expression, or ``None`` when clean."""
        if node is None:
            return None

        pattern = _source_pattern(node)
        if pattern:
            return TaintInfo(
                source_pattern=pattern,
                source_line=getattr(node, "lineno", 1),
                propagation_path=[f"Source: {pattern}"],
            )

        if isinstance(node, ast.Name):
            return env.get(node.id)

        if isinstance(node, ast.Attribute):
            return self._expr_taint(node.value, env)

        if isinstance(node, ast.Subscript):
            return self._expr_taint(node.value, env) or self._expr_taint(node.slice, env)

        if isinstance(node, ast.Call):
            if _is_sanitizer_call(node):
                return None

            qname = _get_qualified_name(node.func)
            if qname and qname in self.summaries:
                summary = self.summaries[qname]
                if summary.returns_source:
                    return TaintInfo(
                        source_pattern=summary.source_pattern or "request.args.get",
                        source_line=summary.line or getattr(node, "lineno", 1),
                        propagation_path=[f"Source via {qname}()"],
                    )
                for idx in summary.returns_taint_from:
                    if idx < len(node.args):
                        arg_taint = self._expr_taint(node.args[idx], env)
                        if arg_taint:
                            return TaintInfo(
                                source_pattern=arg_taint.source_pattern,
                                source_line=arg_taint.source_line,
                                propagation_path=arg_taint.propagation_path
                                + [f"Line {getattr(node, 'lineno', 1)}: return from {qname}()"],
                            )
                if summary.has_sanitizer:
                    return None

            # Unknown callee, or a summarised one that neither returned taint
            # nor sanitises: assume taint passes through the receiver and the
            # arguments.
            taint = self._expr_taint(getattr(node.func, "value", None), env)
            if taint:
                return taint
            for arg in node.args:
                taint = self._expr_taint(arg, env)
                if taint:
                    return taint
            for kw in node.keywords:
                taint = self._expr_taint(kw.value, env)
                if taint:
                    return taint
            return None

        for child in ast.iter_child_nodes(node):
            taint = self._expr_taint(child, env)
            if taint:
                return taint

        return None

    def _statement_findings(self, stmt: ast.AST, env: Dict[str, TaintInfo]) -> List[dict]:
        """Return findings for every call found anywhere inside *stmt*.

        *stmt* may be a statement or a bare expression.  Each call is checked
        both as a direct sink and as a call to a summarised function whose
        body contains a sink.
        """
        findings: List[dict] = []

        for node in ast.walk(stmt):
            if not isinstance(node, ast.Call):
                continue

            sink = _match_sink(node)
            if sink:
                rule_id, message, relevant_args = sink
                for arg in relevant_args:
                    finding = self._argument_finding(node, arg, env, rule_id, message)
                    if finding is not None:
                        findings.append(finding)

            qname = _get_qualified_name(node.func)
            if qname and qname in self.summaries:
                summary = self.summaries[qname]
                if summary.has_sanitizer:
                    continue
                for isink in summary.internal_sinks:
                    for idx in isink.param_indices:
                        if idx >= len(node.args):
                            continue
                        finding = self._argument_finding(
                            node,
                            node.args[idx],
                            env,
                            isink.rule_id,
                            f"{isink.message} Tainted data reaches sink inside {qname}().",
                            {"inter_procedural": True, "callee": qname},
                        )
                        if finding is not None:
                            findings.append(finding)

        return findings

    def _argument_finding(
        self,
        call: ast.Call,
        arg: ast.AST,
        env: Dict[str, TaintInfo],
        rule_id: str,
        message: str,
        extra_metadata: Optional[Dict[str, object]] = None,
    ) -> Optional[dict]:
        """Return a finding when *arg* of *call* is tainted, else ``None``.

        The reported variable is the first tainted name inside the argument;
        when the taint does not come from a tracked name the argument's
        source text is reported instead.
        """
        taint = self._expr_taint(arg, env)
        if not taint:
            return None

        found = _find_first_taint(arg, env)
        if found:
            tainted_var, taint_info = found
        else:
            # ast.unparse only exists on Python 3.9+.
            tainted_var = ast.unparse(arg) if hasattr(ast, "unparse") else "expression"
            taint_info = taint

        return self._make_finding(
            rule_id=rule_id,
            message=message,
            node=call,
            tainted_variable=tainted_var,
            taint_info=taint_info,
            extra_metadata=extra_metadata,
        )

    @staticmethod
    def _make_finding(
        rule_id: str,
        message: str,
        node: ast.AST,
        tainted_variable: str,
        taint_info: TaintInfo,
        extra_metadata: Optional[Dict[str, object]] = None,
    ) -> dict:
        """Build the finding dictionary for a tainted sink call.

        ``line`` is the call's line number minus one (never negative);
        ``column`` is the call's column offset.
        """
        metadata = {
            "taint_source": taint_info.source_pattern,
            "taint_source_line": taint_info.source_line,
            "tainted_variable": tainted_variable,
        }
        if extra_metadata:
            metadata.update(extra_metadata)
        return {
            "ruleId": rule_id,
            "message": message,
            "line": max(getattr(node, "lineno", 1) - 1, 0),
            "column": _node_column(node),
            "length": 0,
            "severity": "error",
            "confidence": "HIGH",
            "metadata": metadata,
            "engine": "taint",
        }
|
|
678
|
+
|
|
679
|
+
|
|
680
|
+
def analyze_python_taint(source: str, file_path: str = "<memory>") -> List[dict]:
    """Run lightweight Python taint analysis and return analyzer-style findings.

    This is a best-effort entry point: source that fails to parse, or any
    other error raised during analysis, yields an empty list instead of
    propagating.
    """
    try:
        return _PythonTaintAnalyzer(source, file_path).analyze()
    except Exception:
        # SyntaxError is an Exception subclass, so unparsable input lands
        # here together with every other analysis failure.
        return []
|