skylos 2.2.4__py3-none-any.whl → 2.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skylos might be problematic. Click here for more details.
- skylos/__init__.py +1 -1
- skylos/analyzer.py +107 -76
- skylos/cli.py +156 -8
- skylos/rules/danger/__init__.py +0 -0
- skylos/rules/danger/danger.py +141 -0
- skylos/rules/danger/danger_cmd/__init__.py +0 -0
- skylos/rules/danger/danger_cmd/cmd_flow.py +208 -0
- skylos/rules/danger/danger_fs/__init__.py +0 -0
- skylos/rules/danger/danger_fs/path_flow.py +188 -0
- skylos/rules/danger/danger_net/__init__.py +0 -0
- skylos/rules/danger/danger_net/ssrf_flow.py +198 -0
- skylos/rules/danger/danger_sql/__init__.py +0 -0
- skylos/rules/danger/danger_sql/sql_flow.py +175 -0
- skylos/rules/danger/danger_sql/sql_raw_flow.py +202 -0
- skylos/rules/danger/danger_web/__init__.py +0 -0
- skylos/rules/danger/danger_web/xss_flow.py +279 -0
- {skylos-2.2.4.dist-info → skylos-2.4.0.dist-info}/METADATA +1 -1
- {skylos-2.2.4.dist-info → skylos-2.4.0.dist-info}/RECORD +26 -10
- test/test_cmd_injection.py +41 -0
- test/test_dangerous.py +32 -1
- test/test_path_traversal.py +40 -0
- test/test_sql_injection.py +54 -0
- test/test_ssrf.py +51 -0
- skylos/rules/dangerous.py +0 -135
- {skylos-2.2.4.dist-info → skylos-2.4.0.dist-info}/WHEEL +0 -0
- {skylos-2.2.4.dist-info → skylos-2.4.0.dist-info}/entry_points.txt +0 -0
- {skylos-2.2.4.dist-info → skylos-2.4.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import ast
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
name = input()
|
|
7
|
+
sql = f"SELECT * FROM users WHERE name='{name}'"
|
|
8
|
+
# attacker types: '; DROP TABLE users; --
|
|
9
|
+
cur.execute(sql) # adios amigos. table is gone
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
def _qualified_name_from_call(node):
    """Return the dotted name of the callable invoked by ``node``.

    ``cur.execute(...)`` yields ``"cur.execute"`` and ``f(...)`` yields
    ``"f"``.

    Args:
        node: An ``ast.Call`` node.

    Returns:
        The dotted name as a string, or ``None`` when the attribute chain is
        not rooted at a plain name (for example ``get_conn().execute(...)``).
    """
    func = node.func
    parts = []
    # Peel attribute accesses from the outside in: ``a.b.c`` collects c, then b.
    while isinstance(func, ast.Attribute):
        parts.append(func.attr)
        func = func.value

    # Anything other than a bare name at the root (a call, a subscript, ...)
    # has no static dotted name.
    if not isinstance(func, ast.Name):
        return None

    parts.append(func.id)
    parts.reverse()
    return ".".join(parts)
|
|
28
|
+
|
|
29
|
+
def _is_interpolated_string(node):
    """Tell whether ``node`` has the shape of a dynamically built string.

    Three shapes qualify: an f-string, a binary ``+`` or ``%`` expression,
    and a call to any ``.format`` attribute. Operand types are not inspected,
    so numeric ``a + b`` qualifies as well.
    """
    if isinstance(node, ast.JoinedStr):
        return True

    if isinstance(node, ast.BinOp):
        return isinstance(node.op, (ast.Add, ast.Mod))

    if isinstance(node, ast.Call):
        callee = node.func
        return isinstance(callee, ast.Attribute) and callee.attr == "format"

    return False
|
|
41
|
+
|
|
42
|
+
def _add_finding(findings, file_path, node, rule_id, severity, message):
    """Append one finding record for ``node`` to ``findings``.

    The location comes from the node's ``lineno`` / ``col_offset``; nodes
    without position information are reported at line 1, column 0.
    """
    line = getattr(node, "lineno", 1)
    column = getattr(node, "col_offset", 0)
    entry = {
        "rule_id": rule_id,
        "severity": severity,
        "message": message,
        "file": str(file_path),
        "line": line,
        "col": column,
    }
    findings.append(entry)
|
|
51
|
+
|
|
52
|
+
class _SQLFlowChecker(ast.NodeVisitor):
    """Report execute-style calls whose SQL argument is string-built or tainted.

    Taint is tracked per variable name in a stack of scopes: one scope is
    pushed for every function body, and a scope is created on demand for
    module-level assignments. Taint sources are ``input()``, anything reached
    through a name called ``request``, and every interpolated string.
    """

    SQL_SINK_SUFFIXES = (".execute", ".executemany", ".executescript")

    def __init__(self, file_path, findings):
        self.file_path = file_path
        self.findings = findings
        self.env_stack = []

    def _push(self):
        """Open a new, empty variable scope."""
        self.env_stack.append({})

    def _pop(self):
        """Discard the innermost variable scope."""
        self.env_stack.pop()

    def _set(self, name, tainted):
        """Record the taint state of ``name`` in the innermost scope."""
        if not self.env_stack:
            self._push()
        self.env_stack[-1][name] = bool(tainted)

    def _get(self, name):
        """Look ``name`` up from the innermost scope outwards; unknown names are clean."""
        for env in reversed(self.env_stack):
            if name in env:
                return env[name]
        return False

    def _tainted(self, node):
        """Return True when the expression ``node`` may carry untrusted data."""
        if isinstance(node, ast.Name):
            return self._get(node.id)

        if isinstance(node, (ast.Attribute, ast.Subscript)):
            # Walk to the root of the attribute chain: request.args["q"] -> request.
            base = node.value
            while isinstance(base, ast.Attribute):
                base = base.value
            if isinstance(base, ast.Name) and base.id == "request":
                return True
            # Reading a field or item of a tainted value stays tainted,
            # e.g. ``params["name"]`` after ``params = request.args``.
            return self._tainted(node.value)

        if isinstance(node, ast.Call):
            func = node.func
            if isinstance(func, ast.Name) and func.id == "input":
                return True
            # A method called on a tainted receiver returns tainted data:
            # ``request.args.get("q")``, ``name.strip()``.
            if isinstance(func, ast.Attribute) and self._tainted(func.value):
                return True
            # ``"...".format(...)`` counts as an interpolated string.
            if _is_interpolated_string(node):
                return True
            return any(self._tainted(arg) for arg in node.args)

        # f-strings and ``+`` / ``%`` expressions.
        if _is_interpolated_string(node):
            return True

        # Remaining binary operators are tainted if either operand is.
        if isinstance(node, ast.BinOp):
            return self._tainted(node.left) or self._tainted(node.right)

        return False

    def _visit_function(self, node):
        """Visit a function body inside its own variable scope."""
        self._push()
        self.generic_visit(node)
        self._pop()

    def visit_FunctionDef(self, node):
        self._visit_function(node)

    def visit_AsyncFunctionDef(self, node):
        self._visit_function(node)

    def visit_Assign(self, node):
        taint = self._tainted(node.value)
        # Only plain-name targets are tracked; tuple, attribute and
        # subscript targets are ignored.
        for tgt in node.targets:
            if isinstance(tgt, ast.Name):
                self._set(tgt.id, taint)
        self.generic_visit(node)

    def visit_AnnAssign(self, node):
        # A bare annotation (``x: str``) has no value and marks the name clean.
        taint = node.value is not None and self._tainted(node.value)
        if isinstance(node.target, ast.Name):
            self._set(node.target.id, taint)
        self.generic_visit(node)

    def visit_AugAssign(self, node):
        # ``x += y`` is tainted if x already was or y is.
        taint = self._tainted(node.target) or self._tainted(node.value)
        if isinstance(node.target, ast.Name):
            self._set(node.target.id, taint)
        self.generic_visit(node)

    def visit_Call(self, node):
        qn = _qualified_name_from_call(node)

        if qn and qn.endswith(self.SQL_SINK_SUFFIXES) and node.args:
            # Only the first positional argument (the SQL text) is inspected;
            # bound parameters passed after it are not.
            arg0 = node.args[0]
            if _is_interpolated_string(arg0) or self._tainted(arg0):
                _add_finding(
                    self.findings, self.file_path, node,
                    "SKY-D211", "CRITICAL",
                    "Possible SQL injection: tainted SQL passed to SQL execution method."
                )

        self.generic_visit(node)

    def generic_visit(self, node):
        """Visit every child node, in field order."""
        for _field, value in ast.iter_fields(node):
            if isinstance(value, list):
                for item in value:
                    if isinstance(item, ast.AST):
                        self.visit(item)
            elif isinstance(value, ast.AST):
                self.visit(value)
|
|
169
|
+
|
|
170
|
+
def scan(tree, file_path, findings):
    """Run the SQL-flow checker over ``tree``, appending results to ``findings``.

    Best effort: any exception raised during the walk is reported on stderr
    and not re-raised.
    """
    try:
        _SQLFlowChecker(file_path, findings).visit(tree)
    except Exception as exc:
        print(f"SQL flow analysis failed for {file_path}: {exc}", file=sys.stderr)
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import ast
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
raw sql injection flow analysis for sqlalchemy.text, pandas.read_sql, django .raw()
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
def _is_interpolated_string(n: ast.AST):
    """Return True for an f-string, a ``+`` / ``%`` expression, or a ``.format(...)`` call.

    The check is purely structural: operand and receiver types are not examined.
    """
    return (
        isinstance(n, ast.JoinedStr)
        or (isinstance(n, ast.BinOp) and isinstance(n.op, (ast.Add, ast.Mod)))
        or (
            isinstance(n, ast.Call)
            and isinstance(n.func, ast.Attribute)
            and n.func.attr == "format"
        )
    )
|
|
16
|
+
|
|
17
|
+
def _qualified_name_from_call(node: ast.Call):
    """Return the dotted name of the callee of ``node``, or ``None``.

    ``sa.text(...)`` gives ``"sa.text"`` and ``text(...)`` gives ``"text"``.
    ``None`` is returned when the attribute chain does not start at a plain
    name, e.g. ``make_engine().text(...)``.
    """
    attrs = []
    cur = node.func
    # Attributes are collected outermost-first, so they are reversed below.
    while isinstance(cur, ast.Attribute):
        attrs.append(cur.attr)
        cur = cur.value

    if isinstance(cur, ast.Name):
        return ".".join([cur.id, *reversed(attrs)])
    return None
|
|
31
|
+
|
|
32
|
+
def _add_finding(findings, file_path, node: ast.AST, rule_id, severity, message):
|
|
33
|
+
findings.append({
|
|
34
|
+
"rule_id": rule_id,
|
|
35
|
+
"severity": severity,
|
|
36
|
+
"message": message,
|
|
37
|
+
"file": str(file_path),
|
|
38
|
+
"line": getattr(node, "lineno", 1),
|
|
39
|
+
"col": getattr(node, "col_offset", 0),
|
|
40
|
+
})
|
|
41
|
+
|
|
42
|
+
class _SQLRawFlowChecker(ast.NodeVisitor):
    """Report tainted SQL passed to raw-SQL helpers.

    Sinks are matched by the suffix of the dotted callee name: ``.text``
    (sqlalchemy), ``.read_sql`` / ``.read_sql_query`` (pandas) and
    ``.objects.raw`` (Django). Every function parameter is treated as
    tainted inside that function's body.
    """

    # (callee-name suffixes, message) for each raw-SQL sink. For example:
    #   sa.text("DELETE FROM logs WHERE ip='" + ip + "'")
    #   pd.read_sql(f"SELECT * FROM users WHERE name='{name}'", conn)
    #   User.objects.raw("SELECT * FROM auth_user WHERE username='" + u + "'")
    _RAW_SQL_SINKS = (
        ((".text",),
         "Possible SQL injection: tainted SQL passed to sqlalchemy.text()."),
        ((".read_sql", ".read_sql_query"),
         "Possible SQL injection: tainted SQL passed to pandas.read_sql()."),
        ((".objects.raw",),
         "Possible SQL injection: tainted SQL passed to Django .raw()."),
    )

    def __init__(self, file_path, findings):
        self.file_path = file_path
        self.findings = findings
        self.env_stack = []

    def _push(self):
        """Open a new, empty variable scope."""
        self.env_stack.append({})

    def _pop(self):
        """Discard the innermost variable scope."""
        self.env_stack.pop()

    def _set(self, name, tainted):
        """Record the taint state of ``name`` in the innermost scope."""
        if not self.env_stack:
            self._push()
        self.env_stack[-1][name] = bool(tainted)

    def _get(self, name):
        """Look ``name`` up from the innermost scope outwards; unknown names are clean."""
        for env in reversed(self.env_stack):
            if name in env:
                return env[name]
        return False

    def _traverse_children(self, node):
        """Visit every direct child of ``node``."""
        for child in ast.iter_child_nodes(node):
            self.visit(child)

    def expr_is_tainted(self, n: ast.AST):
        """Return True when the expression ``n`` may carry untrusted data."""
        if _is_interpolated_string(n):
            return True

        if isinstance(n, ast.Call) and isinstance(n.func, ast.Name) and n.func.id == "input":
            return True

        if isinstance(n, ast.Name):
            return self._get(n.id)

        if isinstance(n, (ast.Attribute, ast.Subscript)):
            # Anything hanging off a name called ``request`` is a source.
            base = n.value
            while isinstance(base, ast.Attribute):
                base = base.value
            if isinstance(base, ast.Name) and base.id == "request":
                return True
            # Otherwise a field or item is as tainted as the value it is read from.
            return self.expr_is_tainted(n.value)

        if isinstance(n, ast.Call):
            return any(self.expr_is_tainted(arg) for arg in n.args)

        if isinstance(n, ast.BinOp):
            return self.expr_is_tainted(n.left) or self.expr_is_tainted(n.right)

        return False

    def _taint_parameters(self, arguments: ast.arguments):
        """Mark every parameter of a function signature as tainted.

        Covers positional-only, regular, keyword-only, ``*args`` and
        ``**kwargs`` parameters.
        """
        # ``posonlyargs`` only exists on Python 3.8+.
        params = [
            *getattr(arguments, "posonlyargs", []),
            *arguments.args,
            *arguments.kwonlyargs,
        ]
        if arguments.vararg is not None:
            params.append(arguments.vararg)
        if arguments.kwarg is not None:
            params.append(arguments.kwarg)
        for param in params:
            self._set(param.arg, True)

    def _visit_function(self, node):
        """Visit a function body in its own scope with all parameters tainted."""
        self._push()
        self._taint_parameters(node.args)
        self._traverse_children(node)
        self._pop()

    def visit_FunctionDef(self, node: ast.FunctionDef):
        self._visit_function(node)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
        self._visit_function(node)

    def visit_Assign(self, node: ast.Assign):
        tainted = self.expr_is_tainted(node.value)
        # Only plain-name targets are tracked.
        for tgt in node.targets:
            if isinstance(tgt, ast.Name):
                self._set(tgt.id, tainted)
        self._traverse_children(node)

    def visit_AnnAssign(self, node: ast.AnnAssign):
        if isinstance(node.target, ast.Name):
            # A bare annotation (``x: str``) has no value and marks the name clean.
            tainted = node.value is not None and self.expr_is_tainted(node.value)
            self._set(node.target.id, tainted)
        self._traverse_children(node)

    def visit_AugAssign(self, node: ast.AugAssign):
        # ``x += y`` is tainted if x already was or y is.
        tainted = self.expr_is_tainted(node.target) or self.expr_is_tainted(node.value)
        if isinstance(node.target, ast.Name):
            self._set(node.target.id, tainted)
        self._traverse_children(node)

    def visit_Call(self, node: ast.Call):
        qn = _qualified_name_from_call(node)

        if qn and node.args:
            for suffixes, message in self._RAW_SQL_SINKS:
                if not qn.endswith(suffixes):
                    continue
                # Only the first positional argument (the SQL text) is inspected.
                sql = node.args[0]
                if _is_interpolated_string(sql) or self.expr_is_tainted(sql):
                    _add_finding(
                        self.findings, self.file_path, node,
                        "SKY-D217", "CRITICAL",
                        message
                    )

        self._traverse_children(node)

    def generic_visit(self, node):
        """Visit every direct child of ``node``."""
        for child in ast.iter_child_nodes(node):
            self.visit(child)
|
|
200
|
+
|
|
201
|
+
def scan(tree: ast.AST, file_path, findings):
    """Run the raw-SQL flow checker over ``tree``, appending results to ``findings``.

    Best effort: any exception raised during the walk is reported on stderr
    and not re-raised, so a failure here does not propagate to the caller.
    """
    import sys  # local import: sys is not among this file's top-level imports

    try:
        _SQLRawFlowChecker(file_path, findings).visit(tree)
    except Exception as exc:
        print(f"SQL raw flow analysis failed for {file_path}: {exc}", file=sys.stderr)
|
|
File without changes
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import ast
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Common XSS patterns the LLMs love to generate:
|
|
7
|
+
|
|
8
|
+
1) Marking untrusted HTML as safe
|
|
9
|
+
|
|
10
|
+
from django.utils.safestring import mark_safe
|
|
11
|
+
return HttpResponse(mark_safe(input())) # -> XSS
|
|
12
|
+
|
|
13
|
+
2) Unsafe inline templates
|
|
14
|
+
from flask import render_template_string
|
|
15
|
+
render_template_string("<p>{{ body|safe }}</p>", body=request.args["body"]) # -> XSS
|
|
16
|
+
render_template_string("{% autoescape false %}{{ x }}{% endautoescape %}", x=input()) # -> XSS
|
|
17
|
+
|
|
18
|
+
3) Returning string-built HTML directly with user input
|
|
19
|
+
return "<div>" + request.args["q"] + "</div>" # -> XSS
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
def _qualified_name_from_call(node: ast.Call):
    """Return the dotted name of the function called by ``node``.

    ``flask.render_template_string(...)`` gives
    ``"flask.render_template_string"`` and ``mark_safe(...)`` gives
    ``"mark_safe"``. Returns ``None`` when the chain is not rooted at a plain
    name (for example a call on the result of another call).
    """
    chain = []
    target = node.func
    while True:
        if isinstance(target, ast.Attribute):
            # Walking inwards, so each attribute goes in front of the ones
            # already collected.
            chain.insert(0, target.attr)
            target = target.value
        elif isinstance(target, ast.Name):
            chain.insert(0, target.id)
            return ".".join(chain)
        else:
            return None
|
|
38
|
+
|
|
39
|
+
def _is_interpolated_string(node: ast.AST):
    """Report whether ``node`` is an f-string, a ``+`` / ``%`` expression, or a ``.format`` call.

    Only the syntactic shape is checked, not the types of the operands.
    """
    if isinstance(node, ast.JoinedStr):
        verdict = True
    elif isinstance(node, ast.BinOp):
        verdict = isinstance(node.op, (ast.Add, ast.Mod))
    elif isinstance(node, ast.Call):
        verdict = isinstance(node.func, ast.Attribute) and node.func.attr == "format"
    else:
        verdict = False
    return verdict
|
|
50
|
+
|
|
51
|
+
def _add_finding(findings, file_path, node, rule_id, severity, message):
    """Append a finding dict describing ``node`` to ``findings``.

    Nodes lacking ``lineno`` / ``col_offset`` are located at line 1, column 0.
    """
    keys = ("rule_id", "severity", "message", "file", "line", "col")
    values = (
        rule_id,
        severity,
        message,
        str(file_path),
        getattr(node, "lineno", 1),
        getattr(node, "col_offset", 0),
    )
    findings.append(dict(zip(keys, values)))
|
|
60
|
+
|
|
61
|
+
def _const_str_value(node: ast.AST):
    """Return the text of a string-literal node, or ``None`` for any other node."""
    if not isinstance(node, ast.Constant):
        return None
    literal = node.value
    return literal if isinstance(literal, str) else None
|
|
65
|
+
|
|
66
|
+
def _const_contains_html(node: ast.AST):
    """Return True when ``node`` is a string literal containing both ``<`` and ``>``."""
    if not isinstance(node, ast.Constant):
        return False
    text = node.value
    if not isinstance(text, str):
        return False
    return "<" in text and ">" in text
|
|
72
|
+
|
|
73
|
+
class _XSSFlowChecker(ast.NodeVisitor):
    """Report three XSS patterns.

    * SKY-D226: untrusted content passed to ``Markup`` / ``mark_safe``.
    * SKY-D227: ``render_template_string`` with a literal template that
      disables escaping (``|safe`` or ``{% autoescape false %}``).
    * SKY-D228: a ``return`` of HTML built from tainted values.

    Taint is tracked per variable name in a stack of scopes, one per function
    body. Sources are ``input()``, anything reached through a name called
    ``request``, and every interpolated string.
    """

    SAFE_MARK_FUNCS = {"Markup", "mark_safe"}

    def __init__(self, file_path, findings):
        self.file_path = file_path
        self.findings = findings
        self.env_stack = []

    def _push(self):
        """Open a new, empty variable scope."""
        self.env_stack.append({})

    def _pop(self):
        """Discard the innermost variable scope."""
        self.env_stack.pop()

    def _set(self, name, tainted):
        """Record the taint state of ``name`` in the innermost scope."""
        if not self.env_stack:
            self._push()
        self.env_stack[-1][name] = bool(tainted)

    def _get(self, name):
        """Look ``name`` up from the innermost scope outwards; unknown names are clean."""
        for env in reversed(self.env_stack):
            if name in env:
                return env[name]
        return False

    def _tainted(self, node: ast.AST):
        """Return True when the expression ``node`` may carry untrusted data."""
        if isinstance(node, ast.Name):
            return self._get(node.id)

        if isinstance(node, (ast.Attribute, ast.Subscript)):
            # Walk to the root of the attribute chain: request.args["q"] -> request.
            base = node.value
            while isinstance(base, ast.Attribute):
                base = base.value
            if isinstance(base, ast.Name) and base.id == "request":
                return True
            # A field or item is as tainted as the value it is read from.
            return self._tainted(node.value)

        if isinstance(node, ast.Call):
            func = node.func
            if isinstance(func, ast.Name) and func.id == "input":
                return True
            # A method called on a tainted receiver returns tainted data:
            # ``request.args.get("q")``, ``body.strip()``.
            if isinstance(func, ast.Attribute) and self._tainted(func.value):
                return True
            # ``"...".format(...)`` counts as an interpolated string.
            if _is_interpolated_string(node):
                return True
            return any(self._tainted(arg) for arg in node.args)

        # f-strings and ``+`` / ``%`` expressions.
        if _is_interpolated_string(node):
            return True

        # Remaining binary operators are tainted if either operand is.
        if isinstance(node, ast.BinOp):
            return self._tainted(node.left) or self._tainted(node.right)

        return False

    def _template_is_unsafe_literal(self, node: ast.AST):
        """Return True when ``node`` is a literal template that turns escaping off.

        Matching is case-insensitive and tolerates whitespace around the
        filter name and inside the tag, plus ``-`` whitespace-control
        markers, so ``{{ x | safe }}`` and ``{%- autoescape false -%}`` are
        both recognised.
        """
        if not (isinstance(node, ast.Constant) and isinstance(node.value, str)):
            return False

        low = node.value.lower()

        # Any filter whose name starts with "safe", with or without spaces
        # after the pipe.
        if any(part.lstrip().startswith("safe") for part in low.split("|")[1:]):
            return True

        # Look inside each ``{% ... %}`` tag for "autoescape false".
        for chunk in low.split("{%")[1:]:
            if "%}" not in chunk:
                continue
            tag = chunk.split("%}", 1)[0].strip("- \t\r\n")
            if tag.split() == ["autoescape", "false"]:
                return True

        return False

    def _html_built_with_taint(self, node: ast.AST):
        """Return True when ``node`` builds a string from literal HTML plus tainted data."""
        if isinstance(node, ast.JoinedStr):
            # f-string: needs an HTML-looking literal part and a tainted placeholder.
            if not any(_const_contains_html(part) for part in node.values):
                return False
            return any(
                isinstance(part, ast.FormattedValue) and self._tainted(part.value)
                for part in node.values
            )

        if isinstance(node, ast.BinOp) and isinstance(node.op, (ast.Add, ast.Mod)):
            any_html = (
                self._binop_has_html_const(node.left)
                or self._binop_has_html_const(node.right)
            )
            if not any_html:
                return False
            # NOTE(review): a nested ``+`` / ``%`` operand always counts as
            # tainted here because _tainted() treats every interpolated
            # expression as tainted, so "<a>" + "b" + "</a>" is reported
            # too - confirm this is intended.
            return self._tainted(node.left) or self._tainted(node.right)

        if isinstance(node, ast.Call) and isinstance(node.func, ast.Attribute) and node.func.attr == "format":
            if not _const_contains_html(node.func.value):
                return False
            # Keyword arguments fill named placeholders, so they are checked
            # alongside positional ones.
            supplied = [*node.args, *(kw.value for kw in node.keywords)]
            return any(self._tainted(value) for value in supplied)

        return False

    def _binop_has_html_const(self, node: ast.AST):
        """Return True when any leaf of a ``+`` / ``%`` expression is an HTML-looking literal."""
        if _const_contains_html(node):
            return True
        if isinstance(node, ast.BinOp) and isinstance(node.op, (ast.Add, ast.Mod)):
            return self._binop_has_html_const(node.left) or self._binop_has_html_const(node.right)
        return False

    def _visit_function(self, node):
        """Visit a function body inside its own variable scope."""
        self._push()
        self.generic_visit(node)
        self._pop()

    def visit_FunctionDef(self, node: ast.FunctionDef):
        self._visit_function(node)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
        self._visit_function(node)

    def visit_Assign(self, node: ast.Assign):
        t = self._tainted(node.value)
        # Only plain-name targets are tracked.
        for tgt in node.targets:
            if isinstance(tgt, ast.Name):
                self._set(tgt.id, t)
        self.generic_visit(node)

    def visit_AnnAssign(self, node: ast.AnnAssign):
        # A bare annotation (``x: str``) has no value and marks the name clean.
        tainted = node.value is not None and self._tainted(node.value)
        if isinstance(node.target, ast.Name):
            self._set(node.target.id, tainted)
        self.generic_visit(node)

    def visit_AugAssign(self, node: ast.AugAssign):
        # ``x += y`` is tainted if x already was or y is.
        t = self._tainted(node.target) or self._tainted(node.value)
        if isinstance(node.target, ast.Name):
            self._set(node.target.id, t)
        self.generic_visit(node)

    def visit_Call(self, node: ast.Call):
        qn = _qualified_name_from_call(node)

        if qn and node.args:
            # Match on the last dotted component so both ``mark_safe(...)``
            # and ``safestring.mark_safe(...)`` are recognised.
            func_name = qn.split(".")[-1]
            arg0 = node.args[0]

            if func_name in self.SAFE_MARK_FUNCS:
                if _is_interpolated_string(arg0) or self._tainted(arg0):
                    _add_finding(
                        self.findings, self.file_path, node,
                        "SKY-D226", "CRITICAL",
                        "Possible XSS: untrusted content marked safe"
                    )

            if func_name == "render_template_string":
                if self._template_is_unsafe_literal(arg0):
                    _add_finding(
                        self.findings, self.file_path, node,
                        "SKY-D227", "HIGH",
                        "Possible XSS: unsafe inline template disables escaping"
                    )

        self.generic_visit(node)

    def visit_Return(self, node: ast.Return):
        if node.value is not None and self._html_built_with_taint(node.value):
            _add_finding(
                self.findings, self.file_path, node,
                "SKY-D228", "HIGH",
                "XSS (HTML built from unescaped user input)"
            )
        self.generic_visit(node)

    def generic_visit(self, node):
        """Visit every child node, in field order."""
        for _field, value in ast.iter_fields(node):
            if isinstance(value, list):
                for item in value:
                    if isinstance(item, ast.AST):
                        self.visit(item)
            elif isinstance(value, ast.AST):
                self.visit(value)
|
|
273
|
+
|
|
274
|
+
def scan(tree, file_path, findings):
    """Run the XSS checker over ``tree``, appending results to ``findings``.

    Best effort: any exception raised during the walk is reported on stderr
    and not re-raised.
    """
    try:
        _XSSFlowChecker(file_path, findings).visit(tree)
    except Exception as exc:
        print(f"XSS analysis failed for {file_path}: {exc}", file=sys.stderr)
|