skylos 2.2.3__py3-none-any.whl → 2.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skylos might be problematic. Click here for more details.

@@ -0,0 +1,175 @@
1
+ from __future__ import annotations
2
+ import ast
3
+ import sys
4
+
5
+ """
6
+ name = input()
7
+ sql = f"SELECT * FROM users WHERE name='{name}'"
8
+ # attacker types: '; DROP TABLE users; --
9
+ cur.execute(sql) # the injected statement runs and the users table is dropped
10
+ """
11
+
12
+ def _qualified_name_from_call(node):
13
+ func = node.func
14
+ parts = []
15
+ while isinstance(func, ast.Attribute):
16
+ parts.append(func.attr)
17
+ func = func.value
18
+
19
+ if isinstance(func, ast.Name):
20
+ parts.append(func.id)
21
+ parts.reverse()
22
+ return ".".join(parts)
23
+
24
+ if isinstance(func, ast.Name):
25
+ return func.id
26
+
27
+ return None
28
+
29
+ def _is_interpolated_string(node):
30
+
31
+ if isinstance(node, ast.JoinedStr):
32
+ return True
33
+
34
+ if isinstance(node, ast.BinOp) and isinstance(node.op, (ast.Add, ast.Mod)):
35
+ return True
36
+
37
+ if isinstance(node, ast.Call) and isinstance(node.func, ast.Attribute) and node.func.attr == "format":
38
+ return True
39
+
40
+ return False
41
+
42
+ def _add_finding(findings, file_path, node, rule_id, severity, message):
43
+ findings.append({
44
+ "rule_id": rule_id,
45
+ "severity": severity,
46
+ "message": message,
47
+ "file": str(file_path),
48
+ "line": getattr(node, "lineno", 1),
49
+ "col": getattr(node, "col_offset", 0),
50
+ })
51
+
52
+ class _SQLFlowChecker(ast.NodeVisitor):
53
+
54
+ SQL_SINK_SUFFIXES = (".execute", ".executemany", ".executescript")
55
+
56
+ def __init__(self, file_path, findings):
57
+ self.file_path = file_path
58
+ self.findings = findings
59
+ self.env_stack = []
60
+
61
+ def _push(self):
62
+ self.env_stack.append({})
63
+
64
+ def _pop(self):
65
+ self.env_stack.pop()
66
+
67
+ def _set(self, name, tainted):
68
+ if not self.env_stack:
69
+ self._push()
70
+ self.env_stack[-1][name] = bool(tainted)
71
+
72
+ def _get(self, name):
73
+ for env in reversed(self.env_stack):
74
+ if name in env:
75
+ return env[name]
76
+ return False
77
+
78
+ def _tainted(self, node):
79
+ if _is_interpolated_string(node):
80
+ return True
81
+
82
+ if isinstance(node, ast.Call) and isinstance(node.func, ast.Name) and node.func.id == "input":
83
+ return True
84
+
85
+ if isinstance(node, (ast.Attribute, ast.Subscript)):
86
+ if isinstance(node, ast.Subscript):
87
+ base = node.value
88
+ else:
89
+ base = node.value
90
+ while isinstance(base, ast.Attribute):
91
+ base = base.value
92
+ if isinstance(base, ast.Name) and base.id == "request":
93
+ return True
94
+
95
+ if isinstance(node, ast.Name):
96
+ tainted = self._get(node.id)
97
+ return tainted
98
+
99
+ if isinstance(node, ast.BinOp):
100
+ return self._tainted(node.left) or self._tainted(node.right)
101
+
102
+ if isinstance(node, ast.Call):
103
+ for arg in node.args:
104
+ if self._tainted(arg):
105
+ return True
106
+ return False
107
+ return False
108
+
109
+ def visit_FunctionDef(self, node):
110
+ self._push()
111
+ self.generic_visit(node)
112
+ self._pop()
113
+
114
+ def visit_AsyncFunctionDef(self, node):
115
+ self._push()
116
+ self.generic_visit(node)
117
+ self._pop()
118
+
119
+ def visit_Assign(self, node):
120
+ taint = self._tainted(node.value)
121
+ for tgt in node.targets:
122
+ if isinstance(tgt, ast.Name):
123
+ self._set(tgt.id, taint)
124
+ self.generic_visit(node)
125
+
126
+ def visit_AnnAssign(self, node):
127
+
128
+ if node.value is not None:
129
+ taint = self._tainted(node.value)
130
+ else:
131
+ taint = False
132
+
133
+ if isinstance(node.target, ast.Name):
134
+ self._set(node.target.id, taint)
135
+ self.generic_visit(node)
136
+
137
+ def visit_AugAssign(self, node):
138
+ taint = self._tainted(node.target) or self._tainted(node.value)
139
+ if isinstance(node.target, ast.Name):
140
+ self._set(node.target.id, taint)
141
+ self.generic_visit(node)
142
+
143
+ def visit_Call(self, node):
144
+ qn = _qualified_name_from_call(node)
145
+
146
+ if qn and qn.endswith(self.SQL_SINK_SUFFIXES) and node.args:
147
+ arg0 = node.args[0]
148
+
149
+ is_interp = _is_interpolated_string(arg0)
150
+ is_tainted = self._tainted(arg0)
151
+
152
+ if is_interp or is_tainted:
153
+ _add_finding(
154
+ self.findings, self.file_path, node,
155
+ "SKY-D211", "CRITICAL",
156
+ "Possible SQL injection: tainted SQL passed to SQL execution method."
157
+ )
158
+
159
+ self.generic_visit(node)
160
+
161
+ def generic_visit(self, node):
162
+ for field, value in ast.iter_fields(node):
163
+ if isinstance(value, list):
164
+ for item in value:
165
+ if isinstance(item, ast.AST):
166
+ self.visit(item)
167
+ elif isinstance(value, ast.AST):
168
+ self.visit(value)
169
+
170
def scan(tree, file_path, findings):
    """Run the SQL flow checker over ``tree``, appending hits to ``findings``.

    Any exception raised while building or running the checker is reported
    on stderr and swallowed; nothing is returned.
    """
    try:
        _SQLFlowChecker(file_path, findings).visit(tree)
    except Exception as exc:
        print(f"SQL flow analysis failed for {file_path}: {exc}", file=sys.stderr)
@@ -0,0 +1,202 @@
1
+ from __future__ import annotations
2
+ import ast
3
+
4
+ """
5
+ Raw SQL injection flow analysis for sqlalchemy.text(), pandas.read_sql(), and Django .raw().
6
+ """
7
+
8
+ def _is_interpolated_string(n: ast.AST):
9
+ if isinstance(n, ast.JoinedStr):
10
+ return True
11
+ if isinstance(n, ast.BinOp) and isinstance(n.op, (ast.Add, ast.Mod)):
12
+ return True
13
+ if isinstance(n, ast.Call) and isinstance(n.func, ast.Attribute) and n.func.attr == "format":
14
+ return True
15
+ return False
16
+
17
+ def _qualified_name_from_call(node: ast.Call):
18
+ f = node.func
19
+ parts = []
20
+
21
+ while isinstance(f, ast.Attribute):
22
+ parts.append(f.attr); f = f.value
23
+
24
+ if isinstance(f, ast.Name):
25
+ parts.append(f.id); parts.reverse()
26
+ return ".".join(parts)
27
+
28
+ if isinstance(f, ast.Name):
29
+ return f.id
30
+ return None
31
+
32
+ def _add_finding(findings, file_path, node: ast.AST, rule_id, severity, message):
33
+ findings.append({
34
+ "rule_id": rule_id,
35
+ "severity": severity,
36
+ "message": message,
37
+ "file": str(file_path),
38
+ "line": getattr(node, "lineno", 1),
39
+ "col": getattr(node, "col_offset", 0),
40
+ })
41
+
42
+ class _SQLRawFlowChecker(ast.NodeVisitor):
43
+
44
+ # PANDAS_FUNCS = {"read_sql", "read_sql_query"}
45
+ # SQLALCHEMY_TEXT = "sqlalchemy.text"
46
+
47
+ def __init__(self, file_path, findings):
48
+ self.file_path = file_path
49
+ self.findings = findings
50
+ self.env_stack = []
51
+
52
+ def _push(self):
53
+ self.env_stack.append({})
54
+
55
+ def _pop(self):
56
+ self.env_stack.pop()
57
+
58
+ def _set(self, name, tainted):
59
+ if not self.env_stack: self._push()
60
+ self.env_stack[-1][name] = bool(tainted)
61
+
62
+ def _get(self, name):
63
+ for env in reversed(self.env_stack):
64
+ if name in env:
65
+ return env[name]
66
+ return False
67
+
68
+ def _traverse_children(self, node):
69
+ for child in ast.iter_child_nodes(node):
70
+ self.visit(child)
71
+
72
+ def expr_is_tainted(self, n: ast.AST):
73
+ if _is_interpolated_string(n):
74
+ return True
75
+
76
+ if isinstance(n, ast.Call) and isinstance(n.func, ast.Name) and n.func.id == "input":
77
+ return True
78
+
79
+ if isinstance(n, (ast.Attribute, ast.Subscript)):
80
+ base = n.value
81
+ while isinstance(base, ast.Attribute):
82
+ base = base.value
83
+
84
+ if isinstance(base, ast.Name) and base.id == "request":
85
+ return True
86
+
87
+ if isinstance(n, ast.Name):
88
+ return self._get(n.id)
89
+
90
+ if isinstance(n, (ast.Attribute, ast.Subscript)):
91
+ return self.expr_is_tainted(n.value)
92
+
93
+ if isinstance(n, ast.Call):
94
+ for arg in n.args:
95
+ if self.expr_is_tainted(arg):
96
+ return True
97
+ return False
98
+
99
+ if isinstance(n, ast.BinOp):
100
+ return self.expr_is_tainted(n.left) or self.expr_is_tainted(n.right)
101
+
102
+ return False
103
+
104
+ def visit_FunctionDef(self, node: ast.FunctionDef):
105
+ self._push()
106
+ for arg in node.args.args:
107
+ self._set(arg.arg, True)
108
+ self._traverse_children(node)
109
+ self._pop()
110
+
111
+ def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
112
+ self._push()
113
+ for arg in node.args.args:
114
+ self._set(arg.arg, True)
115
+ self._traverse_children(node)
116
+ self._pop()
117
+
118
+ def visit_Assign(self, node: ast.Assign):
119
+ tainted = self.expr_is_tainted(node.value)
120
+ for tgt in node.targets:
121
+ if isinstance(tgt, ast.Name):
122
+ self._set(tgt.id, tainted)
123
+ self._traverse_children(node)
124
+
125
+ def visit_AnnAssign(self, node: ast.AnnAssign):
126
+ if isinstance(node.target, ast.Name):
127
+ if node.value is not None:
128
+ tainted = self.expr_is_tainted(node.value)
129
+ else:
130
+ tainted = False
131
+ self._set(node.target.id, tainted)
132
+
133
+ self._traverse_children(node)
134
+
135
+ def visit_AugAssign(self, node: ast.AugAssign):
136
+ tainted = self.expr_is_tainted(node.target) or self.expr_is_tainted(node.value)
137
+ if isinstance(node.target, ast.Name):
138
+ self._set(node.target.id, tainted)
139
+
140
+ self._traverse_children(node)
141
+
142
+ def visit_Call(self, node: ast.Call):
143
+ qn = _qualified_name_from_call(node)
144
+ if not qn:
145
+ return self._traverse_children(node)
146
+
147
+ """
148
+ import sqlalchemy as sa
149
+ ip = input()
150
+ # attacker runs: "'; DROP TABLE logs; --"
151
+ sa.text("DELETE FROM logs WHERE ip='" + ip + "'") # tainted SQL
152
+
153
+ """
154
+
155
+ if qn.endswith(".text") and node.args:
156
+ sql = node.args[0]
157
+ if _is_interpolated_string(sql) or self.expr_is_tainted(sql):
158
+ _add_finding(
159
+ self.findings, self.file_path, node,
160
+ "SKY-D217", "CRITICAL",
161
+ "Possible SQL injection: tainted SQL passed to sqlalchemy.text()."
162
+ )
163
+
164
+ """
165
+ import pandas as pd
166
+ name = input()
167
+ # attacker runs: "' OR 1=1; --"
168
+ pd.read_sql(f"SELECT * FROM users WHERE name='{name}'", conn) # tainted SQL
169
+ """
170
+ if (qn.endswith(".read_sql") or qn.endswith(".read_sql_query")) and node.args:
171
+ sql = node.args[0]
172
+ if _is_interpolated_string(sql) or self.expr_is_tainted(sql):
173
+ _add_finding(
174
+ self.findings, self.file_path, node,
175
+ "SKY-D217", "CRITICAL",
176
+ "Possible SQL injection: tainted SQL passed to pandas.read_sql()."
177
+ )
178
+
179
+ """
180
+ ## note this is for double quotation mark
181
+ u = input()
182
+ # attacker: "'; DROP TABLE auth_user; --"
183
+ User.objects.raw("SELECT * FROM auth_user WHERE username='" + u + "'") # tainted SQL
184
+ """
185
+
186
+ if qn.endswith(".objects.raw") and node.args:
187
+ sql = node.args[0]
188
+ if _is_interpolated_string(sql) or self.expr_is_tainted(sql):
189
+ _add_finding(
190
+ self.findings, self.file_path, node,
191
+ "SKY-D217", "CRITICAL",
192
+ "Possible SQL injection: tainted SQL passed to Django .raw()."
193
+ )
194
+
195
+ self._traverse_children(node)
196
+
197
+ def generic_visit(self, node):
198
+ for child in ast.iter_child_nodes(node):
199
+ self.visit(child)
200
+
201
def scan(tree: ast.AST, file_path, findings):
    """Run the raw-SQL flow checker over ``tree``, appending hits to ``findings``.

    Any exception raised while building or running the checker is reported
    on stderr and swallowed, so a failure on one file does not propagate to
    the caller. Nothing is returned.
    """
    # Imported locally so this function does not depend on a module-level
    # ``import sys``.
    import sys

    try:
        _SQLRawFlowChecker(file_path, findings).visit(tree)
    except Exception as e:
        print(f"Raw SQL flow analysis failed for {file_path}: {e}", file=sys.stderr)
File without changes
@@ -0,0 +1,279 @@
1
+ from __future__ import annotations
2
+ import ast
3
+ import sys
4
+
5
+ """
6
+ Common XSS patterns that LLMs tend to generate:
7
+
8
+ 1) Marking untrusted HTML as safe
9
+
10
+ from django.utils.safestring import mark_safe
11
+ return HttpResponse(mark_safe(input())) # -> XSS
12
+
13
+ 2) Unsafe inline templates
14
+ from flask import render_template_string
15
+ render_template_string("<p>{{ body|safe }}</p>", body=request.args["body"]) # -> XSS
16
+ render_template_string("{% autoescape false %}{{ x }}{% endautoescape %}", x=input()) # -> XSS
17
+
18
+ 3) Returning string-built HTML directly with user input
19
+ return "<div>" + request.args["q"] + "</div>" # -> XSS
20
+ """
21
+
22
+ def _qualified_name_from_call(node: ast.Call):
23
+ func = node.func
24
+ parts = []
25
+ while isinstance(func, ast.Attribute):
26
+ parts.append(func.attr)
27
+ func = func.value
28
+
29
+ if isinstance(func, ast.Name):
30
+ parts.append(func.id)
31
+ parts.reverse()
32
+ return ".".join(parts)
33
+
34
+ if isinstance(func, ast.Name):
35
+ return func.id
36
+
37
+ return None
38
+
39
+ def _is_interpolated_string(node: ast.AST):
40
+ if isinstance(node, ast.JoinedStr):
41
+ return True
42
+
43
+ if isinstance(node, ast.BinOp) and isinstance(node.op, (ast.Add, ast.Mod)):
44
+ return True
45
+
46
+ if isinstance(node, ast.Call) and isinstance(node.func, ast.Attribute) and node.func.attr == "format":
47
+ return True
48
+
49
+ return False
50
+
51
+ def _add_finding(findings, file_path, node, rule_id, severity, message):
52
+ findings.append({
53
+ "rule_id": rule_id,
54
+ "severity": severity,
55
+ "message": message,
56
+ "file": str(file_path),
57
+ "line": getattr(node, "lineno", 1),
58
+ "col": getattr(node, "col_offset", 0),
59
+ })
60
+
61
+ def _const_str_value(node: ast.AST):
62
+ if isinstance(node, ast.Constant) and isinstance(node.value, str):
63
+ return node.value
64
+ return None
65
+
66
+ def _const_contains_html(node: ast.AST):
67
+
68
+ if isinstance(node, ast.Constant) and isinstance(node.value, str):
69
+ s = node.value
70
+ return ("<" in s) and (">" in s)
71
+ return False
72
+
73
+ class _XSSFlowChecker(ast.NodeVisitor):
74
+
75
+ SAFE_MARK_FUNCS = {"Markup", "mark_safe"}
76
+
77
+ def __init__(self, file_path, findings):
78
+ self.file_path = file_path
79
+ self.findings = findings
80
+ self.env_stack = []
81
+
82
+ def _push(self):
83
+ self.env_stack.append({})
84
+
85
+ def _pop(self):
86
+ self.env_stack.pop()
87
+
88
+ def _set(self, name, tainted):
89
+ if not self.env_stack:
90
+ self._push()
91
+ self.env_stack[-1][name] = bool(tainted)
92
+
93
+ def _get(self, name):
94
+ for env in reversed(self.env_stack):
95
+ if name in env:
96
+ return env[name]
97
+ return False
98
+
99
+ def _tainted(self, node: ast.AST):
100
+ if _is_interpolated_string(node):
101
+ return True
102
+
103
+ if isinstance(node, ast.Call) and isinstance(node.func, ast.Name) and node.func.id == "input":
104
+ return True
105
+
106
+ if isinstance(node, (ast.Attribute, ast.Subscript)):
107
+ base = node.value if isinstance(node, ast.Subscript) else node.value
108
+ while isinstance(base, ast.Attribute):
109
+ base = base.value
110
+ if isinstance(base, ast.Name) and base.id == "request":
111
+ return True
112
+
113
+ if isinstance(node, ast.Name):
114
+ return self._get(node.id)
115
+
116
+ if isinstance(node, (ast.Attribute, ast.Subscript)):
117
+ return self._tainted(node.value)
118
+
119
+ if isinstance(node, ast.BinOp):
120
+ return self._tainted(node.left) or self._tainted(node.right)
121
+
122
+ if isinstance(node, ast.Call):
123
+ for arg in node.args:
124
+ if self._tainted(arg):
125
+ return True
126
+ return False
127
+
128
+ return False
129
+
130
+ def _template_is_unsafe_literal(self, node: ast.AST):
131
+
132
+ s = _const_str_value(node)
133
+ if not s:
134
+ return False
135
+
136
+ low = s.lower()
137
+ if "|safe" in low:
138
+ return True
139
+ if "{% autoescape false %}" in low:
140
+ return True
141
+
142
+ return False
143
+
144
+ def _html_built_with_taint(self, node: ast.AST):
145
+
146
+ if isinstance(node, ast.JoinedStr):
147
+ has_html = False
148
+ for v in node.values:
149
+ if isinstance(v, ast.Constant):
150
+ if _const_contains_html(v):
151
+ has_html = True
152
+ break
153
+
154
+ if not has_html:
155
+ return False
156
+
157
+ for v in node.values:
158
+ if isinstance(v, ast.FormattedValue):
159
+ if self._tainted(v.value):
160
+ return True
161
+
162
+ return False
163
+
164
+ if isinstance(node, ast.BinOp) and isinstance(node.op, (ast.Add, ast.Mod)):
165
+ left_html = _const_contains_html(node.left)
166
+ right_html = _const_contains_html(node.right)
167
+ any_html = left_html or right_html
168
+ if not any_html:
169
+ left_html = self._binop_has_html_const(node.left)
170
+ right_html = self._binop_has_html_const(node.right)
171
+ any_html = left_html or right_html
172
+ if not any_html:
173
+ return False
174
+ # taint on either side?
175
+ return self._tainted(node.left) or self._tainted(node.right)
176
+
177
+ if isinstance(node, ast.Call) and isinstance(node.func, ast.Attribute) and node.func.attr == "format":
178
+ base = node.func.value
179
+ if _const_contains_html(base):
180
+ for a in node.args:
181
+ if self._tainted(a):
182
+ return True
183
+ return False
184
+
185
+ return False
186
+
187
+ def _binop_has_html_const(self, node: ast.AST):
188
+ if _const_contains_html(node):
189
+ return True
190
+ if isinstance(node, ast.BinOp) and isinstance(node.op, (ast.Add, ast.Mod)):
191
+ return self._binop_has_html_const(node.left) or self._binop_has_html_const(node.right)
192
+ return False
193
+
194
+ def visit_FunctionDef(self, node: ast.FunctionDef):
195
+ self._push()
196
+ self.generic_visit(node)
197
+ self._pop()
198
+
199
+ def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
200
+ self._push()
201
+ self.generic_visit(node)
202
+ self._pop()
203
+
204
+ def visit_Assign(self, node: ast.Assign):
205
+ t = self._tainted(node.value)
206
+ for tgt in node.targets:
207
+ if isinstance(tgt, ast.Name):
208
+ self._set(tgt.id, t)
209
+ self.generic_visit(node)
210
+
211
+ def visit_AnnAssign(self, node: ast.AnnAssign):
212
+ if node.value is not None:
213
+ tainted = self._tainted(node.value)
214
+ else:
215
+ tainted = False
216
+
217
+ if isinstance(node.target, ast.Name):
218
+ self._set(node.target.id, tainted)
219
+ self.generic_visit(node)
220
+
221
+ def visit_AugAssign(self, node: ast.AugAssign):
222
+ t = self._tainted(node.target) or self._tainted(node.value)
223
+ if isinstance(node.target, ast.Name):
224
+ self._set(node.target.id, t)
225
+ self.generic_visit(node)
226
+
227
+ def visit_Call(self, node: ast.Call):
228
+ qn = _qualified_name_from_call(node)
229
+
230
+ if qn and node.args:
231
+ func_name = qn.split(".")[-1]
232
+ if func_name in self.SAFE_MARK_FUNCS:
233
+ arg0 = node.args[0]
234
+ is_interp = _is_interpolated_string(arg0)
235
+ is_tainted = self._tainted(arg0)
236
+ if is_interp or is_tainted:
237
+ _add_finding(
238
+ self.findings, self.file_path, node,
239
+ "SKY-D226", "CRITICAL",
240
+ "Possible XSS: untrusted content marked safe"
241
+ )
242
+
243
+ if qn and qn.split(".")[-1] == "render_template_string" and node.args:
244
+ tmpl = node.args[0]
245
+ if self._template_is_unsafe_literal(tmpl):
246
+ _add_finding(
247
+ self.findings, self.file_path, node,
248
+ "SKY-D227", "HIGH",
249
+ "Possible XSS: unsafe inline template disables escaping"
250
+ )
251
+
252
+ self.generic_visit(node)
253
+
254
+ def visit_Return(self, node: ast.Return):
255
+
256
+ if node.value is not None:
257
+ if self._html_built_with_taint(node.value):
258
+ _add_finding(
259
+ self.findings, self.file_path, node,
260
+ "SKY-D228", "HIGH",
261
+ "XSS (HTML built from unescaped user input)"
262
+ )
263
+ self.generic_visit(node)
264
+
265
+ def generic_visit(self, node):
266
+ for field, value in ast.iter_fields(node):
267
+ if isinstance(value, list):
268
+ for item in value:
269
+ if isinstance(item, ast.AST):
270
+ self.visit(item)
271
+ elif isinstance(value, ast.AST):
272
+ self.visit(value)
273
+
274
def scan(tree, file_path, findings):
    """Run the XSS checker over ``tree``, appending hits to ``findings``.

    Any exception raised while building or running the checker is reported
    on stderr and swallowed; nothing is returned.
    """
    try:
        _XSSFlowChecker(file_path, findings).visit(tree)
    except Exception as exc:
        print(f"XSS analysis failed for {file_path}: {exc}", file=sys.stderr)