skylos 1.0.10__py3-none-any.whl → 2.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skylos/__init__.py +9 -3
- skylos/analyzer.py +674 -168
- skylos/cfg_visitor.py +60 -0
- skylos/cli.py +719 -235
- skylos/codemods.py +277 -0
- skylos/config.py +50 -0
- skylos/constants.py +78 -0
- skylos/gatekeeper.py +147 -0
- skylos/linter.py +18 -0
- skylos/rules/base.py +20 -0
- skylos/rules/danger/calls.py +119 -0
- skylos/rules/danger/danger.py +157 -0
- skylos/rules/danger/danger_cmd/cmd_flow.py +75 -0
- skylos/rules/danger/danger_fs/__init__.py +0 -0
- skylos/rules/danger/danger_fs/path_flow.py +79 -0
- skylos/rules/danger/danger_net/__init__.py +0 -0
- skylos/rules/danger/danger_net/ssrf_flow.py +80 -0
- skylos/rules/danger/danger_sql/__init__.py +0 -0
- skylos/rules/danger/danger_sql/sql_flow.py +245 -0
- skylos/rules/danger/danger_sql/sql_raw_flow.py +96 -0
- skylos/rules/danger/danger_web/__init__.py +0 -0
- skylos/rules/danger/danger_web/xss_flow.py +170 -0
- skylos/rules/danger/taint.py +110 -0
- skylos/rules/quality/__init__.py +0 -0
- skylos/rules/quality/complexity.py +95 -0
- skylos/rules/quality/logic.py +96 -0
- skylos/rules/quality/nesting.py +101 -0
- skylos/rules/quality/structure.py +99 -0
- skylos/rules/secrets.py +325 -0
- skylos/server.py +554 -0
- skylos/visitor.py +502 -90
- skylos/visitors/__init__.py +0 -0
- skylos/visitors/framework_aware.py +437 -0
- skylos/visitors/test_aware.py +74 -0
- skylos-2.5.2.dist-info/METADATA +21 -0
- skylos-2.5.2.dist-info/RECORD +42 -0
- {skylos-1.0.10.dist-info → skylos-2.5.2.dist-info}/WHEEL +1 -1
- {skylos-1.0.10.dist-info → skylos-2.5.2.dist-info}/top_level.txt +0 -1
- skylos-1.0.10.dist-info/METADATA +0 -8
- skylos-1.0.10.dist-info/RECORD +0 -21
- test/compare_tools.py +0 -604
- test/diagnostics.py +0 -364
- test/sample_repo/app.py +0 -13
- test/sample_repo/sample_repo/commands.py +0 -81
- test/sample_repo/sample_repo/models.py +0 -122
- test/sample_repo/sample_repo/routes.py +0 -89
- test/sample_repo/sample_repo/utils.py +0 -36
- test/test_skylos.py +0 -456
- test/test_visitor.py +0 -220
- {test → skylos/rules}/__init__.py +0 -0
- {test/sample_repo → skylos/rules/danger}/__init__.py +0 -0
- {test/sample_repo/sample_repo → skylos/rules/danger/danger_cmd}/__init__.py +0 -0
- {skylos-1.0.10.dist-info → skylos-2.5.2.dist-info}/entry_points.txt +0 -0
skylos/rules/secrets.py
ADDED
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import re, ast
|
|
3
|
+
from math import log2
|
|
4
|
+
|
|
5
|
+
# Public API of this module: only the scanner entry point is exported by
# ``from ... import *``; every helper below is private.
__all__ = ["scan_ctx"]
|
|
6
|
+
|
|
7
|
+
# Only files whose relative path ends with one of these suffixes are scanned.
ALLOWED_FILE_SUFFIXES = (".py", ".pyi", ".pyw")

# (provider name, compiled pattern) pairs for tokens with a recognizable,
# provider-specific shape. Each pattern is applied to one source line at a time.
PROVIDER_PATTERNS = [
    ("github", re.compile(r"(ghp|gho|ghu|ghs|ghr|gpat)_[A-Za-z0-9]{36,}")),
    ("gitlab", re.compile(r"glpat-[A-Za-z0-9_-]{20,}")),
    ("slack", re.compile(r"xox[abprs]-[A-Za-z0-9-]{10,48}")),
    ("stripe", re.compile(r"sk_(live|test)_[A-Za-z0-9]{16,}")),
    (
        "aws_access_key_id",
        re.compile(r"\b(AKIA|ASIA|AGPA|AIDA|AROA|AIPA)[0-9A-Z]{16}\b"),
    ),
    ("google_api_key", re.compile(r"\bAIza[0-9A-Za-z\-_]{35}\b")),
    ("sendgrid", re.compile(r"\bSG\.[A-Za-z0-9_-]{16,}\.[A-Za-z0-9_-]{16,}\b")),
    ("twilio", re.compile(r"\bSK[0-9a-fA-F]{32}\b")),
    (
        "private_key_block",
        re.compile(r"-----BEGIN (?:RSA|DSA|EC|OPENSSH|PGP) PRIVATE KEY-----"),
    ),
]

# Generic (provider-agnostic) secret candidates. Two alternatives:
#
#   1. ``<keyword> [:=] "<value>"`` where the keyword (token, api_key, ...) is
#      matched case-insensitively and the quoted value is at least 16
#      characters long; the value is exposed as the named group ``val``.
#   2. A "bare" run of 32+ characters from [A-Za-z0-9_-] that contains at least
#      one upper-case letter, one lower-case letter and one digit; exposed as
#      the named group ``bare``.
#
# Case-insensitivity is scoped to the keyword group only. Applying it to the
# whole pattern would make ``[A-Z]`` and ``[a-z]`` both match any letter and
# silently disable the mixed-case requirement of the bare branch. The three
# content lookaheads are restricted to the token's own character class so that
# characters elsewhere on the line cannot satisfy them.
GENERIC_VALUE = re.compile(r"""(?x)
    (?:
        ((?i:token|api[_-]?key|secret|password|passwd|pwd|bearer|auth[_-]?token|access[_-]?token))
        \s*[:=]\s*(?P<q>['"])(?P<val>[^'"]{16,})(?P=q)
    )
    |
    (?P<bare>
        (?=[A-Za-z0-9_-]{32,}\b)
        (?=[A-Za-z0-9_-]*[A-Z])
        (?=[A-Za-z0-9_-]*[a-z])
        (?=[A-Za-z0-9_-]*\d)
        [A-Za-z0-9_-]+
    )
""")

# Lower-case substrings that mark a candidate token as an obvious
# placeholder/test value; a token containing any of them is not reported.
SAFE_TEST_HINTS = {
    "example",
    "sample",
    "fake",
    "placeholder",
    "dummy",
    "test_",
    "_test",
    "test_test_",
    "changeme",
    "password",
    "secret",
    "not_a_real",
    "do_not_use",
}

# Shape of a plain ASCII Python identifier.
_IDENTIFIER = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")

# A line containing this text is never scanned.
IGNORE_DIRECTIVE = "skylos: ignore[SKY-S101]"

# Default Shannon-entropy threshold (bits per character) for entropy-gated rules.
DEFAULT_MIN_ENTROPY = 3.9

# Matches forward-slash paths that have a ``test``/``tests`` directory (or final
# component) or a ``test_*.py`` file name.
IS_TEST_PATH = re.compile(r"(^|/)(tests?(/|$)|test_[^/]+\.py$)")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _entropy(s):
    """Return the Shannon entropy of *s* in bits per symbol.

    An empty input yields ``0.0``. Otherwise each distinct symbol contributes
    ``-p * log2(p)``, where ``p`` is its relative frequency in *s*.
    """
    size = len(s)
    if size == 0:
        return 0.0

    # Tally how often every symbol occurs.
    frequency = {}
    for symbol in s:
        frequency[symbol] = frequency.get(symbol, 0) + 1

    bits = 0.0
    for occurrences in frequency.values():
        share = occurrences / size
        bits -= share * log2(share)
    return bits
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _mask(tok):
    """Return a redacted preview of *tok*.

    Tokens of eight characters or fewer are replaced entirely by asterisks
    (one per character). Longer tokens keep their first four and last four
    characters, joined by a single ellipsis character.
    """
    size = len(tok)
    if size > 8:
        return "…".join((tok[:4], tok[-4:]))
    return "*" * size
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _looks_like_identifier(s):
    """Return True when the whole of *s* matches the ``_IDENTIFIER`` pattern."""
    return _IDENTIFIER.fullmatch(s) is not None
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _docstring_lines(tree):
    """Collect the 1-based line numbers occupied by docstrings in *tree*.

    A docstring here is a string-constant expression statement that is the
    first statement in the body of the module (when *tree* is an
    ``ast.Module``) or of any function, async function or class found while
    walking *tree*. Returns an empty set when *tree* is ``None``.
    """
    if tree is None:
        return set()

    covered = set()

    def record(owner):
        # Only the first body statement can be a docstring.
        body = getattr(owner, "body", None)
        if not body:
            return

        head = body[0]
        if not isinstance(head, ast.Expr):
            return

        literal = getattr(head, "value", None)
        if not (isinstance(literal, ast.Constant) and isinstance(literal.value, str)):
            return

        first = getattr(head, "lineno", None)
        if first is None:
            return

        # Fall back to a single-line span when no end line is available.
        last = getattr(head, "end_lineno", first)
        if last is None:
            last = first

        covered.update(range(first, last + 1))

    if isinstance(tree, ast.Module):
        record(tree)

    definition_types = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
    for candidate in ast.walk(tree):
        if isinstance(candidate, definition_types):
            record(candidate)

    return covered
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
# Candidate AWS secret access key value: 40 characters from the base64-like
# alphabet, optionally wrapped in quotes. Compiled once instead of per line.
_AWS_SECRET_VALUE = re.compile(r"['\"]?([A-Za-z0-9/+=]{40})['\"]?")


def _has_safe_hint(token):
    """Return True when *token* contains any SAFE_TEST_HINTS substring (case-insensitive)."""
    lowered = token.lower()
    return any(hint in lowered for hint in SAFE_TEST_HINTS)


def _build_finding(provider, message, rel_path, line_number, start, token, entropy=None):
    """Build one SKY-S101 finding dict for *token* starting at 0-based column *start*."""
    finding = {
        "rule_id": "SKY-S101",
        "severity": "CRITICAL",
        "provider": provider,
        "message": message,
        "file": rel_path,
        "line": line_number,
        "col": max(0, start),
        "end_col": max(1, start + len(token)),
        "preview": _mask(token),
    }
    if entropy is not None:
        finding["entropy"] = round(entropy, 2)
    return finding


def scan_ctx(
    ctx,
    *,
    min_entropy=DEFAULT_MIN_ENTROPY,
    scan_comments=True,
    scan_docstrings=True,
    allowlist_patterns=None,
    ignore_path_substrings=None,
    ignore_tests=True,
):
    """Scan one file's lines for hard-coded secrets and return the findings.

    Args:
        ctx: Mapping read via ``.get`` for the keys ``"relpath"`` (file path
            string), ``"lines"`` (iterable of source lines) and ``"tree"``
            (parsed AST, consulted only when ``scan_docstrings`` is false).
        min_entropy: Minimum Shannon entropy a candidate must reach for the
            AWS-secret and generic rules to report it.
        scan_comments: When false, lines whose first non-blank character is
            ``#`` are skipped.
        scan_docstrings: When false, lines belonging to docstrings (as
            computed by ``_docstring_lines``) are skipped.
        allowlist_patterns: Optional iterable of regex strings; a line that
            any of them matches is skipped entirely.
        ignore_path_substrings: Optional iterable of strings; if any
            non-empty one occurs in the relative path, nothing is scanned.
        ignore_tests: When true, files whose path matches ``IS_TEST_PATH``
            are not scanned at all.

    Returns:
        A list of finding dicts (rule ``SKY-S101``). ``col``/``end_col`` are
        0-based offsets of the matched token within its line, and ``preview``
        is the masked token. AWS-secret and generic findings also carry an
        ``entropy`` value rounded to two decimals.

    Raises:
        re.error: If an entry of ``allowlist_patterns`` is not a valid regex.
    """
    # ``or ""`` also covers an explicit ``None`` stored under "relpath".
    rel_path = ctx.get("relpath") or ""
    if not rel_path.endswith(ALLOWED_FILE_SUFFIXES):
        return []

    # Computed once: the path does not change while iterating over lines.
    in_tests = bool(IS_TEST_PATH.search(rel_path.replace("\\", "/")))
    if ignore_tests and in_tests:
        return []

    if ignore_path_substrings and any(
        substring and substring in rel_path for substring in ignore_path_substrings
    ):
        return []

    file_lines = ctx.get("lines") or []
    allowlist_regexes = [re.compile(pattern) for pattern in allowlist_patterns or ()]

    # Docstring lines only need to be located when they must be skipped.
    if scan_docstrings:
        skipped_docstring_lines = set()
    else:
        skipped_docstring_lines = _docstring_lines(ctx.get("tree"))

    findings = []

    for line_number, raw_line in enumerate(file_lines, start=1):
        line_content = raw_line.rstrip("\n")

        if IGNORE_DIRECTIVE in line_content:
            continue
        if not scan_comments and line_content.lstrip().startswith("#"):
            continue
        if line_number in skipped_docstring_lines:
            continue
        if any(regex.search(line_content) for regex in allowlist_regexes):
            continue

        # 1) Provider-specific token shapes.
        for provider_name, pattern_regex in PROVIDER_PATTERNS:
            for regex_match in pattern_regex.finditer(line_content):
                potential_secret = regex_match.group(0)
                if _has_safe_hint(potential_secret):
                    continue
                findings.append(
                    _build_finding(
                        provider_name,
                        f"Potential {provider_name} secret detected",
                        rel_path,
                        line_number,
                        # Use the match position, not str.find(): the same
                        # token text may occur more than once on the line.
                        regex_match.start(),
                        potential_secret,
                    )
                )

        # 2) AWS secret access key: needs the indicator name on the line
        #    (matched case-insensitively) plus a 40-character high-entropy value.
        if "aws_secret_access_key" in line_content.lower():
            aws_match = _AWS_SECRET_VALUE.search(line_content)
            if aws_match:
                aws_token = aws_match.group(1)
                tok_entropy = _entropy(aws_token)
                if tok_entropy >= min_entropy:
                    findings.append(
                        _build_finding(
                            "aws_secret_access_key",
                            "Potential AWS secret access key detected",
                            rel_path,
                            line_number,
                            aws_match.start(1),
                            aws_token,
                            entropy=tok_entropy,
                        )
                    )

        # 3) Generic high-entropy values; never applied to test paths, even
        #    when ``ignore_tests`` is false.
        if in_tests:
            continue

        generic_match = GENERIC_VALUE.search(line_content)
        if not generic_match:
            continue

        if generic_match.group("val"):
            group_name = "val"
        elif generic_match.group("bare"):
            group_name = "bare"
        else:
            continue

        extracted_token = generic_match.group(group_name)
        clean_token = extracted_token.strip()
        if not clean_token:
            continue

        # A bare token shaped like an identifier is most likely a name, not a secret.
        if group_name == "bare" and _looks_like_identifier(clean_token):
            continue
        if _has_safe_hint(clean_token):
            continue

        tok_entropy = _entropy(clean_token)
        if tok_entropy >= min_entropy and len(clean_token) >= 20:
            # Column of the stripped token inside the matched group.
            leading_ws = len(extracted_token) - len(extracted_token.lstrip())
            findings.append(
                _build_finding(
                    "generic",
                    f"High-entropy value detected (entropy={tok_entropy:.2f})",
                    rel_path,
                    line_number,
                    generic_match.start(group_name) + leading_ws,
                    clean_token,
                    entropy=tok_entropy,
                )
            )

    return findings
|