skylos 1.0.10__py3-none-any.whl → 2.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. skylos/__init__.py +9 -3
  2. skylos/analyzer.py +674 -168
  3. skylos/cfg_visitor.py +60 -0
  4. skylos/cli.py +719 -235
  5. skylos/codemods.py +277 -0
  6. skylos/config.py +50 -0
  7. skylos/constants.py +78 -0
  8. skylos/gatekeeper.py +147 -0
  9. skylos/linter.py +18 -0
  10. skylos/rules/base.py +20 -0
  11. skylos/rules/danger/calls.py +119 -0
  12. skylos/rules/danger/danger.py +157 -0
  13. skylos/rules/danger/danger_cmd/cmd_flow.py +75 -0
  14. skylos/rules/danger/danger_fs/__init__.py +0 -0
  15. skylos/rules/danger/danger_fs/path_flow.py +79 -0
  16. skylos/rules/danger/danger_net/__init__.py +0 -0
  17. skylos/rules/danger/danger_net/ssrf_flow.py +80 -0
  18. skylos/rules/danger/danger_sql/__init__.py +0 -0
  19. skylos/rules/danger/danger_sql/sql_flow.py +245 -0
  20. skylos/rules/danger/danger_sql/sql_raw_flow.py +96 -0
  21. skylos/rules/danger/danger_web/__init__.py +0 -0
  22. skylos/rules/danger/danger_web/xss_flow.py +170 -0
  23. skylos/rules/danger/taint.py +110 -0
  24. skylos/rules/quality/__init__.py +0 -0
  25. skylos/rules/quality/complexity.py +95 -0
  26. skylos/rules/quality/logic.py +96 -0
  27. skylos/rules/quality/nesting.py +101 -0
  28. skylos/rules/quality/structure.py +99 -0
  29. skylos/rules/secrets.py +325 -0
  30. skylos/server.py +554 -0
  31. skylos/visitor.py +502 -90
  32. skylos/visitors/__init__.py +0 -0
  33. skylos/visitors/framework_aware.py +437 -0
  34. skylos/visitors/test_aware.py +74 -0
  35. skylos-2.5.2.dist-info/METADATA +21 -0
  36. skylos-2.5.2.dist-info/RECORD +42 -0
  37. {skylos-1.0.10.dist-info → skylos-2.5.2.dist-info}/WHEEL +1 -1
  38. {skylos-1.0.10.dist-info → skylos-2.5.2.dist-info}/top_level.txt +0 -1
  39. skylos-1.0.10.dist-info/METADATA +0 -8
  40. skylos-1.0.10.dist-info/RECORD +0 -21
  41. test/compare_tools.py +0 -604
  42. test/diagnostics.py +0 -364
  43. test/sample_repo/app.py +0 -13
  44. test/sample_repo/sample_repo/commands.py +0 -81
  45. test/sample_repo/sample_repo/models.py +0 -122
  46. test/sample_repo/sample_repo/routes.py +0 -89
  47. test/sample_repo/sample_repo/utils.py +0 -36
  48. test/test_skylos.py +0 -456
  49. test/test_visitor.py +0 -220
  50. {test → skylos/rules}/__init__.py +0 -0
  51. {test/sample_repo → skylos/rules/danger}/__init__.py +0 -0
  52. {test/sample_repo/sample_repo → skylos/rules/danger/danger_cmd}/__init__.py +0 -0
  53. {skylos-1.0.10.dist-info → skylos-2.5.2.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,325 @@
1
+ from __future__ import annotations
2
+ import re, ast
3
+ from math import log2
4
+
5
# Public API of this module: only the scanner entry point is exported.
__all__ = ["scan_ctx"]

# Path suffixes that scan_ctx is willing to scan; any other relpath yields
# an empty result.
ALLOWED_FILE_SUFFIXES = (".py", ".pyi", ".pyw")

# (provider name, compiled pattern) pairs.  scan_ctx runs every pattern over
# every line and reports each match under the paired provider name.
PROVIDER_PATTERNS = [
    ("github", re.compile(r"(ghp|gho|ghu|ghs|ghr|gpat)_[A-Za-z0-9]{36,}")),
    ("gitlab", re.compile(r"glpat-[A-Za-z0-9_-]{20,}")),
    ("slack", re.compile(r"xox[abprs]-[A-Za-z0-9-]{10,48}")),
    ("stripe", re.compile(r"sk_(live|test)_[A-Za-z0-9]{16,}")),
    (
        "aws_access_key_id",
        re.compile(r"\b(AKIA|ASIA|AGPA|AIDA|AROA|AIPA)[0-9A-Z]{16}\b"),
    ),
    ("google_api_key", re.compile(r"\bAIza[0-9A-Za-z\-_]{35}\b")),
    ("sendgrid", re.compile(r"\bSG\.[A-Za-z0-9_-]{16,}\.[A-Za-z0-9_-]{16,}\b")),
    ("twilio", re.compile(r"\bSK[0-9a-fA-F]{32}\b")),
    (
        "private_key_block",
        re.compile(r"-----BEGIN (?:RSA|DSA|EC|OPENSSH|PGP) PRIVATE KEY-----"),
    ),
]

# Generic (provider-agnostic) candidate matcher, written in verbose mode.
# It has two alternatives:
#   * a keyword (token, api_key, secret, password, ...) followed by ":" or "="
#     and a quoted value of at least 16 non-quote characters, captured as
#     the named group "val";
#   * a "bare" run of [A-Za-z0-9_-] that is at least 32 characters long,
#     captured as the named group "bare".
# NOTE(review): the whole pattern is compiled case-insensitively, so the
# `[A-Z]` and `[a-z]` lookaheads each accept a letter of either case, and
# their leading `.*` lets them (and the `\d` lookahead) be satisfied by text
# after the token on the same line -- confirm whether that is intended.
GENERIC_VALUE = re.compile(r"""(?ix)
    (?:
        (token|api[_-]?key|secret|password|passwd|pwd|bearer|auth[_-]?token|access[_-]?token)
        \s*[:=]\s*(?P<q>['"])(?P<val>[^'"]{16,})(?P=q)
    )
    |
    (?P<bare>
        (?=[A-Za-z0-9_-]{32,}\b)
        (?=.*[A-Z])
        (?=.*[a-z])
        (?=.*\d)
        [A-Za-z0-9_-]+
    )
""")

# Substrings that mark a candidate as an obvious placeholder.  scan_ctx
# lower-cases the candidate and drops it if any of these occurs inside it.
SAFE_TEST_HINTS = {
    "example",
    "sample",
    "fake",
    "placeholder",
    "dummy",
    "test_",
    "_test",
    "test_test_",
    "changeme",
    "password",
    "secret",
    "not_a_real",
    "do_not_use",
}

# Matches a string that consists solely of letters, digits and underscores
# and does not start with a digit.
_IDENTIFIER = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")

# A line containing this text anywhere is skipped by scan_ctx.
IGNORE_DIRECTIVE = "skylos: ignore[SKY-S101]"
# Default for scan_ctx's `min_entropy` threshold (compared against _entropy()).
DEFAULT_MIN_ENTROPY = 3.9

# Matches "/"-separated paths that contain a `test` / `tests` directory
# segment (or end in one), or whose final segment is `test_*.py`.
IS_TEST_PATH = re.compile(r"(^|/)(tests?(/|$)|test_[^/]+\.py$)")
64
+
65
+
66
+ def _entropy(s):
67
+ if len(s) == 0:
68
+ return 0.0
69
+
70
+ char_counts = {}
71
+ for character in s:
72
+ if character in char_counts:
73
+ char_counts[character] += 1
74
+ else:
75
+ char_counts[character] = 1
76
+
77
+ total_chars = len(s)
78
+ entropy = 0.0
79
+
80
+ for count in char_counts.values():
81
+ probability = count / total_chars
82
+ entropy -= probability * log2(probability)
83
+
84
+ return entropy
85
+
86
+
87
+ def _mask(tok):
88
+ token_length = len(tok)
89
+
90
+ if token_length <= 8:
91
+ return "*" * token_length
92
+
93
+ else:
94
+ first_part = tok[:4]
95
+ last_part = tok[-4:]
96
+ return first_part + "…" + last_part
97
+
98
+
99
def _looks_like_identifier(s):
    """Return True when the whole of ``s`` matches the ``_IDENTIFIER`` pattern."""
    return _IDENTIFIER.fullmatch(s) is not None
101
+
102
+
103
+ def _docstring_lines(tree):
104
+ if tree is None:
105
+ return set()
106
+
107
+ docstring_line_numbers = set()
108
+
109
+ def find_docstring_lines(node):
110
+ if not hasattr(node, "body") or not node.body:
111
+ return
112
+
113
+ first_statement = node.body[0]
114
+
115
+ is_expression = isinstance(first_statement, ast.Expr)
116
+ if not is_expression:
117
+ return
118
+
119
+ value = getattr(first_statement, "value", None)
120
+ if not isinstance(value, ast.Constant):
121
+ return
122
+
123
+ if not isinstance(value.value, str):
124
+ return
125
+
126
+ start_line = getattr(first_statement, "lineno", None)
127
+ end_line = getattr(first_statement, "end_lineno", start_line)
128
+
129
+ if start_line is not None:
130
+ if end_line is None:
131
+ end_line = start_line
132
+
133
+ for line_num in range(start_line, end_line + 1):
134
+ docstring_line_numbers.add(line_num)
135
+
136
+ if isinstance(tree, ast.Module):
137
+ find_docstring_lines(tree)
138
+
139
+ for node in ast.walk(tree):
140
+ if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
141
+ find_docstring_lines(node)
142
+
143
+ return docstring_line_numbers
144
+
145
+
146
# Candidate value for an AWS secret access key: a run of 40 base64-alphabet
# characters, optionally wrapped in quotes.  Compiled once at import time
# instead of on every scanned line.
_AWS_SECRET_VALUE = re.compile(r"['\"]?([A-Za-z0-9/+=]{40})['\"]?")


def _has_safe_hint(token):
    """Return True if ``token`` contains any SAFE_TEST_HINTS substring, ignoring case."""
    lowered = token.lower()
    return any(hint in lowered for hint in SAFE_TEST_HINTS)


def _build_finding(provider, message, rel_path, line_number, start, token, entropy=None):
    """Build one SKY-S101 finding dict for ``token`` starting at column ``start``.

    The ``entropy`` key is only present when an entropy value is supplied.
    """
    finding = {
        "rule_id": "SKY-S101",
        "severity": "CRITICAL",
        "provider": provider,
        "message": message,
        "file": rel_path,
        "line": line_number,
        "col": start,
        "end_col": start + len(token),
        "preview": _mask(token),
    }
    if entropy is not None:
        finding["entropy"] = round(entropy, 2)
    return finding


def scan_ctx(
    ctx,
    *,
    min_entropy=DEFAULT_MIN_ENTROPY,
    scan_comments=True,
    scan_docstrings=True,
    allowlist_patterns=None,
    ignore_path_substrings=None,
    ignore_tests=True,
):
    """Scan one file's lines for hard-coded secrets.

    Three checks run on every line that is not skipped: the provider-specific
    PROVIDER_PATTERNS, an AWS secret-access-key check on lines that mention
    ``aws_secret_access_key`` (any case), and the GENERIC_VALUE high-entropy
    check.

    Args:
        ctx: Mapping read via ``.get`` for ``"relpath"`` (file path string),
            ``"lines"`` (iterable of source lines) and ``"tree"`` (parsed
            ``ast`` tree or ``None``; only used when ``scan_docstrings`` is
            false).
        min_entropy: Minimum ``_entropy`` value an AWS-secret or generic
            candidate must reach to be reported.
        scan_comments: When false, lines whose first non-blank character is
            ``#`` are skipped.
        scan_docstrings: When false, lines covered by docstrings are skipped.
        allowlist_patterns: Optional regex source strings; a line matched by
            any of them is skipped.
        ignore_path_substrings: Optional substrings; if any non-empty one
            occurs in the path, the file is not scanned.
        ignore_tests: When true, files whose path matches IS_TEST_PATH are
            not scanned at all.  When false they are scanned, but the
            generic high-entropy check stays disabled for them.

    Returns:
        A list of finding dicts with keys ``rule_id``, ``severity``,
        ``provider``, ``message``, ``file``, ``line`` (1-based), ``col``,
        ``end_col`` and ``preview`` (masked token); AWS-secret and generic
        findings also carry ``entropy``.

    Raises:
        re.error: If an entry of ``allowlist_patterns`` is not a valid regex.
    """
    # `or ""` mirrors the guard on "lines" below: an explicit None path is
    # treated as "nothing to scan" instead of raising AttributeError.
    rel_path = ctx.get("relpath") or ""
    if not rel_path.endswith(ALLOWED_FILE_SUFFIXES):
        return []

    # The path never changes while scanning, so classify it once up front.
    in_tests = bool(IS_TEST_PATH.search(rel_path.replace("\\", "/")))
    if ignore_tests and in_tests:
        return []

    if ignore_path_substrings:
        if any(sub and sub in rel_path for sub in ignore_path_substrings):
            return []

    file_lines = ctx.get("lines") or []
    allowlist_regexes = [re.compile(pattern) for pattern in allowlist_patterns or ()]

    # Docstring lines only need to be located when they are to be skipped.
    if scan_docstrings:
        skipped_docstring_lines = set()
    else:
        skipped_docstring_lines = _docstring_lines(ctx.get("tree"))

    findings = []

    for line_number, raw_line in enumerate(file_lines, start=1):
        line_content = raw_line.rstrip("\n")

        if IGNORE_DIRECTIVE in line_content:
            continue
        if not scan_comments and line_content.lstrip().startswith("#"):
            continue
        if line_number in skipped_docstring_lines:
            continue
        if any(regex.search(line_content) for regex in allowlist_regexes):
            continue

        # --- 1. Provider-specific token formats --------------------------
        for provider_name, pattern_regex in PROVIDER_PATTERNS:
            for regex_match in pattern_regex.finditer(line_content):
                potential_secret = regex_match.group(0)
                if _has_safe_hint(potential_secret):
                    continue
                # Use the match offset, not str.find(): find() always returns
                # the first textual occurrence, so a token repeated on the
                # line would be reported twice at the same column.
                findings.append(
                    _build_finding(
                        provider_name,
                        f"Potential {provider_name} secret detected",
                        rel_path,
                        line_number,
                        regex_match.start(),
                        potential_secret,
                    )
                )

        # --- 2. AWS secret access key (keyword on the line + 40-char value)
        if "aws_secret_access_key" in line_content.lower():
            aws_match = _AWS_SECRET_VALUE.search(line_content)
            if aws_match:
                aws_token = aws_match.group(1)
                tok_entropy = _entropy(aws_token)
                if tok_entropy >= min_entropy:
                    findings.append(
                        _build_finding(
                            "aws_secret_access_key",
                            "Potential AWS secret access key detected",
                            rel_path,
                            line_number,
                            aws_match.start(1),
                            aws_token,
                            entropy=tok_entropy,
                        )
                    )

        # --- 3. Generic high-entropy value (never applied to test files) --
        if in_tests:
            continue
        generic_match = GENERIC_VALUE.search(line_content)
        if not generic_match:
            continue

        if generic_match.group("val"):
            group_name, is_bare = "val", False
        elif generic_match.group("bare"):
            group_name, is_bare = "bare", True
        else:
            continue

        extracted_token = generic_match.group(group_name)
        clean_token = extracted_token.strip()
        if not clean_token:
            continue
        # A bare run that is a plain identifier is a name, not a secret.
        if is_bare and _looks_like_identifier(clean_token):
            continue
        if _has_safe_hint(clean_token):
            continue

        tok_entropy = _entropy(clean_token)
        if tok_entropy >= min_entropy and len(clean_token) >= 20:
            # Shift the group offset past any leading whitespace that
            # strip() removed from a quoted value.
            leading_ws = len(extracted_token) - len(extracted_token.lstrip())
            findings.append(
                _build_finding(
                    "generic",
                    f"High-entropy value detected (entropy={tok_entropy:.2f})",
                    rel_path,
                    line_number,
                    generic_match.start(group_name) + leading_ws,
                    clean_token,
                    entropy=tok_entropy,
                )
            )

    return findings