skylos 2.2.3__tar.gz → 2.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skylos might be problematic. Click here for more details.
- {skylos-2.2.3 → skylos-2.2.4}/PKG-INFO +1 -1
- {skylos-2.2.3 → skylos-2.2.4}/README.md +90 -84
- {skylos-2.2.3 → skylos-2.2.4}/pyproject.toml +1 -1
- {skylos-2.2.3 → skylos-2.2.4}/setup.py +1 -1
- {skylos-2.2.3 → skylos-2.2.4}/skylos/__init__.py +1 -1
- {skylos-2.2.3 → skylos-2.2.4}/skylos/analyzer.py +28 -7
- {skylos-2.2.3 → skylos-2.2.4}/skylos/cli.py +16 -2
- skylos-2.2.4/skylos/rules/dangerous.py +135 -0
- {skylos-2.2.3 → skylos-2.2.4}/skylos/rules/secrets.py +34 -5
- {skylos-2.2.3 → skylos-2.2.4}/skylos.egg-info/PKG-INFO +1 -1
- {skylos-2.2.3 → skylos-2.2.4}/skylos.egg-info/SOURCES.txt +2 -0
- skylos-2.2.4/test/test_dangerous.py +70 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/test_secrets.py +24 -10
- {skylos-2.2.3 → skylos-2.2.4}/setup.cfg +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/skylos/codemods.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/skylos/constants.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/skylos/rules/__init__.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/skylos/server.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/skylos/visitor.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/skylos/visitors/__init__.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/skylos/visitors/framework_aware.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/skylos/visitors/test_aware.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/skylos.egg-info/dependency_links.txt +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/skylos.egg-info/entry_points.txt +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/skylos.egg-info/requires.txt +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/skylos.egg-info/top_level.txt +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/__init__.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/compare_tools.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/conftest.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/diagnostics.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/sample_repo/__init__.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/sample_repo/app.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/sample_repo/sample_repo/__init__.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/sample_repo/sample_repo/commands.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/sample_repo/sample_repo/models.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/sample_repo/sample_repo/routes.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/sample_repo/sample_repo/utils.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/test_analyzer.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/test_changes_analyzer.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/test_cli.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/test_codemods.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/test_constants.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/test_framework_aware.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/test_integration.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/test_new_behaviours.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/test_skylos.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/test_test_aware.py +0 -0
- {skylos-2.2.3 → skylos-2.2.4}/test/test_visitor.py +0 -0
|
@@ -53,7 +53,7 @@
|
|
|
53
53
|
* **Folder Management**: Inclusion/exclusion of directories
|
|
54
54
|
* **Ignore Pragmas**: Skip lines tagged with `# pragma: no skylos`, `# pragma: no cover`, or `# noqa`
|
|
55
55
|
**NEW** **Secrets Scanning (PoC, opt-in)**: Detects API keys & secrets (GitHub, GitLab, Slack, Stripe, AWS, Google, SendGrid, Twilio, private key blocks)
|
|
56
|
-
|
|
56
|
+
**NEW** **Dangerous Patterns**: Flags risky code such as `eval`/`exec`, `os.system`, `subprocess.*(shell=True)`, `pickle.load`/`pickle.loads`, `yaml.load` without `SafeLoader`, `hashlib.md5`/`hashlib.sha1`, and `requests.*(verify=False)`. Refer to `DANGEROUS_CODE.md` for the full list.
|
|
57
57
|
|
|
58
58
|
## Benchmark (You can find this benchmark test in `test` folder)
|
|
59
59
|
|
|
@@ -98,6 +98,7 @@ pip install .
|
|
|
98
98
|
skylos /path/to/your/project
|
|
99
99
|
|
|
100
100
|
skylos /path/to/your/project --secrets ## include api key scan
|
|
101
|
+
skylos /path/to/your/project --danger ## include safety scan for dangerous code
|
|
101
102
|
|
|
102
103
|
# To launch the front end
|
|
103
104
|
skylos run
|
|
@@ -262,6 +263,7 @@ Options:
|
|
|
262
263
|
--list-default-excludes List the default excluded folders and
|
|
263
264
|
-c, --confidence LEVEL Confidence threshold (0-100). Lower values will show more items.
|
|
264
265
|
--secrets Scan for API keys/secrets
|
|
266
|
+
--danger Scan for dangerous code
|
|
265
267
|
```
|
|
266
268
|
|
|
267
269
|
## Interactive Mode
|
|
@@ -276,22 +278,97 @@ The interactive mode lets you select specific functions and imports to remove:
|
|
|
276
278
|
|
|
277
279
|
Pick **one** (or use **both**)
|
|
278
280
|
|
|
279
|
-
1.
|
|
281
|
+
1. GitHub Actions: runs Skylos on pushes/PRs in CI.
|
|
282
|
+
- No local install needed
|
|
283
|
+
|
|
284
|
+
2. Pre-commit (local + CI): runs Skylos before commits/PRs.
|
|
280
285
|
- You must install pre-commit locally once. Skylos gets installed automatically by the hook.
|
|
281
286
|
|
|
282
|
-
|
|
283
|
-
|
|
287
|
+
### Option A — GitHub Actions
|
|
288
|
+
|
|
289
|
+
1. Create `.github/workflows/skylos.yml` **(copy the entire `skylos.yml` from below)**:
|
|
290
|
+
|
|
291
|
+
```yaml
|
|
292
|
+
name: Skylos Deadcode Scan
|
|
293
|
+
|
|
294
|
+
on:
|
|
295
|
+
pull_request:
|
|
296
|
+
push:
|
|
297
|
+
branches: [ main, master ]
|
|
298
|
+
workflow_dispatch:
|
|
299
|
+
|
|
300
|
+
jobs:
|
|
301
|
+
scan:
|
|
302
|
+
runs-on: ubuntu-latest
|
|
303
|
+
env:
|
|
304
|
+
SKYLOS_STRICT: ${{ vars.SKYLOS_STRICT || 'false' }}
|
|
305
|
+
steps:
|
|
306
|
+
- uses: actions/checkout@v4
|
|
307
|
+
|
|
308
|
+
- uses: actions/setup-python@v5
|
|
309
|
+
with:
|
|
310
|
+
python-version: '3.11'
|
|
311
|
+
cache: 'pip'
|
|
312
|
+
|
|
313
|
+
- name: Install Skylos
|
|
314
|
+
run: pip install skylos
|
|
315
|
+
|
|
316
|
+
- name: Run Skylos
|
|
317
|
+
env:
|
|
318
|
+
REPORT: skylos_${{ github.run_number }}_${{ github.sha }}.json
|
|
319
|
+
run: |
|
|
320
|
+
echo "REPORT=$REPORT" >> "$GITHUB_OUTPUT"
|
|
321
|
+
skylos . --json > "$REPORT"
|
|
322
|
+
id: scan
|
|
323
|
+
|
|
324
|
+
- name: Fail if there are findings
|
|
325
|
+
continue-on-error: ${{ env.SKYLOS_STRICT != 'true' }}
|
|
326
|
+
env:
|
|
327
|
+
REPORT: ${{ steps.scan.outputs.REPORT }}
|
|
328
|
+
run: |
|
|
329
|
+
python - << 'PY'
|
|
330
|
+
import json, sys, os
|
|
331
|
+
report = os.environ["REPORT"]
|
|
332
|
+
data = json.load(open(report, "r", encoding="utf-8"))
|
|
333
|
+
count = 0
|
|
334
|
+
for value in data.values():
|
|
335
|
+
if isinstance(value, list):
|
|
336
|
+
count += len(value)
|
|
337
|
+
print(f"Findings: {count}")
|
|
338
|
+
if count > 0:
|
|
339
|
+
print(f"::warning title=Skylos findings::{count} potential issues found. See {report}")
|
|
340
|
+
sys.exit(1 if count > 0 else 0)
|
|
341
|
+
PY
|
|
342
|
+
|
|
343
|
+
- name: Upload report artifact
|
|
344
|
+
if: always()
|
|
345
|
+
uses: actions/upload-artifact@v4
|
|
346
|
+
with:
|
|
347
|
+
name: ${{ steps.scan.outputs.REPORT }}
|
|
348
|
+
path: ${{ steps.scan.outputs.REPORT }}
|
|
349
|
+
|
|
350
|
+
- name: Summarize in job log
|
|
351
|
+
if: always()
|
|
352
|
+
run: |
|
|
353
|
+
echo "Skylos report: ${{ steps.scan.outputs.REPORT }}" >> $GITHUB_STEP_SUMMARY
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
**To make the job fail on findings (strict mode)**:
|
|
357
|
+
|
|
358
|
+
1. Go to GitHub -> Settings -> Secrets and variables -> Actions -> Variables
|
|
359
|
+
|
|
360
|
+
2. Add variable SKYLOS_STRICT with value true
|
|
284
361
|
|
|
285
|
-
### Option
|
|
362
|
+
### Option B — Pre-commit (local + CI)
|
|
286
363
|
|
|
287
|
-
|
|
364
|
+
1. Create or edit `.pre-commit-config.yaml` at the repo root:
|
|
288
365
|
|
|
289
366
|
**A: Skylos hook repo**
|
|
290
367
|
```yaml
|
|
291
368
|
## .pre-commit-config.yaml
|
|
292
369
|
repos:
|
|
293
370
|
- repo: https://github.com/duriantaco/skylos
|
|
294
|
-
rev: v2.2.
|
|
371
|
+
rev: v2.2.4
|
|
295
372
|
hooks:
|
|
296
373
|
- id: skylos-scan
|
|
297
374
|
name: skylos report
|
|
@@ -300,7 +377,7 @@ repos:
|
|
|
300
377
|
types_or: [python]
|
|
301
378
|
pass_filenames: false
|
|
302
379
|
require_serial: true
|
|
303
|
-
args: [".", "--output", "report.json", "--confidence", "70"]
|
|
380
|
+
args: [".", "--output", "report.json", "--confidence", "70", "--danger"]
|
|
304
381
|
|
|
305
382
|
- repo: local
|
|
306
383
|
hooks:
|
|
@@ -341,7 +418,7 @@ repos:
|
|
|
341
418
|
entry: python -m skylos.cli
|
|
342
419
|
pass_filenames: false
|
|
343
420
|
require_serial: true
|
|
344
|
-
additional_dependencies: [skylos==2.2.
|
|
421
|
+
additional_dependencies: [skylos==2.2.4]
|
|
345
422
|
args: [".", "--output", "report.json", "--confidence", "70"]
|
|
346
423
|
|
|
347
424
|
- id: skylos-fail-on-findings
|
|
@@ -397,81 +474,6 @@ jobs:
|
|
|
397
474
|
|
|
398
475
|
**Pre-commit behavior:** the second hook is soft by default (`SKYLOS_SOFT=1`). This means that it prints findings and still passes. You can remove the env/logic if you want pre-commit to block commits when there are findings.
|
|
399
476
|
|
|
400
|
-
### Option B — Github Actions
|
|
401
|
-
|
|
402
|
-
1. Create .github/workflows/skylos.yml:
|
|
403
|
-
|
|
404
|
-
```yaml
|
|
405
|
-
name: Skylos Deadcode Scan
|
|
406
|
-
|
|
407
|
-
on:
|
|
408
|
-
pull_request:
|
|
409
|
-
push:
|
|
410
|
-
branches: [ main, master ]
|
|
411
|
-
workflow_dispatch:
|
|
412
|
-
|
|
413
|
-
jobs:
|
|
414
|
-
scan:
|
|
415
|
-
runs-on: ubuntu-latest
|
|
416
|
-
env:
|
|
417
|
-
SKYLOS_STRICT: ${{ vars.SKYLOS_STRICT || 'false' }}
|
|
418
|
-
steps:
|
|
419
|
-
- uses: actions/checkout@v4
|
|
420
|
-
|
|
421
|
-
- uses: actions/setup-python@v5
|
|
422
|
-
with:
|
|
423
|
-
python-version: '3.11'
|
|
424
|
-
cache: 'pip'
|
|
425
|
-
|
|
426
|
-
- name: Install Skylos
|
|
427
|
-
run: pip install skylos
|
|
428
|
-
|
|
429
|
-
- name: Run Skylos
|
|
430
|
-
env:
|
|
431
|
-
REPORT: skylos_${{ github.run_number }}_${{ github.sha }}.json
|
|
432
|
-
run: |
|
|
433
|
-
echo "REPORT=$REPORT" >> "$GITHUB_OUTPUT"
|
|
434
|
-
skylos . --json > "$REPORT"
|
|
435
|
-
id: scan
|
|
436
|
-
|
|
437
|
-
- name: Fail if there are findings
|
|
438
|
-
continue-on-error: ${{ env.SKYLOS_STRICT != 'true' }}
|
|
439
|
-
env:
|
|
440
|
-
REPORT: ${{ steps.scan.outputs.REPORT }}
|
|
441
|
-
run: |
|
|
442
|
-
python - << 'PY'
|
|
443
|
-
import json, sys, os
|
|
444
|
-
report = os.environ["REPORT"]
|
|
445
|
-
data = json.load(open(report, "r", encoding="utf-8"))
|
|
446
|
-
count = 0
|
|
447
|
-
for value in data.values():
|
|
448
|
-
if isinstance(value, list):
|
|
449
|
-
count += len(value)
|
|
450
|
-
print(f"Findings: {count}")
|
|
451
|
-
if count > 0:
|
|
452
|
-
print(f"::warning title=Skylos findings::{count} potential issues found. See {report}")
|
|
453
|
-
sys.exit(1 if count > 0 else 0)
|
|
454
|
-
PY
|
|
455
|
-
|
|
456
|
-
- name: Upload report artifact
|
|
457
|
-
if: always()
|
|
458
|
-
uses: actions/upload-artifact@v4
|
|
459
|
-
with:
|
|
460
|
-
name: ${{ steps.scan.outputs.REPORT }}
|
|
461
|
-
path: ${{ steps.scan.outputs.REPORT }}
|
|
462
|
-
|
|
463
|
-
- name: Summarize in job log
|
|
464
|
-
if: always()
|
|
465
|
-
run: |
|
|
466
|
-
echo "Skylos report: ${{ steps.scan.outputs.REPORT }}" >> $GITHUB_STEP_SUMMARY
|
|
467
|
-
```
|
|
468
|
-
|
|
469
|
-
**To make the job fail on findings (strict mode)**:
|
|
470
|
-
|
|
471
|
-
1. Go to GitHub -> Settings -> Secrets and variables -> Actions -> Variables
|
|
472
|
-
|
|
473
|
-
2. Add variable SKYLOS_STRICT with value true
|
|
474
|
-
|
|
475
477
|
## Development
|
|
476
478
|
|
|
477
479
|
### Prerequisites
|
|
@@ -519,6 +521,9 @@ A: Web framework routes are given low confidence (20) because they might be call
|
|
|
519
521
|
**Q: What confidence level should I use?**
|
|
520
522
|
A: Start with 60 (default) for safe cleanup. Use 30 for framework applications. Use 20 for more comprehensive auditing.
|
|
521
523
|
|
|
524
|
+
**Q: What does `--danger` check?**
|
|
525
|
+
A: It flags common security problems. Refer to `DANGEROUS_CODE.md` for the full details.
|
|
526
|
+
|
|
522
527
|
## Limitations
|
|
523
528
|
|
|
524
529
|
- **Dynamic code**: `getattr()`, `globals()`, runtime imports are hard to detect
|
|
@@ -562,6 +567,7 @@ We welcome contributions! Please read our [Contributing Guidelines](CONTRIBUTING
|
|
|
562
567
|
- [x] CI/CD integration examples
|
|
563
568
|
- [ ] Further optimization
|
|
564
569
|
- [ ] Add new rules
|
|
570
|
+
- [ ] Expanding on the `dangerous.py` list
|
|
565
571
|
|
|
566
572
|
## License
|
|
567
573
|
|
|
@@ -9,6 +9,7 @@ from skylos.visitor import Visitor
|
|
|
9
9
|
from skylos.constants import ( PENALTIES, AUTO_CALLED )
|
|
10
10
|
from skylos.visitors.test_aware import TestAwareVisitor
|
|
11
11
|
from skylos.rules.secrets import scan_ctx as _secrets_scan_ctx
|
|
12
|
+
from skylos.rules.dangerous import scan_ctx as scan_dangerous
|
|
12
13
|
import os
|
|
13
14
|
import traceback
|
|
14
15
|
from skylos.visitors.framework_aware import FrameworkAwareVisitor, detect_framework_usage
|
|
@@ -238,7 +239,7 @@ class Skylos:
|
|
|
238
239
|
if method.simple_name == "format" and cls.endswith("Formatter"):
|
|
239
240
|
method.references += 1
|
|
240
241
|
|
|
241
|
-
def analyze(self, path, thr=60, exclude_folders= None, enable_secrets = False):
|
|
242
|
+
def analyze(self, path, thr=60, exclude_folders= None, enable_secrets = False, enable_dangerous = False):
|
|
242
243
|
files, root = self._get_python_files(path, exclude_folders)
|
|
243
244
|
|
|
244
245
|
if not files:
|
|
@@ -262,6 +263,7 @@ class Skylos:
|
|
|
262
263
|
modmap[f] = self._module(root, f)
|
|
263
264
|
|
|
264
265
|
all_secrets = []
|
|
266
|
+
all_dangers = []
|
|
265
267
|
for file in files:
|
|
266
268
|
mod = modmap[file]
|
|
267
269
|
defs, refs, dyn, exports, test_flags, framework_flags = proc_file(file, mod)
|
|
@@ -276,13 +278,23 @@ class Skylos:
|
|
|
276
278
|
|
|
277
279
|
if enable_secrets and _secrets_scan_ctx is not None:
|
|
278
280
|
try:
|
|
279
|
-
|
|
280
|
-
|
|
281
|
+
src = Path(file).read_text(encoding="utf-8", errors="ignore")
|
|
282
|
+
src_lines = src.splitlines(True)
|
|
283
|
+
rel = str(Path(file).relative_to(root))
|
|
284
|
+
ctx = {"relpath": rel, "lines": src_lines, "tree": None}
|
|
281
285
|
findings = list(_secrets_scan_ctx(ctx))
|
|
282
286
|
if findings:
|
|
283
287
|
all_secrets.extend(findings)
|
|
284
288
|
except Exception:
|
|
285
289
|
pass
|
|
290
|
+
|
|
291
|
+
if enable_dangerous and scan_dangerous is not None:
|
|
292
|
+
try:
|
|
293
|
+
findings = scan_dangerous(root, [file])
|
|
294
|
+
if findings:
|
|
295
|
+
all_dangers.extend(findings)
|
|
296
|
+
except Exception:
|
|
297
|
+
pass
|
|
286
298
|
|
|
287
299
|
self._mark_refs()
|
|
288
300
|
self._apply_heuristics()
|
|
@@ -296,7 +308,6 @@ class Skylos:
|
|
|
296
308
|
for d in sorted(self.defs.values(), key=def_sort_key):
|
|
297
309
|
if shown >= 50:
|
|
298
310
|
break
|
|
299
|
-
print(f" type={d.type} refs={d.references} conf={d.confidence} exported={d.is_exported} line={d.line} name={d.name}")
|
|
300
311
|
shown += 1
|
|
301
312
|
|
|
302
313
|
unused = []
|
|
@@ -318,7 +329,12 @@ class Skylos:
|
|
|
318
329
|
|
|
319
330
|
if enable_secrets and all_secrets:
|
|
320
331
|
result["secrets"] = all_secrets
|
|
332
|
+
result["analysis_summary"]["secrets_count"] = len(all_secrets)
|
|
321
333
|
|
|
334
|
+
if enable_dangerous and all_dangers:
|
|
335
|
+
result["dangerous"] = all_dangers
|
|
336
|
+
result["analysis_summary"]["dangerous_count"] = len(all_dangers)
|
|
337
|
+
|
|
322
338
|
for u in unused:
|
|
323
339
|
if u["type"] in ("function", "method"):
|
|
324
340
|
result["unused_functions"].append(u)
|
|
@@ -370,13 +386,18 @@ def proc_file(file_or_args, mod=None):
|
|
|
370
386
|
|
|
371
387
|
return [], [], set(), set(), dummy_visitor, dummy_framework_visitor
|
|
372
388
|
|
|
373
|
-
def analyze(path, conf=60, exclude_folders=None, enable_secrets=False):
|
|
374
|
-
return Skylos().analyze(path,conf, exclude_folders, enable_secrets)
|
|
389
|
+
def analyze(path, conf=60, exclude_folders=None, enable_secrets=False, enable_dangerous=False):
    """Run a one-off analysis of ``path`` with a fresh ``Skylos`` instance.

    ``conf`` is passed as the confidence threshold; ``exclude_folders``,
    ``enable_secrets`` and ``enable_dangerous`` are forwarded unchanged.
    Returns whatever ``Skylos.analyze`` returns.
    """
    analyzer = Skylos()
    # Arguments are forwarded positionally, in the order Skylos.analyze declares them.
    return analyzer.analyze(path, conf, exclude_folders, enable_secrets, enable_dangerous)
|
|
375
391
|
|
|
376
392
|
if __name__ == "__main__":
|
|
377
393
|
if len(sys.argv)>1:
|
|
378
394
|
p = sys.argv[1]
|
|
379
|
-
|
|
395
|
+
|
|
396
|
+
if len(sys.argv) > 2:
|
|
397
|
+
confidence = int(sys.argv[2])
|
|
398
|
+
else:
|
|
399
|
+
confidence = 60
|
|
400
|
+
|
|
380
401
|
result = analyze(p,confidence)
|
|
381
402
|
|
|
382
403
|
data = json.loads(result)
|
|
@@ -43,7 +43,7 @@ def setup_logger(output_file=None):
|
|
|
43
43
|
|
|
44
44
|
formatter = CleanFormatter()
|
|
45
45
|
|
|
46
|
-
console_handler = logging.StreamHandler(sys.
|
|
46
|
+
console_handler = logging.StreamHandler(sys.stderr)
|
|
47
47
|
console_handler.setFormatter(formatter)
|
|
48
48
|
logger.addHandler(console_handler)
|
|
49
49
|
|
|
@@ -270,6 +270,9 @@ def main():
|
|
|
270
270
|
parser.add_argument("--secrets", action="store_true",
|
|
271
271
|
help="Scan for API keys. Off by default.")
|
|
272
272
|
|
|
273
|
+
parser.add_argument("--danger", action="store_true",
|
|
274
|
+
help="Scan for security issues. Off by default.")
|
|
275
|
+
|
|
273
276
|
args = parser.parse_args()
|
|
274
277
|
|
|
275
278
|
if args.list_default_excludes:
|
|
@@ -305,6 +308,11 @@ def main():
|
|
|
305
308
|
|
|
306
309
|
try:
|
|
307
310
|
result_json = run_analyze(args.path, conf=args.confidence, enable_secrets=bool(args.secrets), exclude_folders=list(final_exclude_folders))
|
|
311
|
+
|
|
312
|
+
if args.json:
|
|
313
|
+
print(result_json)
|
|
314
|
+
return
|
|
315
|
+
|
|
308
316
|
result = json.loads(result_json)
|
|
309
317
|
|
|
310
318
|
except Exception as e:
|
|
@@ -312,8 +320,14 @@ def main():
|
|
|
312
320
|
sys.exit(1)
|
|
313
321
|
|
|
314
322
|
if args.json:
|
|
315
|
-
|
|
323
|
+
lg = logging.getLogger('skylos')
|
|
324
|
+
for h in list(lg.handlers):
|
|
325
|
+
if isinstance(h, logging.StreamHandler):
|
|
326
|
+
lg.removeHandler(h)
|
|
327
|
+
print(result_json)
|
|
316
328
|
return
|
|
329
|
+
|
|
330
|
+
result = json.loads(result_json)
|
|
317
331
|
|
|
318
332
|
unused_functions = result.get("unused_functions", [])
|
|
319
333
|
unused_imports = result.get("unused_imports", [])
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import ast
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
ALLOWED_SUFFIXES = (".py", ".pyi", ".pyw")
|
|
6
|
+
|
|
7
|
+
## Rule table for the dangerous-call scanner (will be expanded with more rules later).
##
## key:   dotted name of the call to match; a key ending in ".*" matches any
##        attribute called under that prefix (e.g. "subprocess.*" -> subprocess.run)
## value: (rule_id, severity, message) or (rule_id, severity, message, options)
##        options["kw_equals"] restricts the rule to calls that pass every listed
##        keyword argument as exactly that literal value.
DANGEROUS_CALLS = {
    "eval": ("SKY-D201", "HIGH", "Use of eval()"),
    "exec": ("SKY-D202", "HIGH", "Use of exec()"),
    "os.system": ("SKY-D203", "MEDIUM", "Use of os.system"),
    "pickle.load": ("SKY-D204", "CRITICAL", "Untrusted deserialization via pickle.load"),
    "pickle.loads": ("SKY-D205", "CRITICAL", "Untrusted deserialization via pickle.loads"),
    "yaml.load": ("SKY-D206", "HIGH", "yaml.load without SafeLoader"),
    "hashlib.md5": ("SKY-D207", "MEDIUM", "Weak hash (MD5)"),
    "hashlib.sha1": ("SKY-D208", "MEDIUM", "Weak hash (SHA1)"),
    ## rules below only fire when a specific keyword argument is present
    "subprocess.*": (
        "SKY-D209",
        "HIGH",
        "subprocess.* with shell=True",
        {"kw_equals": {"shell": True}},
    ),
    "requests.*": (
        "SKY-D210",
        "HIGH",
        "requests call with verify=False",
        {"kw_equals": {"verify": False}},
    ),
}
|
|
24
|
+
|
|
25
|
+
def _matches_rule(name, rule_key):
|
|
26
|
+
if not name:
|
|
27
|
+
return False
|
|
28
|
+
if rule_key.endswith(".*"):
|
|
29
|
+
return name.startswith(rule_key[:-2] + ".")
|
|
30
|
+
return name == rule_key
|
|
31
|
+
|
|
32
|
+
def _kw_equals(node: ast.Call, requirements):
|
|
33
|
+
if not requirements:
|
|
34
|
+
return True
|
|
35
|
+
kw_map = {}
|
|
36
|
+
keywords = node.keywords or []
|
|
37
|
+
for kw in keywords:
|
|
38
|
+
if kw.arg:
|
|
39
|
+
kw_map[kw.arg] = kw.value
|
|
40
|
+
|
|
41
|
+
for key, expected in requirements.items():
|
|
42
|
+
val = kw_map.get(key)
|
|
43
|
+
if not isinstance(val, ast.Constant):
|
|
44
|
+
return False
|
|
45
|
+
if val.value is not expected:
|
|
46
|
+
return False
|
|
47
|
+
return True
|
|
48
|
+
|
|
49
|
+
def qualified_name_from_call(node: ast.Call):
    """Return the dotted name being called, e.g. ``"os.system"`` or ``"eval"``.

    Walks the attribute chain of ``node.func`` down to its root. Returns
    ``None`` when the root is not a plain name (for example ``f().g()`` or
    ``items[0].run()``), since no static dotted name exists for such calls.
    """
    target = node.func
    attrs = []  # collected innermost-last, i.e. in reverse of the dotted order
    while isinstance(target, ast.Attribute):
        attrs.append(target.attr)
        target = target.value

    if not isinstance(target, ast.Name):
        return None

    # A bare name (no attributes) simply yields its identifier.
    return ".".join([target.id, *reversed(attrs)])
|
|
62
|
+
|
|
63
|
+
def _yaml_load_without_safeloader(node: ast.Call):
    """Return True if ``node`` is a ``yaml.load`` call that does not use a SafeLoader.

    The loader is looked up as the ``Loader=`` keyword first and, failing that,
    as the second positional argument (``yaml.load(stream, Loader)``). A call
    with no loader at all, or with a loader expression whose source text does
    not contain ``SafeLoader``, is considered unsafe. Calls to anything other
    than ``yaml.load`` return False.
    """
    if qualified_name_from_call(node) != "yaml.load":
        return False

    loader_expr = None
    for kw in node.keywords or []:
        if kw.arg == "Loader":
            loader_expr = kw.value
            break
    else:
        # No Loader keyword: the loader may have been passed positionally.
        if len(node.args) >= 2:
            loader_expr = node.args[1]

    if loader_expr is None:
        return True

    try:
        text = ast.unparse(loader_expr)
    except Exception:
        # If the expression cannot be rendered, err on the side of reporting.
        return True
    # Substring match also accepts yaml.SafeLoader and CSafeLoader.
    return "SafeLoader" not in text
|
|
76
|
+
|
|
77
|
+
def _add_finding(findings,
|
|
78
|
+
file_path: Path,
|
|
79
|
+
node: ast.AST,
|
|
80
|
+
rule_id,
|
|
81
|
+
severity,
|
|
82
|
+
message):
|
|
83
|
+
findings.append({
|
|
84
|
+
"rule_id": rule_id,
|
|
85
|
+
"severity": severity,
|
|
86
|
+
"message": message,
|
|
87
|
+
"file": str(file_path),
|
|
88
|
+
"line": getattr(node, "lineno", 1),
|
|
89
|
+
"col": getattr(node, "col_offset", 0),
|
|
90
|
+
})
|
|
91
|
+
|
|
92
|
+
def scan_ctx(root, files):
    """Scan Python files for calls listed in ``DANGEROUS_CALLS``.

    Args:
        root: Project root. Accepted for interface compatibility; it is not
            used by the scan itself.
        files: Iterable of file paths (``Path``, ``str`` or ``os.PathLike``).
            Files whose suffix is not in ``ALLOWED_SUFFIXES`` are skipped.

    Returns:
        A list of finding dicts (``rule_id``, ``severity``, ``message``,
        ``file``, ``line``, ``col``), at most one per call expression.
    """
    findings = []

    for raw_path in files:
        # Coerce so plain string paths work too; without this a str entry
        # fails on ``.suffix`` and callers that swallow exceptions would
        # silently report nothing for that file.
        file_path = Path(raw_path)
        if file_path.suffix.lower() not in ALLOWED_SUFFIXES:
            continue

        try:
            src = file_path.read_text(encoding="utf-8", errors="ignore")
            tree = ast.parse(src)
        except Exception:
            # Best effort: unreadable or unparsable files are skipped, not fatal.
            continue

        for node in ast.walk(tree):
            if not isinstance(node, ast.Call):
                continue

            name = qualified_name_from_call(node)
            if not name:
                continue

            for rule_key, (rule_id, severity, message, *rest) in DANGEROUS_CALLS.items():
                if not _matches_rule(name, rule_key):
                    continue

                # yaml.load is only dangerous when no SafeLoader is supplied.
                if rule_key == "yaml.load" and not _yaml_load_without_safeloader(node):
                    continue

                # Optional fourth tuple element carries keyword constraints.
                opts = rest[0] if rest else None
                if opts and "kw_equals" in opts and not _kw_equals(node, opts["kw_equals"]):
                    continue

                _add_finding(findings, file_path, node, rule_id, severity, message)
                break  # first matching rule wins for this call

    return findings
|
|
@@ -22,7 +22,15 @@ GENERIC_VALUE = re.compile(r"""(?ix)
|
|
|
22
22
|
(?:
|
|
23
23
|
(token|api[_-]?key|secret|password|passwd|pwd|bearer|auth[_-]?token|access[_-]?token)
|
|
24
24
|
\s*[:=]\s*(?P<q>['"])(?P<val>[^'"]{16,})(?P=q)
|
|
25
|
-
)
|
|
25
|
+
)
|
|
26
|
+
|
|
|
27
|
+
(?P<bare>
|
|
28
|
+
(?=[A-Za-z0-9_-]{32,}\b)
|
|
29
|
+
(?=.*[A-Z])
|
|
30
|
+
(?=.*[a-z])
|
|
31
|
+
(?=.*\d)
|
|
32
|
+
[A-Za-z0-9_-]+
|
|
33
|
+
)
|
|
26
34
|
""")
|
|
27
35
|
|
|
28
36
|
SAFE_TEST_HINTS = {
|
|
@@ -30,8 +38,12 @@ SAFE_TEST_HINTS = {
|
|
|
30
38
|
"changeme", "password", "secret", "not_a_real", "do_not_use",
|
|
31
39
|
}
|
|
32
40
|
|
|
41
|
+
_IDENTIFIER = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
|
|
42
|
+
|
|
33
43
|
IGNORE_DIRECTIVE = "skylos: ignore[SKY-S101]"
|
|
34
|
-
DEFAULT_MIN_ENTROPY = 3.
|
|
44
|
+
DEFAULT_MIN_ENTROPY = 3.9
|
|
45
|
+
|
|
46
|
+
IS_TEST_PATH = re.compile(r"(^|/)(tests?(/|$)|test_[^/]+\.py$)")
|
|
35
47
|
|
|
36
48
|
def _entropy(s):
|
|
37
49
|
if len(s) == 0:
|
|
@@ -64,6 +76,9 @@ def _mask(tok):
|
|
|
64
76
|
last_part = tok[-4:]
|
|
65
77
|
return first_part + "…" + last_part
|
|
66
78
|
|
|
79
|
+
def _looks_like_identifier(s):
    """Return True when the whole of ``s`` matches the ``_IDENTIFIER`` pattern."""
    whole_match = _IDENTIFIER.fullmatch(s)
    return whole_match is not None
|
|
81
|
+
|
|
67
82
|
def _docstring_lines(tree):
|
|
68
83
|
if tree is None:
|
|
69
84
|
return set()
|
|
@@ -107,12 +122,15 @@ def _docstring_lines(tree):
|
|
|
107
122
|
return docstring_line_numbers
|
|
108
123
|
|
|
109
124
|
def scan_ctx(ctx, *, min_entropy= DEFAULT_MIN_ENTROPY, scan_comments= True,
|
|
110
|
-
scan_docstrings= True, allowlist_patterns= None, ignore_path_substrings= None):
|
|
125
|
+
scan_docstrings= True, allowlist_patterns= None, ignore_path_substrings= None, ignore_tests=True):
|
|
111
126
|
|
|
112
127
|
rel_path = ctx.get("relpath", "")
|
|
113
128
|
if not rel_path.endswith(ALLOWED_FILE_SUFFIXES):
|
|
114
129
|
return []
|
|
115
130
|
|
|
131
|
+
if ignore_tests and IS_TEST_PATH.search(rel_path.replace("\\", "/")):
|
|
132
|
+
return []
|
|
133
|
+
|
|
116
134
|
if ignore_path_substrings:
|
|
117
135
|
for substring in ignore_path_substrings:
|
|
118
136
|
if substring and substring in rel_path:
|
|
@@ -220,22 +238,33 @@ def scan_ctx(ctx, *, min_entropy= DEFAULT_MIN_ENTROPY, scan_comments= True,
|
|
|
220
238
|
"entropy": round(tok_entropy, 2),
|
|
221
239
|
}
|
|
222
240
|
findings.append(aws_finding)
|
|
223
|
-
|
|
224
|
-
|
|
241
|
+
|
|
242
|
+
in_tests = bool(IS_TEST_PATH.search(rel_path.replace("\\", "/")))
|
|
243
|
+
|
|
244
|
+
if in_tests:
|
|
245
|
+
generic_match = None
|
|
246
|
+
else:
|
|
247
|
+
generic_match= GENERIC_VALUE.search(line_content)
|
|
248
|
+
|
|
225
249
|
if generic_match:
|
|
226
250
|
val_group = generic_match.group("val")
|
|
227
251
|
bare_group = generic_match.group("bare")
|
|
228
252
|
|
|
253
|
+
is_bare = False
|
|
229
254
|
if val_group:
|
|
230
255
|
extracted_token = val_group
|
|
231
256
|
elif bare_group:
|
|
232
257
|
extracted_token = bare_group
|
|
258
|
+
is_bare = True
|
|
233
259
|
else:
|
|
234
260
|
extracted_token = ""
|
|
235
261
|
|
|
236
262
|
clean_token = extracted_token.strip()
|
|
237
263
|
|
|
238
264
|
if clean_token:
|
|
265
|
+
if is_bare and _looks_like_identifier(clean_token):
|
|
266
|
+
continue
|
|
267
|
+
|
|
239
268
|
token_lowercase = clean_token.lower()
|
|
240
269
|
has_safe_hint = False
|
|
241
270
|
|
|
@@ -15,6 +15,7 @@ skylos.egg-info/entry_points.txt
|
|
|
15
15
|
skylos.egg-info/requires.txt
|
|
16
16
|
skylos.egg-info/top_level.txt
|
|
17
17
|
skylos/rules/__init__.py
|
|
18
|
+
skylos/rules/dangerous.py
|
|
18
19
|
skylos/rules/secrets.py
|
|
19
20
|
skylos/visitors/__init__.py
|
|
20
21
|
skylos/visitors/framework_aware.py
|
|
@@ -28,6 +29,7 @@ test/test_changes_analyzer.py
|
|
|
28
29
|
test/test_cli.py
|
|
29
30
|
test/test_codemods.py
|
|
30
31
|
test/test_constants.py
|
|
32
|
+
test/test_dangerous.py
|
|
31
33
|
test/test_framework_aware.py
|
|
32
34
|
test/test_integration.py
|
|
33
35
|
test/test_new_behaviours.py
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from skylos.rules.dangerous import scan_ctx
|
|
3
|
+
|
|
4
|
+
def _write(tmp_path: Path, name, code):
|
|
5
|
+
p = tmp_path / name
|
|
6
|
+
p.write_text(code, encoding="utf-8")
|
|
7
|
+
return p
|
|
8
|
+
|
|
9
|
+
def _rule_ids(findings):
|
|
10
|
+
rule_ids = set()
|
|
11
|
+
for f in findings:
|
|
12
|
+
rule_ids.add(f["rule_id"])
|
|
13
|
+
return rule_ids
|
|
14
|
+
|
|
15
|
+
def _scan_one(tmp_path: Path, name, code):
    """Write ``code`` to ``tmp_path / name`` and return the scan results for that one file."""
    source_file = _write(tmp_path, name, code)
    targets = [source_file]
    return scan_ctx(tmp_path, targets)
|
|
18
|
+
|
|
19
|
+
def test_eval(tmp_path):
    """A bare eval() call is reported as SKY-D201."""
    findings = _scan_one(tmp_path, "a_eval.py", 'eval("1+1")\n')
    reported = _rule_ids(findings)
    assert "SKY-D201" in reported
|
|
22
|
+
|
|
23
|
+
def test_exec(tmp_path):
    """A bare exec() call is reported as SKY-D202."""
    findings = _scan_one(tmp_path, "a_exec.py", 'exec("print(1)")\n')
    reported = _rule_ids(findings)
    assert "SKY-D202" in reported
|
|
26
|
+
|
|
27
|
+
def test_os_system(tmp_path):
    """os.system(...) is reported as SKY-D203."""
    findings = _scan_one(tmp_path, "a_os.py", "import os\nos.system('echo hi')\n")
    reported = _rule_ids(findings)
    assert "SKY-D203" in reported
|
|
30
|
+
|
|
31
|
+
def test_pickle_loads(tmp_path):
    """pickle.loads(...) is reported as SKY-D205."""
    findings = _scan_one(tmp_path, "a_pickle.py", "import pickle\npickle.loads(b'\\x80\\x04K\\x01.')\n")
    reported = _rule_ids(findings)
    assert "SKY-D205" in reported
|
|
34
|
+
|
|
35
|
+
def test_yaml_load_without_safeloader(tmp_path):
    """yaml.load(...) with no loader argument is reported as SKY-D206."""
    findings = _scan_one(tmp_path, "a_yaml.py", "import yaml\nyaml.load('a: 1')\n")
    reported = _rule_ids(findings)
    assert "SKY-D206" in reported
|
|
38
|
+
|
|
39
|
+
def test_md5_sha1(tmp_path):
    """hashlib.md5 and hashlib.sha1 calls are reported as SKY-D207 and SKY-D208."""
    findings = _scan_one(tmp_path, "a_hashes.py", "import hashlib\nhashlib.md5(b'd')\nhashlib.sha1(b'd')\n")
    reported = _rule_ids(findings)
    for expected_rule in ("SKY-D207", "SKY-D208"):
        assert expected_rule in reported
|
|
44
|
+
|
|
45
|
+
def test_subprocess_shell_true(tmp_path):
    """A subprocess call with shell=True is reported as SKY-D209."""
    findings = _scan_one(tmp_path, "a_subproc.py", "import subprocess\nsubprocess.run('echo hi', shell=True)\n")
    reported = _rule_ids(findings)
    assert "SKY-D209" in reported
|
|
48
|
+
|
|
49
|
+
def test_requests_verify_false(tmp_path):
    """A requests call with verify=False is reported as SKY-D210."""
    findings = _scan_one(tmp_path, "a_requests.py", "import requests\nrequests.get('https://x', verify=False)\n")
    reported = _rule_ids(findings)
    assert "SKY-D210" in reported
|
|
52
|
+
|
|
53
|
+
def test_yaml_safe_loader_does_not_trigger(tmp_path):
    """yaml.load(..., Loader=SafeLoader) must not be reported as SKY-D206."""
    source = (
        "import yaml\n"
        "from yaml import SafeLoader\n"
        "yaml.load('a: 1', Loader=SafeLoader)\n"
    )
    findings = _scan_one(tmp_path, "b_yaml_safe.py", source)
    reported = _rule_ids(findings)
    assert "SKY-D206" not in reported
|
|
61
|
+
|
|
62
|
+
def test_subprocess_without_shell_true_is_ok(tmp_path):
    """A subprocess call without shell=True must not be reported as SKY-D209."""
    source = "import subprocess\nsubprocess.run(['echo','hi'])\n"
    findings = _scan_one(tmp_path, "b_subproc_ok.py", source)
    reported = _rule_ids(findings)
    assert "SKY-D209" not in reported
|
|
66
|
+
|
|
67
|
+
def test_requests_default_verify_true_is_ok(tmp_path):
    """A requests call that does not pass verify=False must not be reported as SKY-D210."""
    source = "import requests\nrequests.get('https://example.com')\n"
    findings = _scan_one(tmp_path, "b_requests_ok.py", source)
    reported = _rule_ids(findings)
    assert "SKY-D210" not in reported
|
|
@@ -77,16 +77,6 @@ def test_aws_secret_access_key_special_case():
|
|
|
77
77
|
assert "entropy" in hit and isinstance(hit["entropy"], float)
|
|
78
78
|
assert ELLIPSIS in hit["preview"]
|
|
79
79
|
|
|
80
|
-
|
|
81
|
-
def test_generic_entropy_detection_and_threshold():
|
|
82
|
-
src = 'X = "o2uV7Ew1kZ9Q3nR8sT5yU6pX4cJ2mL7a"\n'
|
|
83
|
-
findings = list(scan_ctx(_ctx_from_source(src)))
|
|
84
|
-
assert any(f["provider"] == "generic" for f in findings)
|
|
85
|
-
|
|
86
|
-
findings_high_thr = list(scan_ctx(_ctx_from_source(src), min_entropy=8.0))
|
|
87
|
-
assert not any(f["provider"] == "generic" for f in findings_high_thr)
|
|
88
|
-
|
|
89
|
-
|
|
90
80
|
def test_ignore_directive_suppresses_matches():
|
|
91
81
|
src = 'GITHUB_TOKEN = "ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" # skylos: ignore[SKY-S101]\n'
|
|
92
82
|
findings = list(scan_ctx(_ctx_from_source(src)))
|
|
@@ -177,3 +167,27 @@ def test_safe_hints_suppress_detection():
|
|
|
177
167
|
safe_line = 'EXAMPLE_TOKEN = "sk_test_this_is_example_value_not_real_123456"\n'
|
|
178
168
|
out = list(scan_ctx(_ctx_from_source(safe_line)))
|
|
179
169
|
assert out == []
|
|
170
|
+
|
|
171
|
+
def test_generic_is_suppressed_in_test_paths():
    """A high-entropy literal in a file under tests/ yields no "generic" finding."""
    src = 'X = "o2uV7Ew1kZ9Q3nR8sT5yU6pX4cJ2mL7a"\n'
    ctx = _ctx_from_source(src, rel="tests/unit/test_secrets.py")
    generic_hits = [hit for hit in scan_ctx(ctx) if hit["provider"] == "generic"]
    assert len(generic_hits) == 0
|
|
181
|
+
|
|
182
|
+
def test_normal_strings_ignored():
    """An ordinary short string assignment yields no "generic" finding."""
    src = 'X = "config_path"\n'
    ctx = _ctx_from_source(src)
    generic_hits = [hit for hit in scan_ctx(ctx) if hit["provider"] == "generic"]
    assert len(generic_hits) == 0
|
|
193
|
+
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|