skylos 2.2.2__tar.gz → 2.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skylos might be problematic. Click here for more details.

Files changed (48) hide show
  1. {skylos-2.2.2 → skylos-2.2.4}/PKG-INFO +1 -1
  2. {skylos-2.2.2 → skylos-2.2.4}/README.md +91 -84
  3. {skylos-2.2.2 → skylos-2.2.4}/pyproject.toml +1 -1
  4. {skylos-2.2.2 → skylos-2.2.4}/setup.py +1 -1
  5. {skylos-2.2.2 → skylos-2.2.4}/skylos/__init__.py +1 -1
  6. {skylos-2.2.2 → skylos-2.2.4}/skylos/analyzer.py +28 -7
  7. {skylos-2.2.2 → skylos-2.2.4}/skylos/cli.py +16 -2
  8. {skylos-2.2.2 → skylos-2.2.4}/skylos/codemods.py +15 -6
  9. skylos-2.2.4/skylos/rules/dangerous.py +135 -0
  10. {skylos-2.2.2 → skylos-2.2.4}/skylos/rules/secrets.py +34 -6
  11. {skylos-2.2.2 → skylos-2.2.4}/skylos.egg-info/PKG-INFO +1 -1
  12. {skylos-2.2.2 → skylos-2.2.4}/skylos.egg-info/SOURCES.txt +2 -0
  13. skylos-2.2.4/test/test_dangerous.py +70 -0
  14. {skylos-2.2.2 → skylos-2.2.4}/test/test_secrets.py +24 -10
  15. {skylos-2.2.2 → skylos-2.2.4}/setup.cfg +0 -0
  16. {skylos-2.2.2 → skylos-2.2.4}/skylos/constants.py +0 -0
  17. {skylos-2.2.2 → skylos-2.2.4}/skylos/rules/__init__.py +0 -0
  18. {skylos-2.2.2 → skylos-2.2.4}/skylos/server.py +0 -0
  19. {skylos-2.2.2 → skylos-2.2.4}/skylos/visitor.py +0 -0
  20. {skylos-2.2.2 → skylos-2.2.4}/skylos/visitors/__init__.py +0 -0
  21. {skylos-2.2.2 → skylos-2.2.4}/skylos/visitors/framework_aware.py +0 -0
  22. {skylos-2.2.2 → skylos-2.2.4}/skylos/visitors/test_aware.py +0 -0
  23. {skylos-2.2.2 → skylos-2.2.4}/skylos.egg-info/dependency_links.txt +0 -0
  24. {skylos-2.2.2 → skylos-2.2.4}/skylos.egg-info/entry_points.txt +0 -0
  25. {skylos-2.2.2 → skylos-2.2.4}/skylos.egg-info/requires.txt +0 -0
  26. {skylos-2.2.2 → skylos-2.2.4}/skylos.egg-info/top_level.txt +0 -0
  27. {skylos-2.2.2 → skylos-2.2.4}/test/__init__.py +0 -0
  28. {skylos-2.2.2 → skylos-2.2.4}/test/compare_tools.py +0 -0
  29. {skylos-2.2.2 → skylos-2.2.4}/test/conftest.py +0 -0
  30. {skylos-2.2.2 → skylos-2.2.4}/test/diagnostics.py +0 -0
  31. {skylos-2.2.2 → skylos-2.2.4}/test/sample_repo/__init__.py +0 -0
  32. {skylos-2.2.2 → skylos-2.2.4}/test/sample_repo/app.py +0 -0
  33. {skylos-2.2.2 → skylos-2.2.4}/test/sample_repo/sample_repo/__init__.py +0 -0
  34. {skylos-2.2.2 → skylos-2.2.4}/test/sample_repo/sample_repo/commands.py +0 -0
  35. {skylos-2.2.2 → skylos-2.2.4}/test/sample_repo/sample_repo/models.py +0 -0
  36. {skylos-2.2.2 → skylos-2.2.4}/test/sample_repo/sample_repo/routes.py +0 -0
  37. {skylos-2.2.2 → skylos-2.2.4}/test/sample_repo/sample_repo/utils.py +0 -0
  38. {skylos-2.2.2 → skylos-2.2.4}/test/test_analyzer.py +0 -0
  39. {skylos-2.2.2 → skylos-2.2.4}/test/test_changes_analyzer.py +0 -0
  40. {skylos-2.2.2 → skylos-2.2.4}/test/test_cli.py +0 -0
  41. {skylos-2.2.2 → skylos-2.2.4}/test/test_codemods.py +0 -0
  42. {skylos-2.2.2 → skylos-2.2.4}/test/test_constants.py +0 -0
  43. {skylos-2.2.2 → skylos-2.2.4}/test/test_framework_aware.py +0 -0
  44. {skylos-2.2.2 → skylos-2.2.4}/test/test_integration.py +0 -0
  45. {skylos-2.2.2 → skylos-2.2.4}/test/test_new_behaviours.py +0 -0
  46. {skylos-2.2.2 → skylos-2.2.4}/test/test_skylos.py +0 -0
  47. {skylos-2.2.2 → skylos-2.2.4}/test/test_test_aware.py +0 -0
  48. {skylos-2.2.2 → skylos-2.2.4}/test/test_visitor.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: skylos
3
- Version: 2.2.2
3
+ Version: 2.2.4
4
4
  Summary: A static analysis tool for Python codebases
5
5
  Author-email: oha <aaronoh2015@gmail.com>
6
6
  Requires-Python: >=3.9
@@ -53,7 +53,7 @@
53
53
  * **Folder Management**: Inclusion/exclusion of directories
54
54
  * **Ignore Pragmas**: Skip lines tagged with `# pragma: no skylos`, `# pragma: no cover`, or `# noqa`
55
55
  **NEW** **Secrets Scanning (PoC, opt-in)**: Detects API keys & secrets (GitHub, GitLab, Slack, Stripe, AWS, Google, SendGrid, Twilio, private key blocks)
56
-
56
+ **NEW** **Dangerous Patterns**: Flags risky code such as `eval`/`exec`, `os.system`, `subprocess.*(shell=True)`, `pickle.load`/`pickle.loads`, `yaml.load` without `SafeLoader`, and `hashlib.md5`/`hashlib.sha1`. Refer to `DANGEROUS_CODE.md` for the full list.
57
57
 
58
58
  ## Benchmark (You can find this benchmark test in `test` folder)
59
59
 
@@ -98,6 +98,7 @@ pip install .
98
98
  skylos /path/to/your/project
99
99
 
100
100
  skylos /path/to/your/project --secrets ## include api key scan
101
+ skylos /path/to/your/project --danger ## include safety scan for dangerous code
101
102
 
102
103
  # To launch the front end
103
104
  skylos run
@@ -253,6 +254,7 @@ Options:
253
254
  --json Output raw JSON instead of formatted text
254
255
  -o, --output FILE Write output to file instead of stdout
255
256
  -v, --verbose Enable verbose output
257
+ --version Checks version
256
258
  -i, --interactive Interactively select items to remove
257
259
  --dry-run Show what would be removed without modifying files
258
260
  --exclude-folder FOLDER Exclude a folder from analysis (can be used multiple times)
@@ -261,6 +263,7 @@ Options:
261
263
  --list-default-excludes List the default excluded folders and
262
264
  -c, --confidence LEVEL Confidence threshold (0-100). Lower values will show more items.
263
265
  --secrets Scan for API keys/secrets
266
+ --danger Scan for dangerous code
264
267
  ```
265
268
 
266
269
  ## Interactive Mode
@@ -275,22 +278,97 @@ The interactive mode lets you select specific functions and imports to remove:
275
278
 
276
279
  Pick **one** (or use **both**)
277
280
 
278
- 1. Pre-commit (local + CI): runs Skylos before commits/PRs.
281
+ 1. GitHub Actions: runs Skylos on pushes/PRs in CI.
282
+ - No local install needed
283
+
284
+ 2. Pre-commit (local + CI): runs Skylos before commits/PRs.
279
285
  - You must install pre-commit locally once. Skylos gets installed automatically by the hook.
280
286
 
281
- 2. GitHub Actions: runs Skylos on pushes/PRs in CI.
282
- - No local install needed
287
+ ### Option A — GitHub Actions
288
+
289
+ 1. Create `.github/workflows/skylos.yml` **(copy the entire `skylos.yml` below)**:
290
+
291
+ ```yaml
292
+ name: Skylos Deadcode Scan
293
+
294
+ on:
295
+ pull_request:
296
+ push:
297
+ branches: [ main, master ]
298
+ workflow_dispatch:
299
+
300
+ jobs:
301
+ scan:
302
+ runs-on: ubuntu-latest
303
+ env:
304
+ SKYLOS_STRICT: ${{ vars.SKYLOS_STRICT || 'false' }}
305
+ steps:
306
+ - uses: actions/checkout@v4
307
+
308
+ - uses: actions/setup-python@v5
309
+ with:
310
+ python-version: '3.11'
311
+ cache: 'pip'
312
+
313
+ - name: Install Skylos
314
+ run: pip install skylos
315
+
316
+ - name: Run Skylos
317
+ env:
318
+ REPORT: skylos_${{ github.run_number }}_${{ github.sha }}.json
319
+ run: |
320
+ echo "REPORT=$REPORT" >> "$GITHUB_OUTPUT"
321
+ skylos . --json > "$REPORT"
322
+ id: scan
323
+
324
+ - name: Fail if there are findings
325
+ continue-on-error: ${{ env.SKYLOS_STRICT != 'true' }}
326
+ env:
327
+ REPORT: ${{ steps.scan.outputs.REPORT }}
328
+ run: |
329
+ python - << 'PY'
330
+ import json, sys, os
331
+ report = os.environ["REPORT"]
332
+ data = json.load(open(report, "r", encoding="utf-8"))
333
+ count = 0
334
+ for value in data.values():
335
+ if isinstance(value, list):
336
+ count += len(value)
337
+ print(f"Findings: {count}")
338
+ if count > 0:
339
+ print(f"::warning title=Skylos findings::{count} potential issues found. See {report}")
340
+ sys.exit(1 if count > 0 else 0)
341
+ PY
342
+
343
+ - name: Upload report artifact
344
+ if: always()
345
+ uses: actions/upload-artifact@v4
346
+ with:
347
+ name: ${{ steps.scan.outputs.REPORT }}
348
+ path: ${{ steps.scan.outputs.REPORT }}
349
+
350
+ - name: Summarize in job log
351
+ if: always()
352
+ run: |
353
+ echo "Skylos report: ${{ steps.scan.outputs.REPORT }}" >> $GITHUB_STEP_SUMMARY
354
+ ```
355
+
356
+ **To make the job fail on findings (strict mode)**:
357
+
358
+ 1. Go to GitHub -> Settings -> Secrets and variables -> Actions -> Variables
359
+
360
+ 2. Add variable SKYLOS_STRICT with value true
283
361
 
284
- ### Option A — Pre-commit (local + CI)
362
+ ### Option B — Pre-commit (local + CI)
285
363
 
286
- 1. Create or edit `.pre-commit-config.yaml` at the repo root:
364
+ 1. Create or edit `.pre-commit-config.yaml` at the repo root:
287
365
 
288
366
  **A: Skylos hook repo**
289
367
  ```yaml
290
368
  ## .pre-commit-config.yaml
291
369
  repos:
292
370
  - repo: https://github.com/duriantaco/skylos
293
- rev: v2.2.2
371
+ rev: v2.2.4
294
372
  hooks:
295
373
  - id: skylos-scan
296
374
  name: skylos report
@@ -299,7 +377,7 @@ repos:
299
377
  types_or: [python]
300
378
  pass_filenames: false
301
379
  require_serial: true
302
- args: [".", "--output", "report.json", "--confidence", "70"]
380
+ args: [".", "--output", "report.json", "--confidence", "70", "--danger"]
303
381
 
304
382
  - repo: local
305
383
  hooks:
@@ -340,7 +418,7 @@ repos:
340
418
  entry: python -m skylos.cli
341
419
  pass_filenames: false
342
420
  require_serial: true
343
- additional_dependencies: [skylos==2.2.2]
421
+ additional_dependencies: [skylos==2.2.4]
344
422
  args: [".", "--output", "report.json", "--confidence", "70"]
345
423
 
346
424
  - id: skylos-fail-on-findings
@@ -396,81 +474,6 @@ jobs:
396
474
 
397
475
  **Pre-commit behavior:** the second hook is soft by default (SKYLOS_SOFT=1). This means that it prints findings and passes. You can remove the env/logic if you want pre-commit to block commits on findings.
398
476
 
399
- ### Option B — Github Actions
400
-
401
- 1. Create .github/workflows/skylos.yml:
402
-
403
- ```yaml
404
- name: Skylos Deadcode Scan
405
-
406
- on:
407
- pull_request:
408
- push:
409
- branches: [ main, master ]
410
- workflow_dispatch:
411
-
412
- jobs:
413
- scan:
414
- runs-on: ubuntu-latest
415
- env:
416
- SKYLOS_STRICT: ${{ vars.SKYLOS_STRICT || 'false' }}
417
- steps:
418
- - uses: actions/checkout@v4
419
-
420
- - uses: actions/setup-python@v5
421
- with:
422
- python-version: '3.11'
423
- cache: 'pip'
424
-
425
- - name: Install Skylos
426
- run: pip install skylos
427
-
428
- - name: Run Skylos
429
- env:
430
- REPORT: skylos_${{ github.run_number }}_${{ github.sha }}.json
431
- run: |
432
- echo "REPORT=$REPORT" >> "$GITHUB_OUTPUT"
433
- skylos . --json > "$REPORT"
434
- id: scan
435
-
436
- - name: Fail if there are findings
437
- continue-on-error: ${{ env.SKYLOS_STRICT != 'true' }}
438
- env:
439
- REPORT: ${{ steps.scan.outputs.REPORT }}
440
- run: |
441
- python - << 'PY'
442
- import json, sys, os
443
- report = os.environ["REPORT"]
444
- data = json.load(open(report, "r", encoding="utf-8"))
445
- count = 0
446
- for value in data.values():
447
- if isinstance(value, list):
448
- count += len(value)
449
- print(f"Findings: {count}")
450
- if count > 0:
451
- print(f"::warning title=Skylos findings::{count} potential issues found. See {report}")
452
- sys.exit(1 if count > 0 else 0)
453
- PY
454
-
455
- - name: Upload report artifact
456
- if: always()
457
- uses: actions/upload-artifact@v4
458
- with:
459
- name: ${{ steps.scan.outputs.REPORT }}
460
- path: ${{ steps.scan.outputs.REPORT }}
461
-
462
- - name: Summarize in job log
463
- if: always()
464
- run: |
465
- echo "Skylos report: ${{ steps.scan.outputs.REPORT }}" >> $GITHUB_STEP_SUMMARY
466
- ```
467
-
468
- **To make the job fail on findings (strict mode)**:
469
-
470
- 1. Go to GitHub -> Settings -> Secrets and variables -> Actions -> Variables
471
-
472
- 2. Add variable SKYLOS_STRICT with value true
473
-
474
477
  ## Development
475
478
 
476
479
  ### Prerequisites
@@ -518,6 +521,9 @@ A: Web framework routes are given low confidence (20) because they might be call
518
521
  **Q: What confidence level should I use?**
519
522
  A: Start with 60 (default) for safe cleanup. Use 30 for framework applications. Use 20 for more comprehensive auditing.
520
523
 
524
+ **Q: What does `--danger` check?**
525
+ A: It flags common security problems. Refer to `DANGEROUS_CODE.md` for full details.
526
+
521
527
  ## Limitations
522
528
 
523
529
  - **Dynamic code**: `getattr()`, `globals()`, runtime imports are hard to detect
@@ -561,6 +567,7 @@ We welcome contributions! Please read our [Contributing Guidelines](CONTRIBUTING
561
567
  - [x] CI/CD integration examples
562
568
  - [ ] Further optimization
563
569
  - [ ] Add new rules
570
+ - [ ] Expand the `dangerous.py` rule list
564
571
 
565
572
  ## License
566
573
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "skylos"
7
- version = "2.2.2"
7
+ version = "2.2.4"
8
8
  requires-python = ">=3.9"
9
9
  description = "A static analysis tool for Python codebases"
10
10
  authors = [{name = "oha", email = "aaronoh2015@gmail.com"}]
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="skylos",
5
- version="2.2.2",
5
+ version="2.2.4",
6
6
  packages=find_packages(),
7
7
  python_requires=">=3.9",
8
8
  install_requires=[
@@ -1,4 +1,4 @@
1
- __version__ = "2.2.2"
1
+ __version__ = "2.2.4"
2
2
 
3
3
  def analyze(*args, **kwargs):
4
4
  from .analyzer import analyze as _analyze
@@ -9,6 +9,7 @@ from skylos.visitor import Visitor
9
9
  from skylos.constants import ( PENALTIES, AUTO_CALLED )
10
10
  from skylos.visitors.test_aware import TestAwareVisitor
11
11
  from skylos.rules.secrets import scan_ctx as _secrets_scan_ctx
12
+ from skylos.rules.dangerous import scan_ctx as scan_dangerous
12
13
  import os
13
14
  import traceback
14
15
  from skylos.visitors.framework_aware import FrameworkAwareVisitor, detect_framework_usage
@@ -238,7 +239,7 @@ class Skylos:
238
239
  if method.simple_name == "format" and cls.endswith("Formatter"):
239
240
  method.references += 1
240
241
 
241
- def analyze(self, path, thr=60, exclude_folders= None, enable_secrets = False):
242
+ def analyze(self, path, thr=60, exclude_folders= None, enable_secrets = False, enable_dangerous = False):
242
243
  files, root = self._get_python_files(path, exclude_folders)
243
244
 
244
245
  if not files:
@@ -262,6 +263,7 @@ class Skylos:
262
263
  modmap[f] = self._module(root, f)
263
264
 
264
265
  all_secrets = []
266
+ all_dangers = []
265
267
  for file in files:
266
268
  mod = modmap[file]
267
269
  defs, refs, dyn, exports, test_flags, framework_flags = proc_file(file, mod)
@@ -276,13 +278,23 @@ class Skylos:
276
278
 
277
279
  if enable_secrets and _secrets_scan_ctx is not None:
278
280
  try:
279
- src_lines = Path(file).read_text(encoding="utf-8", errors="ignore").splitlines(True)
280
- ctx = {"relpath": str(file), "lines": src_lines, "tree": None}
281
+ src = Path(file).read_text(encoding="utf-8", errors="ignore")
282
+ src_lines = src.splitlines(True)
283
+ rel = str(Path(file).relative_to(root))
284
+ ctx = {"relpath": rel, "lines": src_lines, "tree": None}
281
285
  findings = list(_secrets_scan_ctx(ctx))
282
286
  if findings:
283
287
  all_secrets.extend(findings)
284
288
  except Exception:
285
289
  pass
290
+
291
+ if enable_dangerous and scan_dangerous is not None:
292
+ try:
293
+ findings = scan_dangerous(root, [file])
294
+ if findings:
295
+ all_dangers.extend(findings)
296
+ except Exception:
297
+ pass
286
298
 
287
299
  self._mark_refs()
288
300
  self._apply_heuristics()
@@ -296,7 +308,6 @@ class Skylos:
296
308
  for d in sorted(self.defs.values(), key=def_sort_key):
297
309
  if shown >= 50:
298
310
  break
299
- print(f" type={d.type} refs={d.references} conf={d.confidence} exported={d.is_exported} line={d.line} name={d.name}")
300
311
  shown += 1
301
312
 
302
313
  unused = []
@@ -318,7 +329,12 @@ class Skylos:
318
329
 
319
330
  if enable_secrets and all_secrets:
320
331
  result["secrets"] = all_secrets
332
+ result["analysis_summary"]["secrets_count"] = len(all_secrets)
321
333
 
334
+ if enable_dangerous and all_dangers:
335
+ result["dangerous"] = all_dangers
336
+ result["analysis_summary"]["dangerous_count"] = len(all_dangers)
337
+
322
338
  for u in unused:
323
339
  if u["type"] in ("function", "method"):
324
340
  result["unused_functions"].append(u)
@@ -370,13 +386,18 @@ def proc_file(file_or_args, mod=None):
370
386
 
371
387
  return [], [], set(), set(), dummy_visitor, dummy_framework_visitor
372
388
 
373
- def analyze(path, conf=60, exclude_folders=None, enable_secrets=False):
374
- return Skylos().analyze(path,conf, exclude_folders, enable_secrets)
389
+ def analyze(path, conf=60, exclude_folders=None, enable_secrets=False, enable_dangerous=False):
390
+ return Skylos().analyze(path,conf, exclude_folders, enable_secrets, enable_dangerous)
375
391
 
376
392
  if __name__ == "__main__":
377
393
  if len(sys.argv)>1:
378
394
  p = sys.argv[1]
379
- confidence = int(sys.argv[2]) if len(sys.argv) >2 else 60
395
+
396
+ if len(sys.argv) > 2:
397
+ confidence = int(sys.argv[2])
398
+ else:
399
+ confidence = 60
400
+
380
401
  result = analyze(p,confidence)
381
402
 
382
403
  data = json.loads(result)
@@ -43,7 +43,7 @@ def setup_logger(output_file=None):
43
43
 
44
44
  formatter = CleanFormatter()
45
45
 
46
- console_handler = logging.StreamHandler(sys.stdout)
46
+ console_handler = logging.StreamHandler(sys.stderr)
47
47
  console_handler.setFormatter(formatter)
48
48
  logger.addHandler(console_handler)
49
49
 
@@ -270,6 +270,9 @@ def main():
270
270
  parser.add_argument("--secrets", action="store_true",
271
271
  help="Scan for API keys. Off by default.")
272
272
 
273
+ parser.add_argument("--danger", action="store_true",
274
+ help="Scan for security issues. Off by default.")
275
+
273
276
  args = parser.parse_args()
274
277
 
275
278
  if args.list_default_excludes:
@@ -305,6 +308,11 @@ def main():
305
308
 
306
309
  try:
307
310
  result_json = run_analyze(args.path, conf=args.confidence, enable_secrets=bool(args.secrets), exclude_folders=list(final_exclude_folders))
311
+
312
+ if args.json:
313
+ print(result_json)
314
+ return
315
+
308
316
  result = json.loads(result_json)
309
317
 
310
318
  except Exception as e:
@@ -312,8 +320,14 @@ def main():
312
320
  sys.exit(1)
313
321
 
314
322
  if args.json:
315
- logger.info(result_json)
323
+ lg = logging.getLogger('skylos')
324
+ for h in list(lg.handlers):
325
+ if isinstance(h, logging.StreamHandler):
326
+ lg.removeHandler(h)
327
+ print(result_json)
316
328
  return
329
+
330
+ result = json.loads(result_json)
317
331
 
318
332
  unused_functions = result.get("unused_functions", [])
319
333
  unused_imports = result.get("unused_imports", [])
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
  import libcst as cst
3
3
  from libcst.metadata import PositionProvider
4
+ from libcst.helpers import get_full_name_for_node
4
5
 
5
6
  class _CommentOutBlock(cst.CSTTransformer):
6
7
 
@@ -31,14 +32,16 @@ class _CommentOutFunctionAtLine(_CommentOutBlock):
31
32
  return pos and pos.start.line == self.target_line
32
33
 
33
34
  def leave_FunctionDef(self, orig: cst.FunctionDef, updated: cst.FunctionDef):
34
- if self._is_target(orig) and (orig.name.value == self.func_name):
35
+ target = self.func_name.split(".")[-1]
36
+ if self._is_target(orig) and (orig.name.value == target):
35
37
  self.changed = True
36
38
  pos = self.get_metadata(PositionProvider, orig)
37
39
  return cst.FlattenSentinel(self._comment_block(pos.start.line, pos.end.line))
38
40
  return updated
39
41
 
40
42
  def leave_AsyncFunctionDef(self, orig: cst.AsyncFunctionDef, updated: cst.AsyncFunctionDef):
41
- if self._is_target(orig) and (orig.name.value == self.func_name):
43
+ target = self.func_name.split(".")[-1]
44
+ if self._is_target(orig) and (orig.name.value == target):
42
45
  self.changed = True
43
46
  pos = self.get_metadata(PositionProvider, orig)
44
47
  return cst.FlattenSentinel(self._comment_block(pos.start.line, pos.end.line))
@@ -73,7 +76,9 @@ class _CommentOutImportAtLine(_CommentOutBlock):
73
76
  removed_for_comment= []
74
77
  for alias in list(aliases):
75
78
  bound = _bound_name_for_import_alias(alias)
76
- if bound == self.target_name:
79
+ name_code = get_full_name_for_node(alias.name)
80
+ tail = name_code.split(".")[-1]
81
+ if self.target_name in (bound, tail):
77
82
  self.changed = True
78
83
  removed_for_comment.append(self._render_single_alias_text(head, alias, is_from))
79
84
  else:
@@ -175,7 +180,9 @@ class _RemoveImportAtLine(cst.CSTTransformer):
175
180
  kept = []
176
181
  for alias in aliases:
177
182
  bound = _bound_name_for_import_alias(alias)
178
- if bound == self.target_name:
183
+ name_code = get_full_name_for_node(alias.name) or ""
184
+ tail = name_code.split(".")[-1]
185
+ if self.target_name in (bound, tail):
179
186
  self.changed = True
180
187
  continue
181
188
  kept.append(alias)
@@ -213,13 +220,15 @@ class _RemoveFunctionAtLine(cst.CSTTransformer):
213
220
  return pos and pos.start.line == self.target_line
214
221
 
215
222
  def leave_FunctionDef(self, orig: cst.FunctionDef, updated: cst.FunctionDef):
216
- if self._is_target(orig) and (orig.name.value == self.func_name):
223
+ target = self.func_name.split(".")[-1]
224
+ if self._is_target(orig) and (orig.name.value == target):
217
225
  self.changed = True
218
226
  return cst.RemoveFromParent()
219
227
  return updated
220
228
 
221
229
  def leave_AsyncFunctionDef(self, orig: cst.AsyncFunctionDef, updated: cst.AsyncFunctionDef):
222
- if self._is_target(orig) and (orig.name.value == self.func_name):
230
+ target = self.func_name.split(".")[-1]
231
+ if self._is_target(orig) and (orig.name.value == target):
223
232
  self.changed = True
224
233
  return cst.RemoveFromParent()
225
234
 
@@ -0,0 +1,135 @@
1
+ from __future__ import annotations
2
+ import ast
3
+ from pathlib import Path
4
+
5
+ ALLOWED_SUFFIXES = (".py", ".pyi", ".pyw")
6
+
7
+ ## will expand this list later with more rules
8
+ DANGEROUS_CALLS = {
9
+ "eval": ("SKY-D201", "HIGH", "Use of eval()"),
10
+ "exec": ("SKY-D202", "HIGH", "Use of exec()"),
11
+ "os.system": ("SKY-D203", "MEDIUM", "Use of os.system"),
12
+ "pickle.load": ("SKY-D204", "CRITICAL", "Untrusted deserialization via pickle.load"),
13
+ "pickle.loads": ("SKY-D205", "CRITICAL", "Untrusted deserialization via pickle.loads"),
14
+ "yaml.load": ("SKY-D206", "HIGH", "yaml.load without SafeLoader"),
15
+ "hashlib.md5": ("SKY-D207", "MEDIUM", "Weak hash (MD5)"),
16
+ "hashlib.sha1": ("SKY-D208", "MEDIUM", "Weak hash (SHA1)"),
17
+ ## this is for arguments like process
18
+ "subprocess.*": ("SKY-D209", "HIGH", "subprocess.* with shell=True",
19
+ {"kw_equals": {"shell": True}}),
20
+
21
+ "requests.*": ("SKY-D210", "HIGH", "requests call with verify=False",
22
+ {"kw_equals": {"verify": False}}),
23
+ }
24
+
25
+ def _matches_rule(name, rule_key):
26
+ if not name:
27
+ return False
28
+ if rule_key.endswith(".*"):
29
+ return name.startswith(rule_key[:-2] + ".")
30
+ return name == rule_key
31
+
32
+ def _kw_equals(node: ast.Call, requirements):
33
+ if not requirements:
34
+ return True
35
+ kw_map = {}
36
+ keywords = node.keywords or []
37
+ for kw in keywords:
38
+ if kw.arg:
39
+ kw_map[kw.arg] = kw.value
40
+
41
+ for key, expected in requirements.items():
42
+ val = kw_map.get(key)
43
+ if not isinstance(val, ast.Constant):
44
+ return False
45
+ if val.value is not expected:
46
+ return False
47
+ return True
48
+
49
+ def qualified_name_from_call(node: ast.Call):
50
+ f = node.func
51
+ parts = []
52
+ while isinstance(f, ast.Attribute):
53
+ parts.append(f.attr)
54
+ f = f.value
55
+ if isinstance(f, ast.Name):
56
+ parts.append(f.id)
57
+ parts.reverse()
58
+ return ".".join(parts)
59
+ if isinstance(f, ast.Name):
60
+ return f.id
61
+ return None
62
+
63
+ def _yaml_load_without_safeloader(node: ast.Call):
64
+ name = qualified_name_from_call(node)
65
+ if name != "yaml.load":
66
+ return False
67
+
68
+ for kw in node.keywords or []:
69
+ if kw.arg == "Loader":
70
+ try:
71
+ text = ast.unparse(kw.value)
72
+ return "SafeLoader" not in text
73
+ except Exception:
74
+ return True
75
+ return True
76
+
77
+ def _add_finding(findings,
78
+ file_path: Path,
79
+ node: ast.AST,
80
+ rule_id,
81
+ severity,
82
+ message):
83
+ findings.append({
84
+ "rule_id": rule_id,
85
+ "severity": severity,
86
+ "message": message,
87
+ "file": str(file_path),
88
+ "line": getattr(node, "lineno", 1),
89
+ "col": getattr(node, "col_offset", 0),
90
+ })
91
+
92
+ def scan_ctx(root, files):
93
+ findings = []
94
+
95
+ for file_path in files:
96
+ if file_path.suffix.lower() not in ALLOWED_SUFFIXES:
97
+ continue
98
+
99
+ try:
100
+ src = file_path.read_text(encoding="utf-8", errors="ignore")
101
+ tree = ast.parse(src)
102
+ except Exception:
103
+ continue
104
+
105
+ for node in ast.walk(tree):
106
+ if not isinstance(node, ast.Call):
107
+ continue
108
+
109
+ name = qualified_name_from_call(node)
110
+ if not name:
111
+ continue
112
+
113
+ for rule_key, tup in DANGEROUS_CALLS.items():
114
+ rule_id, severity, message, *rest = tup
115
+
116
+ if rest:
117
+ opts = rest[0]
118
+ else:
119
+ opts = None
120
+
121
+ if not _matches_rule(name, rule_key):
122
+ continue
123
+
124
+ if rule_key == "yaml.load":
125
+ if not _yaml_load_without_safeloader(node):
126
+ continue
127
+
128
+ if opts and "kw_equals" in opts:
129
+ if not _kw_equals(node, opts["kw_equals"]):
130
+ continue
131
+
132
+ _add_finding(findings, file_path, node, rule_id, severity, message)
133
+ break
134
+
135
+ return findings
@@ -1,7 +1,6 @@
1
1
  from __future__ import annotations
2
2
  import re, ast
3
3
  from math import log2
4
- from typing import Dict, Any, Iterable, List, Optional
5
4
 
6
5
  __all__ = ["scan_ctx"]
7
6
 
@@ -23,7 +22,15 @@ GENERIC_VALUE = re.compile(r"""(?ix)
23
22
  (?:
24
23
  (token|api[_-]?key|secret|password|passwd|pwd|bearer|auth[_-]?token|access[_-]?token)
25
24
  \s*[:=]\s*(?P<q>['"])(?P<val>[^'"]{16,})(?P=q)
26
- )|(?P<bare>[A-Za-z0-9_\-]{24,})
25
+ )
26
+ |
27
+ (?P<bare>
28
+ (?=[A-Za-z0-9_-]{32,}\b)
29
+ (?=.*[A-Z])
30
+ (?=.*[a-z])
31
+ (?=.*\d)
32
+ [A-Za-z0-9_-]+
33
+ )
27
34
  """)
28
35
 
29
36
  SAFE_TEST_HINTS = {
@@ -31,8 +38,12 @@ SAFE_TEST_HINTS = {
31
38
  "changeme", "password", "secret", "not_a_real", "do_not_use",
32
39
  }
33
40
 
41
+ _IDENTIFIER = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
42
+
34
43
  IGNORE_DIRECTIVE = "skylos: ignore[SKY-S101]"
35
- DEFAULT_MIN_ENTROPY = 3.6
44
+ DEFAULT_MIN_ENTROPY = 3.9
45
+
46
+ IS_TEST_PATH = re.compile(r"(^|/)(tests?(/|$)|test_[^/]+\.py$)")
36
47
 
37
48
  def _entropy(s):
38
49
  if len(s) == 0:
@@ -65,6 +76,9 @@ def _mask(tok):
65
76
  last_part = tok[-4:]
66
77
  return first_part + "…" + last_part
67
78
 
79
+ def _looks_like_identifier(s):
80
+ return bool(_IDENTIFIER.fullmatch(s))
81
+
68
82
  def _docstring_lines(tree):
69
83
  if tree is None:
70
84
  return set()
@@ -108,12 +122,15 @@ def _docstring_lines(tree):
108
122
  return docstring_line_numbers
109
123
 
110
124
  def scan_ctx(ctx, *, min_entropy= DEFAULT_MIN_ENTROPY, scan_comments= True,
111
- scan_docstrings= True, allowlist_patterns= None, ignore_path_substrings= None):
125
+ scan_docstrings= True, allowlist_patterns= None, ignore_path_substrings= None, ignore_tests=True):
112
126
 
113
127
  rel_path = ctx.get("relpath", "")
114
128
  if not rel_path.endswith(ALLOWED_FILE_SUFFIXES):
115
129
  return []
116
130
 
131
+ if ignore_tests and IS_TEST_PATH.search(rel_path.replace("\\", "/")):
132
+ return []
133
+
117
134
  if ignore_path_substrings:
118
135
  for substring in ignore_path_substrings:
119
136
  if substring and substring in rel_path:
@@ -221,22 +238,33 @@ def scan_ctx(ctx, *, min_entropy= DEFAULT_MIN_ENTROPY, scan_comments= True,
221
238
  "entropy": round(tok_entropy, 2),
222
239
  }
223
240
  findings.append(aws_finding)
224
-
225
- generic_match = GENERIC_VALUE.search(line_content)
241
+
242
+ in_tests = bool(IS_TEST_PATH.search(rel_path.replace("\\", "/")))
243
+
244
+ if in_tests:
245
+ generic_match = None
246
+ else:
247
+ generic_match= GENERIC_VALUE.search(line_content)
248
+
226
249
  if generic_match:
227
250
  val_group = generic_match.group("val")
228
251
  bare_group = generic_match.group("bare")
229
252
 
253
+ is_bare = False
230
254
  if val_group:
231
255
  extracted_token = val_group
232
256
  elif bare_group:
233
257
  extracted_token = bare_group
258
+ is_bare = True
234
259
  else:
235
260
  extracted_token = ""
236
261
 
237
262
  clean_token = extracted_token.strip()
238
263
 
239
264
  if clean_token:
265
+ if is_bare and _looks_like_identifier(clean_token):
266
+ continue
267
+
240
268
  token_lowercase = clean_token.lower()
241
269
  has_safe_hint = False
242
270
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: skylos
3
- Version: 2.2.2
3
+ Version: 2.2.4
4
4
  Summary: A static analysis tool for Python codebases
5
5
  Author-email: oha <aaronoh2015@gmail.com>
6
6
  Requires-Python: >=3.9
@@ -15,6 +15,7 @@ skylos.egg-info/entry_points.txt
15
15
  skylos.egg-info/requires.txt
16
16
  skylos.egg-info/top_level.txt
17
17
  skylos/rules/__init__.py
18
+ skylos/rules/dangerous.py
18
19
  skylos/rules/secrets.py
19
20
  skylos/visitors/__init__.py
20
21
  skylos/visitors/framework_aware.py
@@ -28,6 +29,7 @@ test/test_changes_analyzer.py
28
29
  test/test_cli.py
29
30
  test/test_codemods.py
30
31
  test/test_constants.py
32
+ test/test_dangerous.py
31
33
  test/test_framework_aware.py
32
34
  test/test_integration.py
33
35
  test/test_new_behaviours.py
@@ -0,0 +1,70 @@
1
+ from pathlib import Path
2
+ from skylos.rules.dangerous import scan_ctx
3
+
4
+ def _write(tmp_path: Path, name, code):
5
+ p = tmp_path / name
6
+ p.write_text(code, encoding="utf-8")
7
+ return p
8
+
9
+ def _rule_ids(findings):
10
+ rule_ids = set()
11
+ for f in findings:
12
+ rule_ids.add(f["rule_id"])
13
+ return rule_ids
14
+
15
+ def _scan_one(tmp_path: Path, name, code):
16
+ file_path = _write(tmp_path, name, code)
17
+ return scan_ctx(tmp_path, [file_path])
18
+
19
+ def test_eval(tmp_path):
20
+ out = _scan_one(tmp_path, "a_eval.py", 'eval("1+1")\n')
21
+ assert "SKY-D201" in _rule_ids(out)
22
+
23
+ def test_exec(tmp_path):
24
+ out = _scan_one(tmp_path, "a_exec.py", 'exec("print(1)")\n')
25
+ assert "SKY-D202" in _rule_ids(out)
26
+
27
+ def test_os_system(tmp_path):
28
+ out = _scan_one(tmp_path, "a_os.py", "import os\nos.system('echo hi')\n")
29
+ assert "SKY-D203" in _rule_ids(out)
30
+
31
+ def test_pickle_loads(tmp_path):
32
+ out = _scan_one(tmp_path, "a_pickle.py", "import pickle\npickle.loads(b'\\x80\\x04K\\x01.')\n")
33
+ assert "SKY-D205" in _rule_ids(out)
34
+
35
+ def test_yaml_load_without_safeloader(tmp_path):
36
+ out = _scan_one(tmp_path, "a_yaml.py", "import yaml\nyaml.load('a: 1')\n")
37
+ assert "SKY-D206" in _rule_ids(out)
38
+
39
+ def test_md5_sha1(tmp_path):
40
+ out = _scan_one(tmp_path, "a_hashes.py", "import hashlib\nhashlib.md5(b'd')\nhashlib.sha1(b'd')\n")
41
+ ids = _rule_ids(out)
42
+ assert "SKY-D207" in ids
43
+ assert "SKY-D208" in ids
44
+
45
+ def test_subprocess_shell_true(tmp_path):
46
+ out = _scan_one(tmp_path, "a_subproc.py", "import subprocess\nsubprocess.run('echo hi', shell=True)\n")
47
+ assert "SKY-D209" in _rule_ids(out)
48
+
49
+ def test_requests_verify_false(tmp_path):
50
+ out = _scan_one(tmp_path, "a_requests.py", "import requests\nrequests.get('https://x', verify=False)\n")
51
+ assert "SKY-D210" in _rule_ids(out)
52
+
53
+ def test_yaml_safe_loader_does_not_trigger(tmp_path):
54
+ code = (
55
+ "import yaml\n"
56
+ "from yaml import SafeLoader\n"
57
+ "yaml.load('a: 1', Loader=SafeLoader)\n"
58
+ )
59
+ out = _scan_one(tmp_path, "b_yaml_safe.py", code)
60
+ assert "SKY-D206" not in _rule_ids(out)
61
+
62
+ def test_subprocess_without_shell_true_is_ok(tmp_path):
63
+ code = "import subprocess\nsubprocess.run(['echo','hi'])\n"
64
+ out = _scan_one(tmp_path, "b_subproc_ok.py", code)
65
+ assert "SKY-D209" not in _rule_ids(out)
66
+
67
+ def test_requests_default_verify_true_is_ok(tmp_path):
68
+ code = "import requests\nrequests.get('https://example.com')\n"
69
+ out = _scan_one(tmp_path, "b_requests_ok.py", code)
70
+ assert "SKY-D210" not in _rule_ids(out)
@@ -77,16 +77,6 @@ def test_aws_secret_access_key_special_case():
77
77
  assert "entropy" in hit and isinstance(hit["entropy"], float)
78
78
  assert ELLIPSIS in hit["preview"]
79
79
 
80
-
81
- def test_generic_entropy_detection_and_threshold():
82
- src = 'X = "o2uV7Ew1kZ9Q3nR8sT5yU6pX4cJ2mL7a"\n'
83
- findings = list(scan_ctx(_ctx_from_source(src)))
84
- assert any(f["provider"] == "generic" for f in findings)
85
-
86
- findings_high_thr = list(scan_ctx(_ctx_from_source(src), min_entropy=8.0))
87
- assert not any(f["provider"] == "generic" for f in findings_high_thr)
88
-
89
-
90
80
  def test_ignore_directive_suppresses_matches():
91
81
  src = 'GITHUB_TOKEN = "ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" # skylos: ignore[SKY-S101]\n'
92
82
  findings = list(scan_ctx(_ctx_from_source(src)))
@@ -177,3 +167,27 @@ def test_safe_hints_suppress_detection():
177
167
  safe_line = 'EXAMPLE_TOKEN = "sk_test_this_is_example_value_not_real_123456"\n'
178
168
  out = list(scan_ctx(_ctx_from_source(safe_line)))
179
169
  assert out == []
170
+
171
+ def test_generic_is_suppressed_in_test_paths():
172
+ src = 'X = "o2uV7Ew1kZ9Q3nR8sT5yU6pX4cJ2mL7a"\n'
173
+ findings = list(scan_ctx(_ctx_from_source(src, rel="tests/unit/test_secrets.py")))
174
+
175
+ generic_findings = []
176
+ for f in findings:
177
+ if f["provider"] == "generic":
178
+ generic_findings.append(f)
179
+
180
+ assert len(generic_findings) == 0
181
+
182
+ def test_normal_strings_ignored():
183
+ src = 'X = "config_path"\n'
184
+ ctx = _ctx_from_source(src)
185
+ findings = list(scan_ctx(ctx))
186
+
187
+ generic_findings = []
188
+ for f in findings:
189
+ if f["provider"] == "generic":
190
+ generic_findings.append(f)
191
+
192
+ assert len(generic_findings) == 0
193
+
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes