guarddog 2.7.1__py3-none-any.whl → 2.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. guarddog/analyzer/metadata/__init__.py +3 -0
  2. guarddog/analyzer/metadata/go/typosquatting.py +11 -28
  3. guarddog/analyzer/metadata/npm/direct_url_dependency.py +0 -1
  4. guarddog/analyzer/metadata/npm/typosquatting.py +24 -59
  5. guarddog/analyzer/metadata/pypi/repository_integrity_mismatch.py +53 -164
  6. guarddog/analyzer/metadata/pypi/typosquatting.py +20 -77
  7. guarddog/analyzer/metadata/repository_integrity_mismatch.py +202 -2
  8. guarddog/analyzer/metadata/resources/top_go_packages.json +2926 -2923
  9. guarddog/analyzer/metadata/resources/top_npm_packages.json +8005 -8002
  10. guarddog/analyzer/metadata/resources/top_pypi_packages.json +15003 -60021
  11. guarddog/analyzer/metadata/resources/top_rubygems_packages.json +979 -0
  12. guarddog/analyzer/metadata/rubygems/__init__.py +26 -0
  13. guarddog/analyzer/metadata/rubygems/bundled_binary.py +13 -0
  14. guarddog/analyzer/metadata/rubygems/empty_information.py +24 -0
  15. guarddog/analyzer/metadata/rubygems/release_zero.py +22 -0
  16. guarddog/analyzer/metadata/rubygems/repository_integrity_mismatch.py +49 -0
  17. guarddog/analyzer/metadata/rubygems/typosquatting.py +91 -0
  18. guarddog/analyzer/metadata/typosquatting.py +218 -0
  19. guarddog/analyzer/metadata/utils.py +23 -0
  20. guarddog/analyzer/sourcecode/__init__.py +2 -0
  21. guarddog/analyzer/sourcecode/api-obfuscation.yml +35 -40
  22. guarddog/analyzer/sourcecode/code-execution.yml +20 -0
  23. guarddog/analyzer/sourcecode/exec-base64.yml +19 -0
  24. guarddog/analyzer/sourcecode/exfiltrate-sensitive-data.yml +31 -5
  25. guarddog/analyzer/sourcecode/npm-api-obfuscation.yml +51 -0
  26. guarddog/analyzer/sourcecode/rubygems-code-execution.yml +67 -0
  27. guarddog/analyzer/sourcecode/rubygems-exec-base64.yml +26 -0
  28. guarddog/analyzer/sourcecode/rubygems-exfiltrate-sensitive-data.yml +70 -0
  29. guarddog/analyzer/sourcecode/rubygems-install-hook.yml +45 -0
  30. guarddog/analyzer/sourcecode/rubygems-network-on-require.yml +78 -0
  31. guarddog/analyzer/sourcecode/rubygems-serialize-environment.yml +38 -0
  32. guarddog/analyzer/sourcecode/screenshot.yml +38 -0
  33. guarddog/ecosystems.py +3 -0
  34. guarddog/scanners/__init__.py +6 -0
  35. guarddog/scanners/npm_project_scanner.py +1 -1
  36. guarddog/scanners/rubygems_package_scanner.py +112 -0
  37. guarddog/scanners/rubygems_project_scanner.py +75 -0
  38. guarddog/scanners/scanner.py +36 -12
  39. guarddog/utils/archives.py +1 -1
  40. guarddog-2.9.0.dist-info/METADATA +471 -0
  41. {guarddog-2.7.1.dist-info → guarddog-2.9.0.dist-info}/RECORD +46 -29
  42. {guarddog-2.7.1.dist-info → guarddog-2.9.0.dist-info}/WHEEL +1 -1
  43. guarddog-2.7.1.dist-info/METADATA +0 -40
  44. {guarddog-2.7.1.dist-info → guarddog-2.9.0.dist-info}/entry_points.txt +0 -0
  45. {guarddog-2.7.1.dist-info → guarddog-2.9.0.dist-info}/licenses/LICENSE +0 -0
  46. {guarddog-2.7.1.dist-info → guarddog-2.9.0.dist-info}/licenses/LICENSE-3rdparty.csv +0 -0
  47. {guarddog-2.7.1.dist-info → guarddog-2.9.0.dist-info}/licenses/NOTICE +0 -0
guarddog/analyzer/metadata/rubygems/__init__.py
@@ -0,0 +1,26 @@
+ from typing import Type
+
+ from guarddog.analyzer.metadata import Detector
+ from guarddog.analyzer.metadata.rubygems.typosquatting import RubyGemsTyposquatDetector
+ from guarddog.analyzer.metadata.rubygems.empty_information import (
+     RubyGemsEmptyInfoDetector,
+ )
+ from guarddog.analyzer.metadata.rubygems.release_zero import RubyGemsReleaseZeroDetector
+ from guarddog.analyzer.metadata.rubygems.bundled_binary import RubyGemsBundledBinary
+ from guarddog.analyzer.metadata.rubygems.repository_integrity_mismatch import (
+     RubyGemsIntegrityMismatchDetector,
+ )
+
+ RUBYGEMS_METADATA_RULES = {}
+
+ classes: list[Type[Detector]] = [
+     RubyGemsTyposquatDetector,
+     RubyGemsEmptyInfoDetector,
+     RubyGemsReleaseZeroDetector,
+     RubyGemsBundledBinary,
+     RubyGemsIntegrityMismatchDetector,
+ ]
+
+ for detectorClass in classes:
+     detectorInstance = detectorClass()  # type: ignore
+     RUBYGEMS_METADATA_RULES[detectorInstance.get_name()] = detectorInstance
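The hunk above registers the new RubyGems metadata heuristics in a name-to-instance map, mirroring the existing per-ecosystem registries. As a rough sketch of how such a registry can be driven (the gem_info dict is an assumed, simplified RubyGems metadata payload; detectors such as the integrity-mismatch check also expect a path to the extracted gem, which is omitted here):

    # Hypothetical driver for the RUBYGEMS_METADATA_RULES registry built above.
    from guarddog.analyzer.metadata.rubygems import RUBYGEMS_METADATA_RULES

    gem_info = {"name": "some-gem", "version": "0.0.0", "info": ""}  # assumed metadata shape
    for rule_name, detector in RUBYGEMS_METADATA_RULES.items():
        matched, message = detector.detect(
            gem_info, name=gem_info["name"], version=gem_info["version"]
        )
        if matched:
            print(f"{rule_name}: {message}")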
guarddog/analyzer/metadata/rubygems/bundled_binary.py
@@ -0,0 +1,13 @@
+ from guarddog.analyzer.metadata.bundled_binary import BundledBinary
+ from typing import Optional
+
+
+ class RubyGemsBundledBinary(BundledBinary):
+     def detect(
+         self,
+         package_info,
+         path: Optional[str] = None,
+         name: Optional[str] = None,
+         version: Optional[str] = None,
+     ) -> tuple[bool, str]:
+         return super().detect(package_info, path, name, version)
guarddog/analyzer/metadata/rubygems/empty_information.py
@@ -0,0 +1,24 @@
+ import logging
+ from typing import Optional
+
+ from guarddog.analyzer.metadata.empty_information import EmptyInfoDetector
+
+ log = logging.getLogger("guarddog")
+
+
+ class RubyGemsEmptyInfoDetector(EmptyInfoDetector):
+     def detect(
+         self,
+         package_info,
+         path: Optional[str] = None,
+         name: Optional[str] = None,
+         version: Optional[str] = None,
+     ) -> tuple[bool, str]:
+         log.debug(f"Running RubyGems empty description heuristic on package {name}")
+         info = package_info.get("info", "")
+         if info is None:
+             info = ""
+         return (
+             len(info.strip()) == 0,
+             EmptyInfoDetector.MESSAGE_TEMPLATE % "RubyGems",
+         )
guarddog/analyzer/metadata/rubygems/release_zero.py
@@ -0,0 +1,22 @@
+ import logging
+ from typing import Optional
+
+ from guarddog.analyzer.metadata.release_zero import ReleaseZeroDetector
+
+ log = logging.getLogger("guarddog")
+
+
+ class RubyGemsReleaseZeroDetector(ReleaseZeroDetector):
+     def detect(
+         self,
+         package_info,
+         path: Optional[str] = None,
+         name: Optional[str] = None,
+         version: Optional[str] = None,
+     ) -> tuple[bool, str]:
+         log.debug(f"Running zero version heuristic on RubyGems package {name}")
+         gem_version = package_info.get("version", "")
+         return (
+             gem_version in ["0.0.0", "0.0"],
+             ReleaseZeroDetector.MESSAGE_TEMPLATE % gem_version,
+         )
guarddog/analyzer/metadata/rubygems/repository_integrity_mismatch.py
@@ -0,0 +1,49 @@
+ import logging
+ from typing import Optional
+
+ import urllib3.util
+
+ from guarddog.analyzer.metadata.repository_integrity_mismatch import IntegrityMismatch
+
+ log = logging.getLogger("guarddog")
+
+
+ def normalize_github_url(url):
+     if url is None:
+         return None
+     url = url.strip()
+     if url.endswith(".git"):
+         url = url[:-4]
+     if url.startswith("git://"):
+         url = url.replace("git://", "https://")
+     if url.startswith("http://"):
+         url = url.replace("http://", "https://")
+     parsed = urllib3.util.parse_url(url)
+     if parsed.host not in ("github.com", "www.github.com"):
+         return None
+     return url
+
+
+ class RubyGemsIntegrityMismatchDetector(IntegrityMismatch):
+     EXCLUDED_EXTENSIONS = [".md", ".txt", ".rdoc"]
+
+     def extract_github_url(self, package_info, name: str) -> Optional[str]:
+         """Extract GitHub URL from RubyGems metadata."""
+         source_code_uri = package_info.get("source_code_uri")
+         homepage_uri = package_info.get("homepage_uri")
+
+         github_url = normalize_github_url(source_code_uri)
+         if github_url is None:
+             github_url = normalize_github_url(homepage_uri)
+
+         return github_url
+
+     def get_base_path(self, path: str, name: str) -> str:
+         """RubyGems: files are extracted directly to the path."""
+         return path
+
+     def get_version(self, package_info, version: Optional[str]) -> Optional[str]:
+         """Get version from RubyGems metadata or use provided version."""
+         if version is None:
+             version = package_info.get("version")
+         return version
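To make the URL normalization above concrete, here is roughly how normalize_github_url behaves on a few representative inputs (expected values are inferred from the code in this hunk, shown only as an illustration):

    from guarddog.analyzer.metadata.rubygems.repository_integrity_mismatch import (
        normalize_github_url,
    )

    normalize_github_url("git://github.com/org/repo.git")   # "https://github.com/org/repo"
    normalize_github_url("http://www.github.com/org/repo")  # "https://www.github.com/org/repo"
    normalize_github_url("https://gitlab.com/org/repo")     # None (host is not GitHub)
    normalize_github_url(None)                               # None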
guarddog/analyzer/metadata/rubygems/typosquatting.py
@@ -0,0 +1,91 @@
+ import logging
+ from typing import Optional
+
+ from guarddog.analyzer.metadata.typosquatting import TyposquatDetector
+
+ log = logging.getLogger("guarddog")
+
+
+ class RubyGemsTyposquatDetector(TyposquatDetector):
+     """
+     Detector for typosquatting attacks on RubyGems.
+     Checks for distance one Levenshtein, one-off character swaps,
+     permutations around hyphens, and substrings.
+
+     Attributes:
+         popular_packages (set): set of critical/popular gems from ecosyste.ms
+     """
+
+     def _get_top_packages(self) -> set:
+         """
+         Gets the top 1000 critical RubyGems packages.
+         Uses the base class implementation with RubyGems-specific parameters.
+         """
+         url = "https://packages.ecosyste.ms/api/v1/registries/rubygems.org/package_names?critical=true&per_page=1000"
+         return self._get_top_packages_with_refresh(
+             packages_filename="top_rubygems_packages.json",
+             popular_packages_url=url,
+             refresh_days=30,
+         )
+
+     def detect(
+         self,
+         package_info,
+         path: Optional[str] = None,
+         name: Optional[str] = None,
+         version: Optional[str] = None,
+     ) -> tuple[bool, Optional[str]]:
+         """
+         Uses a gem's information to determine if it's attempting
+         a typosquatting attack.
+         """
+         gem_name = package_info.get("name", name)
+         log.debug(f"Running typosquatting heuristic on RubyGems package {gem_name}")
+
+         similar_package_names = self.get_typosquatted_package(gem_name)
+         if len(similar_package_names) > 0:
+             return True, TyposquatDetector.MESSAGE_TEMPLATE % ", ".join(
+                 similar_package_names
+             )
+         return False, None
+
+     def _get_confused_forms(self, package_name) -> list:
+         """
+         Gets confused terms for Ruby gems.
+         Confused terms are:
+         - ruby to rb swaps (or vice versa)
+         - the removal of ruby/rb terms
+         - rails to ruby-on-rails swaps
+
+         Args:
+             package_name (str): name of the package
+
+         Returns:
+             list: list of confused terms
+         """
+         confused_forms = []
+
+         terms = package_name.split("-")
+
+         for i in range(len(terms)):
+             confused_term = None
+
+             if "ruby" in terms[i]:
+                 confused_term = terms[i].replace("ruby", "rb")
+             elif "rb" in terms[i]:
+                 confused_term = terms[i].replace("rb", "ruby")
+             else:
+                 continue
+
+             replaced_form = terms[:i] + [confused_term] + terms[i + 1 :]
+             removed_form = terms[:i] + terms[i + 1 :]
+
+             for form in (replaced_form, removed_form):
+                 confused_forms.append("-".join(form))
+
+         if package_name == "rails":
+             confused_forms.append("ruby-on-rails")
+         elif package_name == "ruby-on-rails":
+             confused_forms.append("rails")
+
+         return confused_forms
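Tracing _get_confused_forms above on a few sample gem names gives a feel for the RubyGems-specific swaps it generates (outputs inferred from the logic in this hunk; note the constructor also loads the bundled popular-gems list):

    detector = RubyGemsTyposquatDetector()
    detector._get_confused_forms("ruby-kafka")   # ['rb-kafka', 'kafka']
    detector._get_confused_forms("faraday-rb")   # ['faraday-ruby', 'faraday']
    detector._get_confused_forms("rails")        # ['ruby-on-rails']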
guarddog/analyzer/metadata/typosquatting.py
@@ -1,7 +1,18 @@
  import abc
+ import json
+ import logging
+ import os
+ import time
+ from datetime import datetime, timedelta
  from itertools import permutations
+ from typing import Optional
+
+ import requests

  from guarddog.analyzer.metadata.detector import Detector
+ from guarddog.utils.config import TOP_PACKAGES_CACHE_LOCATION
+
+ log = logging.getLogger("guarddog")


  class TyposquatDetector(Detector):
@@ -19,8 +30,215 @@ class TyposquatDetector(Detector):

      @abc.abstractmethod
      def _get_top_packages(self) -> set:
+         """
+         Subclasses should implement this to return a set of top package names.
+
+         For simple implementations without network refresh, override this directly.
+         For implementations with network refresh, use _get_top_packages_with_refresh().
+         """
          pass

+     def _get_top_packages_with_refresh(
+         self,
+         packages_filename: str,
+         popular_packages_url: Optional[str] = None,
+         refresh_days: int = 30,
+     ) -> set:
+         """
+         Common implementation for getting top packages with optional network refresh.
+
+         Args:
+             packages_filename: Name of the JSON file (e.g., "top_pypi_packages.json")
+             popular_packages_url: URL to fetch fresh package data. If None, refresh is disabled.
+             refresh_days: Number of days before file is considered expired
+
+         Returns:
+             set: Set of package names
+         """
+         resources_dir = TOP_PACKAGES_CACHE_LOCATION
+         if resources_dir is None:
+             resources_dir = os.path.abspath(
+                 os.path.join(os.path.dirname(__file__), "resources")
+             )
+
+         top_packages_path = os.path.join(resources_dir, packages_filename)
+         log.debug(f"Loading cache from: {top_packages_path}")
+
+         cache_data = self._load_cache_file(top_packages_path)
+
+         if cache_data:
+             log.debug(f"Cache loaded successfully with keys: {list(cache_data.keys())}")
+         else:
+             log.debug("Cache is empty or invalid")
+
+         top_packages_information = cache_data.get("packages") if cache_data else None
+
+         # Enable refresh if URL is provided
+         enable_refresh = popular_packages_url is not None
+         is_expired = self._cache_is_expired(cache_data, days=refresh_days)
+         log.debug(
+             f"Cache expired check: {is_expired} (refresh enabled: {enable_refresh})"
+         )
+
+         if enable_refresh and is_expired and popular_packages_url is not None:
+             log.info(
+                 f"Cache is expired, attempting to refresh from: {popular_packages_url}"
+             )
+             new_response_data = self._get_top_packages_network_raw(popular_packages_url)
+             if new_response_data is not None:
+                 log.debug("Downloaded new data, extracting package names")
+                 top_packages_information = self._extract_package_names(
+                     new_response_data
+                 )
+
+                 # Save with new standardized format
+                 cache_data = {
+                     "downloaded_timestamp": int(time.time()),
+                     "packages": top_packages_information,
+                 }
+
+                 if top_packages_information is not None:
+                     log.info(
+                         f"Saving refreshed cache with {len(top_packages_information)} packages to {top_packages_path}"
+                     )
+                     with open(top_packages_path, "w+") as f:
+                         json.dump(cache_data, f, ensure_ascii=False, indent=4)
+             else:
+                 log.warning(
+                     f"Failed to download new cache data from {popular_packages_url}"
+                 )
+
+         if top_packages_information is None:
+             return set()
+
+         return set(top_packages_information)
+
+     def _cache_is_expired(self, cache_data: dict | None, days: int) -> bool:
+         """
+         Check if cache data is expired based on downloaded_timestamp.
+
+         Args:
+             cache_data: Cache dictionary with 'downloaded_timestamp' key
+             days: Number of days before cache is considered expired
+
+         Returns:
+             bool: True if expired or timestamp missing, False otherwise
+         """
+         if cache_data is None:
+             log.debug("Cache is expired: cache_data is None")
+             return True
+
+         timestamp = cache_data.get("downloaded_timestamp")
+         if timestamp is None:
+             # Missing timestamp, consider expired
+             log.debug("Cache is expired: missing 'downloaded_timestamp' field")
+             return True
+
+         try:
+             download_time = datetime.fromtimestamp(timestamp)
+             age = datetime.now() - download_time
+             is_expired = age > timedelta(days=days)
+             log.debug(
+                 f"Cache age: {age.days} days, threshold: {days} days, expired: {is_expired}"
+             )
+             return is_expired
+         except (ValueError, OSError) as e:
+             # Invalid timestamp
+             log.debug(f"Cache is expired: invalid timestamp {timestamp} - {e}")
+             return True
+
+     def _load_cache_file(self, path: str) -> dict | None:
+         """
+         Load cache data from local JSON file.
+
+         Expected format: {"downloaded_timestamp": epoch, "packages": [...]}
+
+         If the file doesn't match this format, it will be considered invalid
+         and trigger a refresh to download data in the correct format.
+
+         Args:
+             path: Path to the JSON file
+
+         Returns:
+             dict: Cache data with 'packages' and 'downloaded_timestamp', or None if invalid
+         """
+         try:
+             with open(path, "r") as f:
+                 result = json.load(f)
+
+             # Validate new format structure
+             if (
+                 isinstance(result, dict)
+                 and "packages" in result
+                 and "downloaded_timestamp" in result
+             ):
+                 # Validate that packages is a list
+                 if isinstance(result["packages"], list):
+                     return result
+                 else:
+                     log.warning(
+                         f"Invalid cache format in {path}: 'packages' must be a list. Will trigger refresh."
+                     )
+                     return None
+
+             # File doesn't have the correct format - invalidate it
+             log.info(
+                 f"Cache file {path} has old or invalid format. Will trigger refresh to new format."
+             )
+             return None
+
+         except FileNotFoundError:
+             log.debug(f"Cache file not found: {path}")
+             return None
+         except json.JSONDecodeError:
+             log.error(f"Invalid JSON in file: {path}")
+             return None
+
+     def _get_top_packages_network_raw(self, url: str) -> dict | list | None:
+         """
+         Fetch the complete response data from the network.
+         Returns the full JSON structure to preserve format when saving.
+
+         Args:
+             url: URL to fetch package data from
+
+         Returns:
+             dict | list: Full response data or None on error
+         """
+         try:
+             response = requests.get(url)
+             response.raise_for_status()
+             return response.json()
+         except json.JSONDecodeError:
+             log.error(f'Couldn\'t convert to json: "{response.text}"')
+             return None
+         except requests.exceptions.RequestException as e:
+             log.error(f"Network error: {e}")
+             return None
+
+     def _extract_package_names(self, data: dict | list | None) -> list | None:
+         """
+         Extract package names from the raw data structure.
+
+         Override this method in subclasses if the data format is specific to the ecosystem.
+         Default implementation assumes data is already a list of package names.
+
+         Args:
+             data: Raw data from JSON file or network response
+
+         Returns:
+             list: List of package names or None
+         """
+         if data is None:
+             return None
+
+         # Default: assume data is already a list
+         if isinstance(data, list):
+             return data
+
+         # If it's a dict, subclasses should override this method
+         return None
+
      def _is_distance_one_Levenshtein(self, name1, name2) -> bool:
          """
          Returns True if two names have a Levenshtein distance of one
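The default _extract_package_names above only accepts a list-shaped payload and leaves dict-shaped responses to subclasses. A hedged sketch of what such an override could look like, for a hypothetical feed that wraps names in rows (this is not the actual PyPI or npm implementation, just an illustration of the extension point):

    # Hypothetical subclass of TyposquatDetector for a feed shaped like
    # {"rows": [{"project": "requests"}, ...]}; the URL and filename are placeholders.
    class ExampleTyposquatDetector(TyposquatDetector):
        def _get_top_packages(self) -> set:
            return self._get_top_packages_with_refresh(
                packages_filename="top_example_packages.json",
                popular_packages_url="https://example.invalid/top-packages.json",
            )

        def _extract_package_names(self, data):
            if isinstance(data, dict) and "rows" in data:
                return [row["project"] for row in data["rows"]]
            return super()._extract_package_names(data)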
guarddog/analyzer/metadata/utils.py
@@ -2,6 +2,7 @@ from datetime import datetime, timezone
  from functools import cache
  from typing import Optional

+ import hashlib
  import whois  # type: ignore

  NPM_MAINTAINER_EMAIL_WARNING = (
@@ -53,3 +54,25 @@ def extract_email_address_domain(email_address: str):

      except IndexError:
          raise ValueError(f"Invalid email address: {email_address}")
+
+
+ def get_file_hash(path: str) -> tuple[str, list[str]]:
+     """
+     Gets the sha256 of the file
+
+     Args:
+         path (str): Full file path
+
+     Returns:
+         str: The SHA256 hash of the file as a hexadecimal string
+         list: The file contents as a list of lines
+     """
+     with open(path, "rb") as f:
+         # Read the contents of the file
+         file_contents = f.read()
+         # Create a hash object
+         hash_object = hashlib.sha256()
+         # Feed the file contents to the hash object
+         hash_object.update(file_contents)
+         # Get the hexadecimal hash value
+         return hash_object.hexdigest(), str(file_contents).strip().splitlines()
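A quick sanity check of get_file_hash (illustrative usage; the temporary file is an assumption, not part of the diff):

    import hashlib
    import tempfile

    from guarddog.analyzer.metadata.utils import get_file_hash

    with tempfile.NamedTemporaryFile("wb", suffix=".rb", delete=False) as tmp:
        tmp.write(b"puts 'hello'\n")

    digest, lines = get_file_hash(tmp.name)
    assert digest == hashlib.sha256(b"puts 'hello'\n").hexdigest()
    # Note: the second return value is built from str(file_contents), i.e. the repr of
    # the raw bytes, so `lines` contains escaped text rather than decoded source lines.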
guarddog/analyzer/sourcecode/__init__.py
@@ -89,6 +89,8 @@ for file_name in semgrep_rule_file_names:
              ecosystems.add(ECOSYSTEM.EXTENSION)
          case "go":
              ecosystems.add(ECOSYSTEM.GO)
+         case "ruby":
+             ecosystems.add(ECOSYSTEM.RUBYGEMS)
          case _:
              continue

guarddog/analyzer/sourcecode/api-obfuscation.yml
@@ -1,42 +1,37 @@
  rules:
-   - id: api-obfuscation
-     languages:
-       - python
-     message: This package uses obfuscated API calls that may evade static analysis detection
-     metadata:
-       description: Identify obfuscated API calls using alternative Python syntax patterns
-     severity: WARNING
-     patterns:
-       - pattern-either:
-           # Covered cases:
-           # 1) __dict__ access patterns: $MODULE.__dict__[$METHOD](...) / .__call__(...)
-           # 2) __getattribute__ patterns: $MODULE.__getattribute__($METHOD)(...) / .__call__(...)
-           # 3) getattr patterns: getattr($MODULE, $METHOD)(...) / .__call__(...)
-           # It also covers the case where $MODULE is imported as __import__('mod')
-           - patterns:
-               - pattern-either:
-                   - pattern: $MODULE.__dict__[$METHOD]($...ARGS)
-                   - pattern: $MODULE.__dict__[$METHOD].__call__($...ARGS)
-                   - pattern: $MODULE.__getattribute__($METHOD)($...ARGS)
-                   - pattern: $MODULE.__getattribute__($METHOD).__call__($...ARGS)
-                   - pattern: getattr($MODULE, $METHOD)($...ARGS)
-                   - pattern: getattr($MODULE, $METHOD).__call__($...ARGS)
-               - metavariable-regex:
-                   metavariable: $MODULE
-                   regex: "^[A-Za-z_][A-Za-z0-9_\\.]*$|^__import__\\([\"'][A-Za-z_][A-Za-z0-9_]*[\"']\\)$"
-               - metavariable-regex:
-                   metavariable: $METHOD
-                   regex: "^[\"'][A-Za-z_][A-Za-z0-9_]*[\"']$"
+   - id: api-obfuscation
+     languages:
+       - python
+     message: This package uses obfuscated API calls that may evade static analysis detection
+     metadata:
+       description: Identify obfuscated API calls using alternative Python syntax patterns
+     severity: WARNING
+     patterns:
+       - pattern-either:
+           # Covered cases:
+           # 1) __dict__ access patterns: $MODULE.__dict__[$METHOD](...) / .__call__(...)
+           # 2) __getattribute__ patterns: $MODULE.__getattribute__($METHOD)(...) / .__call__(...)
+           # 3) getattr patterns: getattr($MODULE, $METHOD)(...) / .__call__(...)
+           # It also covers the case where $MODULE is imported as __import__($mod),
+           # where $mod is a generic expression (e.g., string literal, variable, etc.)
+           - patterns:
+               - pattern-either:
+                   - pattern: $MODULE.__dict__[$METHOD]($...ARGS)
+                   - pattern: $MODULE.__dict__[$METHOD].__call__($...ARGS)
+                   - pattern: $MODULE.__getattribute__($METHOD)($...ARGS)
+                   - pattern: $MODULE.__getattribute__($METHOD).__call__($...ARGS)
+                   - pattern: getattr($MODULE, $METHOD)($...ARGS)
+                   - pattern: getattr($MODULE, $METHOD).__call__($...ARGS)
+               - metavariable-regex:
+                   metavariable: $MODULE
+                   regex: "^[A-Za-z_][A-Za-z0-9_\\.]*$|^__import__\\(.*\\)$"

-           # --- Additional Cases: __import__('mod').method(...) / .__call__(...)
-           - patterns:
-               - pattern-either:
-                   - pattern: __import__($MODULE).$METHOD($...ARGS)
-                   - pattern: __import__($MODULE).$METHOD.__call__($...ARGS)
-               - metavariable-regex:
-                   metavariable: $MODULE
-                   regex: "^[\"'][A-Za-z_][A-Za-z0-9_]*[\"']$"
-               - metavariable-regex:
-                   metavariable: $METHOD
-                   # avoid matching __getattribute__
-                   regex: "[^(__getattribute__)][A-Za-z_][A-Za-z0-9_]*"
+           # --- Additional Cases: __import__('mod').method(...) / .__call__(...)
+           - patterns:
+               - pattern-either:
+                   - pattern: __import__($MODULE).$METHOD($...ARGS)
+                   - pattern: __import__($MODULE).$METHOD.__call__($...ARGS)
+               - metavariable-regex:
+                   metavariable: $METHOD
+                   # avoid matching __getattribute__
+                   regex: "[^(__getattribute__)][A-Za-z_][A-Za-z0-9_]*"
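For reference, the call shapes the updated api-obfuscation rule is aimed at look roughly like the following benign stand-ins; the loosened $MODULE regex is what lets the last form, where __import__ receives a variable rather than a string literal, be described as well:

    import base64

    getattr(base64, "b64encode")(b"data")             # getattr indirection
    base64.__getattribute__("b64encode")(b"data")     # __getattribute__ indirection
    base64.__dict__["b64encode"](b"data")             # __dict__ indirection
    __import__("base64").b64encode(b"data")           # __import__ chaining

    mod_name = "base64"
    getattr(__import__(mod_name), "b64encode")(b"data")  # now allowed by __import__\(.*\)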
guarddog/analyzer/sourcecode/code-execution.yml
@@ -114,6 +114,26 @@ rules:
          - pattern-either:
              - pattern: globals()['eval']($ARG1)
              - pattern: globals()['\x65\x76\x61\x6c']($ARG1) # that's "eval" in hexadecimal
+
+             # vars() indirection to access builtins
+             - pattern: vars(__builtins__)['compile']($ARG1, ...)
+             - pattern: vars(__builtins__)['exec']($ARG1)
+             - pattern: vars(__builtins__)['eval']($ARG1)
+
+             # vars().get() variant
+             - pattern: vars(__builtins__).get('compile')($ARG1, ...)
+             - pattern: vars(__builtins__).get('exec')($ARG1)
+             - pattern: vars(__builtins__).get('eval')($ARG1)
+
+             # vars/globals combinations
+             - pattern: vars(globals()['__builtins__'])['exec']($ARG1)
+             - pattern: vars(globals()['__builtins__'])['eval']($ARG1)
+             - pattern: vars(locals()['__builtins__'])['exec']($ARG1)
+             - pattern: vars(locals()['__builtins__'])['eval']($ARG1)
+
+             # Direct compile() calls
+             - pattern: compile($ARG1, '<string>', 'exec')
+             - pattern: compile($ARG1, '<string>', 'eval')

          - metavariable-pattern:
              metavariable: $ARG1
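The additions extend the code-execution rule from globals() lookups to vars()-based builtin access and direct compile() calls; shapes it now describes include the following (illustrative snippets, still subject to the $ARG1 constraint that follows in the rule):

    payload = "print('hi')"

    vars(__builtins__)['exec'](payload)
    vars(__builtins__).get('eval')(payload)
    vars(globals()['__builtins__'])['exec'](payload)
    compile(payload, '<string>', 'exec')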
guarddog/analyzer/sourcecode/exec-base64.yml
@@ -56,4 +56,23 @@ rules:
              - pattern: __import__("base64").b64decode(...)
              - pattern: marshal.loads(zlib.decompress(...))
              - pattern: $FUNC("...").decrypt(...)
+
+             # codecs.decode with base64 (all valid aliases)
+             - pattern: codecs.decode(..., 'base64')
+             - pattern: codecs.decode(..., 'base_64')
+             - pattern: codecs.decode(..., 'base-64')
+             - pattern: codecs.decode(..., 'BASE64')
+             - pattern: codecs.decode(..., 'BASE_64')
+             - pattern: codecs.decode(..., 'BASE-64')
+
+             # importlib + base64 module
+             - pattern: importlib.import_module('base64').b64decode(...)
+
+             # importlib + codecs module (all base64 aliases)
+             - pattern: importlib.import_module('codecs').decode(..., 'base64')
+             - pattern: importlib.import_module('codecs').decode(..., 'base_64')
+             - pattern: importlib.import_module('codecs').decode(..., 'base-64')
+             - pattern: importlib.import_module('codecs').decode(..., 'BASE64')
+             - pattern: importlib.import_module('codecs').decode(..., 'BASE_64')
+             - pattern: importlib.import_module('codecs').decode(..., 'BASE-64')
      severity: WARNING
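The new exec-base64 patterns cover the codecs and importlib spellings of base64 decoding; matching call sites look roughly like this (illustrative only):

    import codecs
    import importlib

    data = b'cHJpbnQoImhpIik='
    codecs.decode(data, 'base64')
    codecs.decode(data, 'BASE-64')
    importlib.import_module('base64').b64decode(data)
    importlib.import_module('codecs').decode(data, 'base_64')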