guarddog 1.11.2__tar.gz → 2.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. {guarddog-1.11.2 → guarddog-2.0.1}/PKG-INFO +4 -2
  2. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/__init__.py +1 -0
  3. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/analyzer.py +99 -13
  4. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/__init__.py +3 -0
  5. guarddog-2.0.1/guarddog/analyzer/metadata/go/__init__.py +9 -0
  6. guarddog-2.0.1/guarddog/analyzer/sourcecode/__init__.py +108 -0
  7. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/dll-hijacking.yml +17 -0
  8. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/exfiltrate-sensitive-data.yml +12 -0
  9. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/npm-dll-hijacking.yml +17 -2
  10. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/npm-install-script.yml +11 -0
  11. guarddog-2.0.1/guarddog/analyzer/sourcecode/shady-links.yml +44 -0
  12. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/cli.py +116 -212
  13. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/ecosystems.py +3 -0
  14. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/reporters/sarif.py +11 -15
  15. guarddog-2.0.1/guarddog/scanners/__init__.py +54 -0
  16. guarddog-2.0.1/guarddog/scanners/go_package_scanner.py +75 -0
  17. guarddog-2.0.1/guarddog/scanners/go_project_scanner.py +68 -0
  18. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/scanners/pypi_package_scanner.py +8 -12
  19. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/scanners/scanner.py +7 -31
  20. guarddog-2.0.1/guarddog/utils/archives.py +78 -0
  21. {guarddog-1.11.2 → guarddog-2.0.1}/pyproject.toml +5 -2
  22. guarddog-1.11.2/guarddog/analyzer/sourcecode/__init__.py +0 -33
  23. guarddog-1.11.2/guarddog/analyzer/sourcecode/shady-links.yml +0 -41
  24. guarddog-1.11.2/guarddog/scanners/__init__.py +0 -21
  25. guarddog-1.11.2/guarddog/utils/archives.py +0 -30
  26. {guarddog-1.11.2 → guarddog-2.0.1}/LICENSE +0 -0
  27. {guarddog-1.11.2 → guarddog-2.0.1}/LICENSE-3rdparty.csv +0 -0
  28. {guarddog-1.11.2 → guarddog-2.0.1}/NOTICE +0 -0
  29. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/__main__.py +0 -0
  30. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/__init__.py +0 -0
  31. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/bundled_binary.py +0 -0
  32. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/deceptive_author.py +0 -0
  33. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/detector.py +0 -0
  34. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/empty_information.py +0 -0
  35. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/__init__.py +0 -0
  36. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/bundled_binary.py +0 -0
  37. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/deceptive_author.py +0 -0
  38. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/direct_url_dependency.py +0 -0
  39. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/empty_information.py +0 -0
  40. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/npm_metadata_mismatch.py +0 -0
  41. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/potentially_compromised_email_domain.py +0 -0
  42. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/release_zero.py +0 -0
  43. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/typosquatting.py +0 -0
  44. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/unclaimed_maintainer_email_domain.py +0 -0
  45. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/utils.py +0 -0
  46. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/potentially_compromised_email_domain.py +0 -0
  47. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/__init__.py +0 -0
  48. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/bundled_binary.py +0 -0
  49. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/deceptive_author.py +0 -0
  50. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/empty_information.py +0 -0
  51. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/potentially_compromised_email_domain.py +0 -0
  52. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/release_zero.py +0 -0
  53. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/repository_integrity_mismatch.py +0 -0
  54. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/single_python_file.py +0 -0
  55. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/typosquatting.py +0 -0
  56. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/unclaimed_maintainer_email_domain.py +0 -0
  57. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/utils.py +0 -0
  58. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/release_zero.py +0 -0
  59. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/repository_integrity_mismatch.py +0 -0
  60. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/resources/placeholder_email_domains.txt +0 -0
  61. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/resources/top_npm_packages.json +0 -0
  62. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/resources/top_pypi_packages.json +0 -0
  63. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/typosquatting.py +0 -0
  64. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/unclaimed_maintainer_email_domain.py +0 -0
  65. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/utils.py +0 -0
  66. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/bidirectional-characters.yml +0 -0
  67. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/clipboard-access.yml +0 -0
  68. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/cmd-overwrite.yml +0 -0
  69. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/code-execution.yml +0 -0
  70. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/download-executable.yml +0 -0
  71. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/exec-base64.yml +0 -0
  72. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/npm-exec-base64.yml +0 -0
  73. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/npm-exfiltrate-sensitive-data.yml +0 -0
  74. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/npm-obfuscation.yml +0 -0
  75. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/npm-serialize-environment.yml +0 -0
  76. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/npm-silent-process-execution.yml +0 -0
  77. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/npm-steganography.yml +0 -0
  78. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/obfuscation.yml +0 -0
  79. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/silent-process-execution.yml +0 -0
  80. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/steganography.yml +0 -0
  81. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/reporters/__init__.py +0 -0
  82. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/scanners/npm_package_scanner.py +0 -0
  83. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/scanners/npm_project_scanner.py +0 -0
  84. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/scanners/pypi_project_scanner.py +0 -0
  85. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/utils/__init__.py +0 -0
  86. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/utils/config.py +0 -0
  87. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/utils/exceptions.py +0 -0
  88. {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/utils/package_info.py +0 -0
  89. {guarddog-1.11.2 → guarddog-2.0.1}/pypi.rst +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: guarddog
3
- Version: 1.11.2
3
+ Version: 2.0.1
4
4
  Summary: GuardDog is a CLI tool to Identify malicious PyPI packages
5
5
  Home-page: https://github.com/DataDog/guarddog
6
6
  License: Apache-2.0
@@ -15,7 +15,7 @@ Requires-Dist: click (>=8.1.3,<9.0.0)
15
15
  Requires-Dist: click-option-group (>=0.5.5,<0.6.0)
16
16
  Requires-Dist: colorama (>=0.4.6,<0.5.0)
17
17
  Requires-Dist: configparser (>=5.3,<8.0)
18
- Requires-Dist: disposable-email-domains (>=0.0.103,<0.0.104)
18
+ Requires-Dist: disposable-email-domains (>=0.0.103,<0.0.105)
19
19
  Requires-Dist: prettytable (>=3.6.0,<4.0.0)
20
20
  Requires-Dist: pygit2 (>=1.11,<1.16)
21
21
  Requires-Dist: python-dateutil (>=2.8.2,<3.0.0)
@@ -24,9 +24,11 @@ Requires-Dist: pyyaml (>=6.0,<7.0)
24
24
  Requires-Dist: requests (>=2.29.0,<3.0.0)
25
25
  Requires-Dist: semantic-version (>=2.10.0,<3.0.0)
26
26
  Requires-Dist: semgrep (==1.67.0)
27
+ Requires-Dist: setuptools (>=70.3.0,<71.0.0)
27
28
  Requires-Dist: tarsafe (>=0.0.5,<0.0.6)
28
29
  Requires-Dist: termcolor (>=2.1.0,<3.0.0)
29
30
  Requires-Dist: urllib3 (==2.2.2)
31
+ Requires-Dist: yara-python (>=4.5.1,<5.0.0)
30
32
  Project-URL: Repository, https://github.com/DataDog/guarddog
31
33
  Description-Content-Type: text/x-rst
32
34
 
@@ -1,2 +1,3 @@
1
1
  from guarddog.scanners.npm_package_scanner import NPMPackageScanner # NOQA
2
2
  from guarddog.scanners.pypi_package_scanner import PypiPackageScanner # NOQA
3
+ from guarddog.scanners.go_package_scanner import GoModuleScanner # NOQA
@@ -2,12 +2,13 @@ import json
2
2
  import logging
3
3
  import os
4
4
  import subprocess
5
+ import yara # type: ignore
5
6
  from collections import defaultdict
6
7
  from pathlib import Path
7
- from typing import Iterable, Optional
8
+ from typing import Iterable, Optional, Dict
8
9
 
9
10
  from guarddog.analyzer.metadata import get_metadata_detectors
10
- from guarddog.analyzer.sourcecode import SOURCECODE_RULES
11
+ from guarddog.analyzer.sourcecode import get_sourcecode_rules, SempgrepRule, YaraRule
11
12
  from guarddog.ecosystems import ECOSYSTEM
12
13
 
13
14
  SEMGREP_MAX_TARGET_BYTES = 10_000_000
@@ -24,6 +25,7 @@ class Analyzer:
24
25
  ecosystem (str): name of the current ecosystem
25
26
  metadata_ruleset (list): list of metadata rule names
26
27
  sourcecode_ruleset (list): list of source code rule names
28
+ ioc_ruleset (list): list of ioc rule names
27
29
 
28
30
  exclude (list): list of directories to exclude from source code search
29
31
 
@@ -32,14 +34,18 @@ class Analyzer:
32
34
 
33
35
  def __init__(self, ecosystem=ECOSYSTEM.PYPI) -> None:
34
36
  self.sourcecode_rules_path = os.path.join(os.path.dirname(__file__), "sourcecode")
35
-
36
37
  self.ecosystem = ecosystem
37
38
 
38
39
  # Rules and associated detectors
39
40
  self.metadata_detectors = get_metadata_detectors(ecosystem)
40
41
 
41
42
  self.metadata_ruleset: set[str] = set(self.metadata_detectors.keys())
42
- self.sourcecode_ruleset: set[str] = set(rule["id"] for rule in SOURCECODE_RULES[ecosystem])
43
+ self.semgrep_ruleset: set[str] = set(
44
+ r.id for r in get_sourcecode_rules(ecosystem, SempgrepRule)
45
+ )
46
+ self.yara_ruleset: set[str] = set(
47
+ r.id for r in get_sourcecode_rules(ecosystem, YaraRule)
48
+ )
43
49
 
44
50
  # Define paths to exclude from sourcecode analysis
45
51
  self.exclude = [
@@ -77,10 +83,7 @@ class Analyzer:
77
83
  sourcecode_results = None
78
84
 
79
85
  # populate results, errors, and number of issues
80
- log.debug(f"Running metadata rules against package '{name}'")
81
86
  metadata_results = self.analyze_metadata(path, info, rules, name, version)
82
-
83
- log.debug(f"Running source code rules against directory '{path}'")
84
87
  sourcecode_results = self.analyze_sourcecode(path, rules)
85
88
 
86
89
  # Concatenate dictionaries together
@@ -104,6 +107,8 @@ class Analyzer:
104
107
  dict[str]: map from each metadata rule and their corresponding output
105
108
  """
106
109
 
110
+ log.debug(f"Running metadata rules against package '{name}'")
111
+
107
112
  all_rules = self.metadata_ruleset
108
113
  if rules is not None:
109
114
  # filtering the full ruleset with the user's input
@@ -139,11 +144,87 @@ class Analyzer:
139
144
  Returns:
140
145
  dict[str]: map from each source code rule and their corresponding output
141
146
  """
147
+ semgrepscan_results = self.analyze_semgrep(path, rules)
148
+
149
+ yarascan_results = self.analyze_yara(path, rules)
150
+
151
+ # Concatenate dictionaries together
152
+ issues = semgrepscan_results["issues"] + yarascan_results["issues"]
153
+ results = semgrepscan_results["results"] | yarascan_results["results"]
154
+ errors = semgrepscan_results["errors"] | yarascan_results["errors"]
155
+
156
+ return {"issues": issues, "errors": errors, "results": results, "path": path}
157
+
158
+ def analyze_yara(self, path: str, rules: Optional[set] = None) -> dict:
159
+ """
160
+ Analyzes the IOCs of a given package
161
+
162
+ Args:
163
+ path (str): path to package
164
+ rules (set, optional): Set of IOC rules to analyze. Defaults to all rules.
165
+
166
+ Returns:
167
+ dict[str]: map from each IOC rule and their corresponding output
168
+ """
169
+ log.debug(f"Running yara rules against directory '{path}'")
170
+
171
+ all_rules = self.yara_ruleset
172
+ if rules is not None:
173
+ # filtering the full ruleset with the user's input
174
+ all_rules = self.yara_ruleset & rules
175
+
176
+ results = {rule: {} for rule in all_rules} # type: dict
177
+ errors: Dict[str, str] = {}
178
+ issues = 0
179
+
180
+ rules_path = {
181
+ rule_name: os.path.join(self.sourcecode_rules_path, f"{rule_name}.yar")
182
+ for rule_name in all_rules
183
+ }
184
+
185
+ if len(rules_path) == 0:
186
+ log.debug("No yara rules to run")
187
+ return {"results": results, "errors": errors, "issues": issues}
188
+
189
+ try:
190
+ scan_rules = yara.compile(filepaths=rules_path)
191
+
192
+ for root, _, files in os.walk(path):
193
+ for f in files:
194
+ matches = scan_rules.match(os.path.join(root, f))
195
+ for m in matches:
196
+ for s in m.strings:
197
+ for i in s.instances:
198
+ rule_results = {
199
+ "location": f"{f}:{i.offset}",
200
+ "code": self.trim_code_snippet(str(i.matched_data)),
201
+ 'message': m.meta.get("description", f"{m.rule} rule matched")
202
+ }
203
+ issues += len(m.strings)
204
+ results[m.rule].update(rule_results)
205
+ except Exception as e:
206
+ errors["rules-all"] = f"failed to run rule: {str(e)}"
207
+
208
+ return {"results": results, "errors": errors, "issues": issues}
209
+
210
+ def analyze_semgrep(self, path, rules=None) -> dict:
211
+ """
212
+ Analyzes the source code of a given package
213
+
214
+ Args:
215
+ path (str): path to directory of package
216
+ rules (set, optional): Set of source code rules to analyze. Defaults to all rules.
217
+
218
+ Returns:
219
+ dict[str]: map from each source code rule and their corresponding output
220
+ """
221
+ log.debug(f"Running semgrep rules against directory '{path}'")
222
+
142
223
  targetpath = Path(path)
143
- all_rules = self.sourcecode_ruleset
224
+ all_rules = self.semgrep_ruleset
144
225
  if rules is not None:
145
226
  # filtering the full ruleset with the user's input
146
- all_rules = self.sourcecode_ruleset & rules
227
+ all_rules = self.semgrep_ruleset & rules
147
228
 
148
229
  results = {rule: {} for rule in all_rules} # type: dict
149
230
  errors = {}
@@ -155,11 +236,11 @@ class Analyzer:
155
236
  ))
156
237
 
157
238
  if len(rules_path) == 0:
158
- log.debug("No source code rules to run")
239
+ log.debug("No semgrep code rules to run")
159
240
  return {"results": {}, "errors": {}, "issues": 0}
160
241
 
161
242
  try:
162
- log.debug(f"Running source code rules against {path}")
243
+ log.debug(f"Running semgrep code rules against {path}")
163
244
  response = self._invoke_semgrep(target=path, rules=rules_path)
164
245
  rule_results = self._format_semgrep_response(response, targetpath=targetpath)
165
246
  issues += sum(len(res) for res in rule_results.values())
@@ -240,11 +321,16 @@ output: {e.output}
240
321
  location = file_path + ":" + str(line)
241
322
  code = self.trim_code_snippet(code_snippet)
242
323
 
243
- results[rule_name].append({
324
+ finding = {
244
325
  'location': location,
245
326
  'code': code,
246
327
  'message': result["extra"]["message"]
247
- })
328
+ }
329
+
330
+ rule_results = results[rule_name]
331
+ if finding in rule_results:
332
+ continue
333
+ results[rule_name].append(finding)
248
334
 
249
335
  return results
250
336
 
@@ -1,6 +1,7 @@
1
1
  from guarddog.analyzer.metadata.detector import Detector
2
2
  from guarddog.analyzer.metadata.npm import NPM_METADATA_RULES
3
3
  from guarddog.analyzer.metadata.pypi import PYPI_METADATA_RULES
4
+ from guarddog.analyzer.metadata.go import GO_METADATA_RULES
4
5
  from guarddog.ecosystems import ECOSYSTEM
5
6
 
6
7
 
@@ -10,3 +11,5 @@ def get_metadata_detectors(ecosystem: ECOSYSTEM) -> dict[str, Detector]:
10
11
  return PYPI_METADATA_RULES
11
12
  case ECOSYSTEM.NPM:
12
13
  return NPM_METADATA_RULES
14
+ case ECOSYSTEM.GO:
15
+ return GO_METADATA_RULES
@@ -0,0 +1,9 @@
1
+ from guarddog.analyzer.metadata import Detector
2
+
3
+ GO_METADATA_RULES = {}
4
+
5
+ classes: list[Detector] = []
6
+
7
+ for detectorClass in classes:
8
+ detectorInstance = detectorClass() # type: ignore
9
+ GO_METADATA_RULES[detectorInstance.get_name()] = detectorInstance
@@ -0,0 +1,108 @@
1
+ import os
2
+ import pathlib
3
+ from dataclasses import dataclass
4
+ from typing import Optional, Iterable
5
+
6
+ import yaml
7
+ from yaml.loader import SafeLoader
8
+
9
+ from guarddog.ecosystems import ECOSYSTEM
10
+
11
+ current_dir = pathlib.Path(__file__).parent.resolve()
12
+
13
+
14
+ # These data classes aim to reduce the spreading of the logic
15
+ # instead of using a dict as a structure and parsing it differently depending on the type
16
+ @dataclass
17
+ class SourceCodeRule:
18
+ """
19
+ Base class for source code rules
20
+ """
21
+ id: str
22
+ file: str
23
+
24
+
25
+ @dataclass
26
+ class YaraRule(SourceCodeRule):
27
+ """
28
+ Yara rule just reimplements base
29
+ """
30
+ pass
31
+
32
+
33
+ @dataclass
34
+ class SempgrepRule(SourceCodeRule):
35
+ """
36
+ Semgrep rules are language specific
37
+ Content of rule in yaml format is accessible through rule_content
38
+ """
39
+ description: str
40
+ ecosystem: ECOSYSTEM
41
+ rule_content: dict
42
+
43
+
44
+ def get_sourcecode_rules(
45
+ ecosystem: ECOSYSTEM, kind: Optional[type] = None
46
+ ) -> Iterable[SourceCodeRule]:
47
+ """
48
+ This function returns the source code rules for a given ecosystem and kind.
49
+ Args:
50
+ ecosystem: The ecosystem to filter for if rules are ecosystem specific
51
+ kind: The kind of rule to filter for
52
+ """
53
+ for rule in SOURCECODE_RULES:
54
+ if kind and not isinstance(rule, kind):
55
+ continue
56
+ if not (getattr(rule, "ecosystem", ecosystem) == ecosystem):
57
+ continue
58
+ yield rule
59
+
60
+
61
+ SOURCECODE_RULES: list[SourceCodeRule] = list()
62
+
63
+ semgrep_rule_file_names = list(
64
+ filter(lambda x: x.endswith("yml"), os.listdir(current_dir))
65
+ )
66
+ # all yml files placed in the sourcecode directory are loaded as semgrep rules
67
+ # refer to README.md for more information
68
+ for file_name in semgrep_rule_file_names:
69
+ with open(os.path.join(current_dir, file_name), "r") as fd:
70
+ data = yaml.load(fd, Loader=SafeLoader)
71
+ for rule in data["rules"]:
72
+ for lang in rule["languages"]:
73
+ ecosystem = None
74
+ match lang:
75
+ case "python":
76
+ ecosystem = ECOSYSTEM.PYPI
77
+ case "javascript" | "typescript" | "json":
78
+ ecosystem = ECOSYSTEM.NPM
79
+ case "go":
80
+ ecosystem = ECOSYSTEM.GO
81
+ case _:
82
+ continue
83
+
84
+ # avoids duplicates when multiple languages are supported by a rule
85
+ if not next(
86
+ filter(
87
+ lambda r: r.id == rule["id"],
88
+ get_sourcecode_rules(ecosystem, SempgrepRule),
89
+ ),
90
+ None,
91
+ ):
92
+ SOURCECODE_RULES.append(
93
+ SempgrepRule(
94
+ id=rule["id"],
95
+ ecosystem=ecosystem,
96
+ description=rule.get("metadata", {}).get("description", ""),
97
+ file=file_name,
98
+ rule_content=rule,
99
+ )
100
+ )
101
+
102
+ yara_rule_file_names = list(
103
+ filter(lambda x: x.endswith("yar"), os.listdir(current_dir))
104
+ )
105
+ # all yar files placed in the sourcecode directory are loaded as YARA rules
106
+ # refer to README.md for more information
107
+ for file_name in yara_rule_file_names:
108
+ SOURCECODE_RULES.append(YaraRule(id=pathlib.Path(file_name).stem, file=file_name))
@@ -11,6 +11,8 @@ rules:
11
11
  - pattern-either:
12
12
  - patterns:
13
13
  - pattern: "$DLL_LOAD"
14
+ # Ignore docstrings
15
+ - pattern-not-regex: ^\s*"""(.|\n)*?"""\s*$
14
16
  - metavariable-pattern:
15
17
  metavariable: $DLL_LOAD
16
18
  pattern-either:
@@ -20,6 +22,21 @@ rules:
20
22
  - pattern-regex: (?i).*?\/bin/.+\s+.*?\.so
21
23
  # environment preload
22
24
  - pattern-regex: LD_PRELOAD
25
+ # MITRE ATT&CK "System Binary Proxy Execution" techniques
26
+ # https://attack.mitre.org/techniques/T1218/
27
+ - pattern-regex: (?i)control(.exe)?\s+\S+.cpl
28
+ - pattern-regex: (?i)cmstp(.exe)?\s+\S+
29
+ - pattern-regex: (?i)InstallUtil(.exe)?\s+\S+
30
+ - pattern-regex: (?i)mshta(.exe)?\s+\S+
31
+ - pattern-regex: (?i)msiexec(.exe)?\s+\S+
32
+ - pattern-regex: (?i)odbcconf(.exe)?\s+.*{\s*REGSVR\s+\S+\s*}
33
+ - pattern-regex: (?i)regsvcs(.exe)?\s+\S+
34
+ - pattern-regex: (?i)regasm(.exe)?\s+\S+
35
+ - pattern-regex: (?i)regsvr32(.exe)?\s+\S+
36
+ - pattern-regex: (?i)rundll32(.exe)?\s+\S+
37
+ - pattern-regex: (?i)verclsid(.exe)?\s+.*{\s*\S+\s*}
38
+ - pattern-regex: (?i)mavinject(.exe)?\s+\d+\s+/INJECTRUNNING\s+\S+
39
+ - pattern-regex: (?i)mmc(.exe)?\s+-Embedding\s+\S+.ms
23
40
  - patterns:
24
41
  - pattern: $FN($EXE,...,$DLL)
25
42
  - metavariable-pattern:
@@ -31,6 +31,18 @@ rules:
31
31
  - metavariable-regex:
32
32
  metavariable: $ENVVAR
33
33
  regex: ([\"\'](AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_SESSION_TOKEN)[\"\'])
34
+ - patterns:
35
+ - pattern-inside: |
36
+ $CONNECT = sqlite3.connect(...)
37
+ ...
38
+ $CURSOR = $CONNECT.cursor(...)
39
+ ...
40
+ - pattern: $CURSOR.execute($QUERY, ...)
41
+ - metavariable-pattern:
42
+ metavariable: $QUERY
43
+ patterns:
44
+ - pattern: "..."
45
+ - pattern-regex: (?i)(cookies|credit_cards|logins|moz_cookies|moz_formhistory|moz_logins)
34
46
  pattern-sinks:
35
47
  - pattern-either:
36
48
  - pattern-inside: requests.$METHOD(...)
@@ -2,7 +2,7 @@ rules:
2
2
  - id: npm-dll-hijacking
3
3
  languages:
4
4
  - javascript
5
- message: This package manipulates a trusted application into loading a malicious dll
5
+ message: This package manipulates a trusted application into loading a malicious DLL
6
6
  metadata:
7
7
  description: Identifies when a malicious package manipulates a trusted application into loading a malicious DLL
8
8
  pattern-either:
@@ -20,6 +20,21 @@ rules:
20
20
  - pattern-regex: (?i).*?\/bin/.+\s+.*?\.so
21
21
  # environment preload
22
22
  - pattern-regex: LD_PRELOAD
23
+ # MITRE ATT&CK "System Binary Proxy Execution" techniques
24
+ # https://attack.mitre.org/techniques/T1218/
25
+ - pattern-regex: (?i)control(.exe)?\s+\S+.cpl
26
+ - pattern-regex: (?i)cmstp(.exe)?\s+\S+
27
+ - pattern-regex: (?i)InstallUtil(.exe)?\s+\S+
28
+ - pattern-regex: (?i)mshta(.exe)?\s+\S+
29
+ - pattern-regex: (?i)msiexec(.exe)?\s+\S+
30
+ - pattern-regex: (?i)odbcconf(.exe)?\s+.*{\s*REGSVR\s+\S+\s*}
31
+ - pattern-regex: (?i)regsvcs(.exe)?\s+\S+
32
+ - pattern-regex: (?i)regasm(.exe)?\s+\S+
33
+ - pattern-regex: (?i)regsvr32(.exe)?\s+\S+
34
+ - pattern-regex: (?i)rundll32(.exe)?\s+\S+
35
+ - pattern-regex: (?i)verclsid(.exe)?\s+.*{\s*\S+\s*}
36
+ - pattern-regex: (?i)mavinject(.exe)?\s+\d+\s+/INJECTRUNNING\s+\S+
37
+ - pattern-regex: (?i)mmc(.exe)?\s+-Embedding\s+\S+.ms
23
38
  - patterns:
24
39
  - pattern: $FN($EXE,...,$DLL)
25
40
  - metavariable-pattern:
@@ -58,7 +73,7 @@ rules:
58
73
  - pattern: ....appendFile
59
74
  - metavariable-pattern:
60
75
  metavariable: $EXE
61
- patterns:
76
+ patterns:
62
77
  # a string with .exe or /bin/[whatever] in it
63
78
  - pattern: "..."
64
79
  - pattern-regex: (?i).*?(\.exe|\/bin/.+)
@@ -10,6 +10,17 @@ rules:
10
10
  # (typically when a dependency is a git repository, see https://github.com/npm/cli/issues/6031#issuecomment-1449119423)
11
11
  # however this happens pretty rarely so reporting every package with a "prepare" script would be too noisy;
12
12
  # see https://github.com/DataDog/guarddog/issues/308
13
+ - pattern-not: |
14
+ "...": "npx only-allow pnpm"
15
+ - pattern-not: |
16
+ "...": ""
17
+ - pattern-not: |
18
+ "...": "patch-package"
19
+ - pattern-not: |
20
+ "...": "husky"
21
+ - pattern-not: |
22
+ "preinstall": "echo \"preinstall script\""
23
+
13
24
  - pattern-either:
14
25
  - pattern: |
15
26
  "preinstall": "..."
@@ -0,0 +1,44 @@
1
+ # TODO: Detects these links well, but lots of legitimate packages seem to use these domain extensions
2
+ rules:
3
+ - id: shady-links
4
+ message: This package contains an URL to a domain with a suspicious extension
5
+ metadata:
6
+ description: Identify when a package contains an URL to a domain with a suspicious extension
7
+ patterns:
8
+
9
+ # ignore comments
10
+ - pattern-not-regex: ^\s*\# .*
11
+ - pattern-not-regex: ^\s*\/\*(.|\n)*?\*\/\s*$
12
+ - pattern-not-regex: ^\s*\/\/.*$
13
+
14
+ # ignore docstring
15
+ - pattern-not-regex: ^\s*"""(.|\n)*?"""\s*$
16
+
17
+ # Exclude local IPv4 sometimes used in tests
18
+ - pattern-not-regex: (http[s]?:\/\/[^\n\[\/\?#"']*?(?:192\.168|10\.\d{1,3}|172\.(?:1[6-9]|2\d|3[0-1])|127\.\d{1,3})\.\d{1,3}\.\d{1,3}|0\.0\.0\.0|localhost)
19
+
20
+ # Exclude public IPv4 sometimes used in tests
21
+ - pattern-not-regex: (http[s]?:\/\/[^\n\[\/\?#"']*?(?:1\.1\.1\.1|8\.8\.8\.8))
22
+
23
+ - patterns:
24
+ - pattern: ("...")
25
+ - pattern-either:
26
+ # complete domains
27
+ - pattern-regex: (http[s]?:\/\/[^\n\[\/\?#"']*?(bit\.ly|discord\.com|workers\.dev|transfer\.sh|filetransfer\.io|sendspace\.com|appdomain\.cloud|backblazeb2\.com\|paste\.ee|ngrok\.io|termbin\.com|localhost\.run|webhook\.site|oastify\.com|burpcollaborator\.me)\/)
28
+ # top-level domains
29
+ - pattern-regex: (http[s]?:\/\/[^\n\[\/\?#"']*?\.(link|xyz|tk|ml|ga|cf|gq|pw|top|club|mw|bd|ke|am|sbs|date|quest|cd|bid|cd|ws|icu|cam|uno|email|stream)\/)
30
+ # IPv4
31
+ - pattern-regex: (http[s]?:\/\/[^\n\[\/\?#"']*?(?:\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}))
32
+ # IPv6
33
+ - pattern-regex: (http[s]?:\/\/[^\n\[\/\?#"']*?(?:\[(([A-Fa-f0-9]{1,4}:){0,7}|:):?[A-Fa-f0-9]{1,4}(:[A-Fa-f0-9]{1,4}){0,7})\])
34
+ paths:
35
+ exclude:
36
+ - "*/test/*"
37
+ - "*/tests/*"
38
+ - "*/test_*"
39
+ languages:
40
+ - javascript
41
+ - python
42
+ - typescript
43
+ - go
44
+ severity: WARNING