guarddog 1.11.2__tar.gz → 2.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {guarddog-1.11.2 → guarddog-2.0.1}/PKG-INFO +4 -2
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/__init__.py +1 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/analyzer.py +99 -13
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/__init__.py +3 -0
- guarddog-2.0.1/guarddog/analyzer/metadata/go/__init__.py +9 -0
- guarddog-2.0.1/guarddog/analyzer/sourcecode/__init__.py +108 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/dll-hijacking.yml +17 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/exfiltrate-sensitive-data.yml +12 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/npm-dll-hijacking.yml +17 -2
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/npm-install-script.yml +11 -0
- guarddog-2.0.1/guarddog/analyzer/sourcecode/shady-links.yml +44 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/cli.py +116 -212
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/ecosystems.py +3 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/reporters/sarif.py +11 -15
- guarddog-2.0.1/guarddog/scanners/__init__.py +54 -0
- guarddog-2.0.1/guarddog/scanners/go_package_scanner.py +75 -0
- guarddog-2.0.1/guarddog/scanners/go_project_scanner.py +68 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/scanners/pypi_package_scanner.py +8 -12
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/scanners/scanner.py +7 -31
- guarddog-2.0.1/guarddog/utils/archives.py +78 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/pyproject.toml +5 -2
- guarddog-1.11.2/guarddog/analyzer/sourcecode/__init__.py +0 -33
- guarddog-1.11.2/guarddog/analyzer/sourcecode/shady-links.yml +0 -41
- guarddog-1.11.2/guarddog/scanners/__init__.py +0 -21
- guarddog-1.11.2/guarddog/utils/archives.py +0 -30
- {guarddog-1.11.2 → guarddog-2.0.1}/LICENSE +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/LICENSE-3rdparty.csv +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/NOTICE +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/__main__.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/__init__.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/bundled_binary.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/deceptive_author.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/detector.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/empty_information.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/__init__.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/bundled_binary.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/deceptive_author.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/direct_url_dependency.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/empty_information.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/npm_metadata_mismatch.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/potentially_compromised_email_domain.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/release_zero.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/typosquatting.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/unclaimed_maintainer_email_domain.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/npm/utils.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/potentially_compromised_email_domain.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/__init__.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/bundled_binary.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/deceptive_author.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/empty_information.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/potentially_compromised_email_domain.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/release_zero.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/repository_integrity_mismatch.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/single_python_file.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/typosquatting.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/unclaimed_maintainer_email_domain.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/pypi/utils.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/release_zero.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/repository_integrity_mismatch.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/resources/placeholder_email_domains.txt +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/resources/top_npm_packages.json +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/resources/top_pypi_packages.json +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/typosquatting.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/unclaimed_maintainer_email_domain.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/metadata/utils.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/bidirectional-characters.yml +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/clipboard-access.yml +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/cmd-overwrite.yml +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/code-execution.yml +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/download-executable.yml +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/exec-base64.yml +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/npm-exec-base64.yml +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/npm-exfiltrate-sensitive-data.yml +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/npm-obfuscation.yml +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/npm-serialize-environment.yml +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/npm-silent-process-execution.yml +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/npm-steganography.yml +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/obfuscation.yml +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/silent-process-execution.yml +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/steganography.yml +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/reporters/__init__.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/scanners/npm_package_scanner.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/scanners/npm_project_scanner.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/scanners/pypi_project_scanner.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/utils/__init__.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/utils/config.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/utils/exceptions.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/guarddog/utils/package_info.py +0 -0
- {guarddog-1.11.2 → guarddog-2.0.1}/pypi.rst +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: guarddog
|
|
3
|
-
Version:
|
|
3
|
+
Version: 2.0.1
|
|
4
4
|
Summary: GuardDog is a CLI tool to Identify malicious PyPI packages
|
|
5
5
|
Home-page: https://github.com/DataDog/guarddog
|
|
6
6
|
License: Apache-2.0
|
|
@@ -15,7 +15,7 @@ Requires-Dist: click (>=8.1.3,<9.0.0)
|
|
|
15
15
|
Requires-Dist: click-option-group (>=0.5.5,<0.6.0)
|
|
16
16
|
Requires-Dist: colorama (>=0.4.6,<0.5.0)
|
|
17
17
|
Requires-Dist: configparser (>=5.3,<8.0)
|
|
18
|
-
Requires-Dist: disposable-email-domains (>=0.0.103,<0.0.
|
|
18
|
+
Requires-Dist: disposable-email-domains (>=0.0.103,<0.0.105)
|
|
19
19
|
Requires-Dist: prettytable (>=3.6.0,<4.0.0)
|
|
20
20
|
Requires-Dist: pygit2 (>=1.11,<1.16)
|
|
21
21
|
Requires-Dist: python-dateutil (>=2.8.2,<3.0.0)
|
|
@@ -24,9 +24,11 @@ Requires-Dist: pyyaml (>=6.0,<7.0)
|
|
|
24
24
|
Requires-Dist: requests (>=2.29.0,<3.0.0)
|
|
25
25
|
Requires-Dist: semantic-version (>=2.10.0,<3.0.0)
|
|
26
26
|
Requires-Dist: semgrep (==1.67.0)
|
|
27
|
+
Requires-Dist: setuptools (>=70.3.0,<71.0.0)
|
|
27
28
|
Requires-Dist: tarsafe (>=0.0.5,<0.0.6)
|
|
28
29
|
Requires-Dist: termcolor (>=2.1.0,<3.0.0)
|
|
29
30
|
Requires-Dist: urllib3 (==2.2.2)
|
|
31
|
+
Requires-Dist: yara-python (>=4.5.1,<5.0.0)
|
|
30
32
|
Project-URL: Repository, https://github.com/DataDog/guarddog
|
|
31
33
|
Description-Content-Type: text/x-rst
|
|
32
34
|
|
|
@@ -2,12 +2,13 @@ import json
|
|
|
2
2
|
import logging
|
|
3
3
|
import os
|
|
4
4
|
import subprocess
|
|
5
|
+
import yara # type: ignore
|
|
5
6
|
from collections import defaultdict
|
|
6
7
|
from pathlib import Path
|
|
7
|
-
from typing import Iterable, Optional
|
|
8
|
+
from typing import Iterable, Optional, Dict
|
|
8
9
|
|
|
9
10
|
from guarddog.analyzer.metadata import get_metadata_detectors
|
|
10
|
-
from guarddog.analyzer.sourcecode import
|
|
11
|
+
from guarddog.analyzer.sourcecode import get_sourcecode_rules, SempgrepRule, YaraRule
|
|
11
12
|
from guarddog.ecosystems import ECOSYSTEM
|
|
12
13
|
|
|
13
14
|
SEMGREP_MAX_TARGET_BYTES = 10_000_000
|
|
@@ -24,6 +25,7 @@ class Analyzer:
|
|
|
24
25
|
ecosystem (str): name of the current ecosystem
|
|
25
26
|
metadata_ruleset (list): list of metadata rule names
|
|
26
27
|
sourcecode_ruleset (list): list of source code rule names
|
|
28
|
+
ioc_ruleset (list): list of ioc rule names
|
|
27
29
|
|
|
28
30
|
exclude (list): list of directories to exclude from source code search
|
|
29
31
|
|
|
@@ -32,14 +34,18 @@ class Analyzer:
|
|
|
32
34
|
|
|
33
35
|
def __init__(self, ecosystem=ECOSYSTEM.PYPI) -> None:
|
|
34
36
|
self.sourcecode_rules_path = os.path.join(os.path.dirname(__file__), "sourcecode")
|
|
35
|
-
|
|
36
37
|
self.ecosystem = ecosystem
|
|
37
38
|
|
|
38
39
|
# Rules and associated detectors
|
|
39
40
|
self.metadata_detectors = get_metadata_detectors(ecosystem)
|
|
40
41
|
|
|
41
42
|
self.metadata_ruleset: set[str] = set(self.metadata_detectors.keys())
|
|
42
|
-
self.
|
|
43
|
+
self.semgrep_ruleset: set[str] = set(
|
|
44
|
+
r.id for r in get_sourcecode_rules(ecosystem, SempgrepRule)
|
|
45
|
+
)
|
|
46
|
+
self.yara_ruleset: set[str] = set(
|
|
47
|
+
r.id for r in get_sourcecode_rules(ecosystem, YaraRule)
|
|
48
|
+
)
|
|
43
49
|
|
|
44
50
|
# Define paths to exclude from sourcecode analysis
|
|
45
51
|
self.exclude = [
|
|
@@ -77,10 +83,7 @@ class Analyzer:
|
|
|
77
83
|
sourcecode_results = None
|
|
78
84
|
|
|
79
85
|
# populate results, errors, and number of issues
|
|
80
|
-
log.debug(f"Running metadata rules against package '{name}'")
|
|
81
86
|
metadata_results = self.analyze_metadata(path, info, rules, name, version)
|
|
82
|
-
|
|
83
|
-
log.debug(f"Running source code rules against directory '{path}'")
|
|
84
87
|
sourcecode_results = self.analyze_sourcecode(path, rules)
|
|
85
88
|
|
|
86
89
|
# Concatenate dictionaries together
|
|
@@ -104,6 +107,8 @@ class Analyzer:
|
|
|
104
107
|
dict[str]: map from each metadata rule and their corresponding output
|
|
105
108
|
"""
|
|
106
109
|
|
|
110
|
+
log.debug(f"Running metadata rules against package '{name}'")
|
|
111
|
+
|
|
107
112
|
all_rules = self.metadata_ruleset
|
|
108
113
|
if rules is not None:
|
|
109
114
|
# filtering the full ruleset with the user's input
|
|
@@ -139,11 +144,87 @@ class Analyzer:
|
|
|
139
144
|
Returns:
|
|
140
145
|
dict[str]: map from each source code rule and their corresponding output
|
|
141
146
|
"""
|
|
147
|
+
semgrepscan_results = self.analyze_semgrep(path, rules)
|
|
148
|
+
|
|
149
|
+
yarascan_results = self.analyze_yara(path, rules)
|
|
150
|
+
|
|
151
|
+
# Concatenate dictionaries together
|
|
152
|
+
issues = semgrepscan_results["issues"] + yarascan_results["issues"]
|
|
153
|
+
results = semgrepscan_results["results"] | yarascan_results["results"]
|
|
154
|
+
errors = semgrepscan_results["errors"] | yarascan_results["errors"]
|
|
155
|
+
|
|
156
|
+
return {"issues": issues, "errors": errors, "results": results, "path": path}
|
|
157
|
+
|
|
158
|
+
def analyze_yara(self, path: str, rules: Optional[set] = None) -> dict:
    """
    Runs the YARA rules against every file found under a package directory

    Args:
        path (str): path to package
        rules (set, optional): Set of YARA rule names to run. Defaults to all rules.

    Returns:
        dict: a mapping with three keys:
            "results": map from each selected rule name to its findings. A rule
                that matched maps to a list of findings, each a dict with
                "location", "code" and "message" keys (the same shape the
                semgrep findings use); a rule without match maps to an empty dict.
            "errors": map from an error key to its message; a failure while
                compiling or running the rules is reported under "rules-all".
            "issues": total number of findings.
    """
    log.debug(f"Running yara rules against directory '{path}'")

    all_rules = self.yara_ruleset
    if rules is not None:
        # filtering the full ruleset with the user's input
        all_rules = self.yara_ruleset & rules

    results = {rule: {} for rule in all_rules}  # type: dict
    errors: Dict[str, str] = {}
    issues = 0

    # each selected rule is expected to live in "<rule name>.yar" next to the semgrep rules
    rules_path = {
        rule_name: os.path.join(self.sourcecode_rules_path, f"{rule_name}.yar")
        for rule_name in all_rules
    }

    if not rules_path:
        log.debug("No yara rules to run")
        return {"results": results, "errors": errors, "issues": issues}

    # Findings are accumulated per rule in lists so that several matches of the
    # same rule are all reported instead of overwriting one another.
    findings: Dict[str, list] = {}
    try:
        scan_rules = yara.compile(filepaths=rules_path)

        for root, _, files in os.walk(path):
            for file_name in files:
                file_path = os.path.join(root, file_name)
                # report the path relative to the scanned directory, so that files
                # sharing a name in different sub-directories stay distinguishable
                relative_path = os.path.relpath(file_path, path)
                for m in scan_rules.match(file_path):
                    message = m.meta.get("description", f"{m.rule} rule matched")
                    for s in m.strings:
                        for i in s.instances:
                            findings.setdefault(m.rule, []).append({
                                "location": f"{relative_path}:{i.offset}",
                                "code": self.trim_code_snippet(str(i.matched_data)),
                                'message': message
                            })
                            # exactly one issue per reported finding
                            issues += 1
    except Exception as e:
        # best effort: a failing scan is reported as an error, it does not abort the analysis
        errors["rules-all"] = f"failed to run rule: {str(e)}"

    # findings gathered before a failure are still returned
    results.update(findings)

    return {"results": results, "errors": errors, "issues": issues}
|
|
209
|
+
|
|
210
|
+
def analyze_semgrep(self, path, rules=None) -> dict:
|
|
211
|
+
"""
|
|
212
|
+
Analyzes the source code of a given package
|
|
213
|
+
|
|
214
|
+
Args:
|
|
215
|
+
path (str): path to directory of package
|
|
216
|
+
rules (set, optional): Set of source code rules to analyze. Defaults to all rules.
|
|
217
|
+
|
|
218
|
+
Returns:
|
|
219
|
+
dict[str]: map from each source code rule and their corresponding output
|
|
220
|
+
"""
|
|
221
|
+
log.debug(f"Running semgrep rules against directory '{path}'")
|
|
222
|
+
|
|
142
223
|
targetpath = Path(path)
|
|
143
|
-
all_rules = self.
|
|
224
|
+
all_rules = self.semgrep_ruleset
|
|
144
225
|
if rules is not None:
|
|
145
226
|
# filtering the full ruleset with the user's input
|
|
146
|
-
all_rules = self.
|
|
227
|
+
all_rules = self.semgrep_ruleset & rules
|
|
147
228
|
|
|
148
229
|
results = {rule: {} for rule in all_rules} # type: dict
|
|
149
230
|
errors = {}
|
|
@@ -155,11 +236,11 @@ class Analyzer:
|
|
|
155
236
|
))
|
|
156
237
|
|
|
157
238
|
if len(rules_path) == 0:
|
|
158
|
-
log.debug("No
|
|
239
|
+
log.debug("No semgrep code rules to run")
|
|
159
240
|
return {"results": {}, "errors": {}, "issues": 0}
|
|
160
241
|
|
|
161
242
|
try:
|
|
162
|
-
log.debug(f"Running
|
|
243
|
+
log.debug(f"Running semgrep code rules against {path}")
|
|
163
244
|
response = self._invoke_semgrep(target=path, rules=rules_path)
|
|
164
245
|
rule_results = self._format_semgrep_response(response, targetpath=targetpath)
|
|
165
246
|
issues += sum(len(res) for res in rule_results.values())
|
|
@@ -240,11 +321,16 @@ output: {e.output}
|
|
|
240
321
|
location = file_path + ":" + str(line)
|
|
241
322
|
code = self.trim_code_snippet(code_snippet)
|
|
242
323
|
|
|
243
|
-
|
|
324
|
+
finding = {
|
|
244
325
|
'location': location,
|
|
245
326
|
'code': code,
|
|
246
327
|
'message': result["extra"]["message"]
|
|
247
|
-
}
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
rule_results = results[rule_name]
|
|
331
|
+
if finding in rule_results:
|
|
332
|
+
continue
|
|
333
|
+
results[rule_name].append(finding)
|
|
248
334
|
|
|
249
335
|
return results
|
|
250
336
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from guarddog.analyzer.metadata.detector import Detector
|
|
2
2
|
from guarddog.analyzer.metadata.npm import NPM_METADATA_RULES
|
|
3
3
|
from guarddog.analyzer.metadata.pypi import PYPI_METADATA_RULES
|
|
4
|
+
from guarddog.analyzer.metadata.go import GO_METADATA_RULES
|
|
4
5
|
from guarddog.ecosystems import ECOSYSTEM
|
|
5
6
|
|
|
6
7
|
|
|
@@ -10,3 +11,5 @@ def get_metadata_detectors(ecosystem: ECOSYSTEM) -> dict[str, Detector]:
|
|
|
10
11
|
return PYPI_METADATA_RULES
|
|
11
12
|
case ECOSYSTEM.NPM:
|
|
12
13
|
return NPM_METADATA_RULES
|
|
14
|
+
case ECOSYSTEM.GO:
|
|
15
|
+
return GO_METADATA_RULES
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pathlib
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Optional, Iterable
|
|
5
|
+
|
|
6
|
+
import yaml
|
|
7
|
+
from yaml.loader import SafeLoader
|
|
8
|
+
|
|
9
|
+
from guarddog.ecosystems import ECOSYSTEM
|
|
10
|
+
|
|
11
|
+
current_dir = pathlib.Path(__file__).parent.resolve()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# These data classes aim to reduce the spreading of the logic
|
|
15
|
+
# instead of using a dict as a structure and parsing it differently depending on the type
|
|
16
|
+
@dataclass
class SourceCodeRule:
    """
    Common shape shared by every kind of source code rule
    """

    # identifier the rule is selected and reported by
    id: str
    # name of the file the rule was loaded from
    file: str
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class YaraRule(SourceCodeRule):
    """
    YARA rule: carries nothing beyond the fields of the base class,
    the dedicated type only lets callers filter rules by kind
    """
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class SempgrepRule(SourceCodeRule):
    """
    Semgrep rule, bound to the ecosystem its language belongs to
    The rule as parsed from its yaml file is kept in rule_content
    """

    # description taken from the rule metadata
    description: str
    # ecosystem the rule applies to
    ecosystem: ECOSYSTEM
    # the rule entry exactly as loaded from the yaml file
    rule_content: dict
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def get_sourcecode_rules(
    ecosystem: ECOSYSTEM, kind: Optional[type] = None
) -> Iterable[SourceCodeRule]:
    """
    Lazily yields the registered source code rules matching an ecosystem and a kind.

    Rules that carry no "ecosystem" attribute are considered valid for every ecosystem.

    Args:
        ecosystem: The ecosystem to filter for if rules are ecosystem specific
        kind: The rule class to filter for; every kind is accepted when omitted
    """
    for candidate in SOURCECODE_RULES:
        kind_accepted = not kind or isinstance(candidate, kind)
        # a rule without an ecosystem falls back to the requested one and therefore always matches
        if kind_accepted and getattr(candidate, "ecosystem", ecosystem) == ecosystem:
            yield candidate
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# Registry of every source code rule shipped in this directory, filled in below at import time
SOURCECODE_RULES: list[SourceCodeRule] = list()

# Semgrep languages mapped to the ecosystem they belong to;
# rule entries for any other language are not loaded
_LANGUAGE_ECOSYSTEMS = {
    "python": ECOSYSTEM.PYPI,
    "javascript": ECOSYSTEM.NPM,
    "typescript": ECOSYSTEM.NPM,
    "json": ECOSYSTEM.NPM,
    "go": ECOSYSTEM.GO,
}

# Match on the full ".yml" extension (a bare "yml" suffix would also accept a
# name without the dot) and sort the listing: os.listdir returns entries in
# arbitrary order, which would make the registration order machine dependent.
semgrep_rule_file_names = sorted(
    name for name in os.listdir(current_dir) if name.endswith(".yml")
)
# all yml files placed in the sourcecode directory are loaded as semgrep rules
# refer to README.md for more information
for file_name in semgrep_rule_file_names:
    # explicit encoding: the rule files must parse the same way whatever the locale is
    with open(os.path.join(current_dir, file_name), "r", encoding="utf-8") as fd:
        data = yaml.load(fd, Loader=SafeLoader)
    for rule in data["rules"]:
        for lang in rule["languages"]:
            ecosystem = _LANGUAGE_ECOSYSTEMS.get(lang)
            if ecosystem is None:
                continue

            # avoids duplicates when multiple languages are supported by a rule
            already_registered = any(
                known.id == rule["id"]
                for known in get_sourcecode_rules(ecosystem, SempgrepRule)
            )
            if already_registered:
                continue

            SOURCECODE_RULES.append(
                SempgrepRule(
                    id=rule["id"],
                    ecosystem=ecosystem,
                    description=rule.get("metadata", {}).get("description", ""),
                    file=file_name,
                    rule_content=rule,
                )
            )

yara_rule_file_names = sorted(
    name for name in os.listdir(current_dir) if name.endswith(".yar")
)
# all yar files placed in the sourcecode directory are loaded as YARA rules
# refer to README.md for more information
for file_name in yara_rule_file_names:
    # a YARA rule is identified by its file name without the extension
    SOURCECODE_RULES.append(YaraRule(id=pathlib.Path(file_name).stem, file=file_name))
|
|
@@ -11,6 +11,8 @@ rules:
|
|
|
11
11
|
- pattern-either:
|
|
12
12
|
- patterns:
|
|
13
13
|
- pattern: "$DLL_LOAD"
|
|
14
|
+
# Ignore docstrings
|
|
15
|
+
- pattern-not-regex: ^\s*"""(.|\n)*?"""\s*$
|
|
14
16
|
- metavariable-pattern:
|
|
15
17
|
metavariable: $DLL_LOAD
|
|
16
18
|
pattern-either:
|
|
@@ -20,6 +22,21 @@ rules:
|
|
|
20
22
|
- pattern-regex: (?i).*?\/bin/.+\s+.*?\.so
|
|
21
23
|
# environment preload
|
|
22
24
|
- pattern-regex: LD_PRELOAD
|
|
25
|
+
# MITRE ATT&CK "System Binary Proxy Execution" techniques
|
|
26
|
+
# https://attack.mitre.org/techniques/T1218/
|
|
27
|
+
- pattern-regex: (?i)control(.exe)?\s+\S+.cpl
|
|
28
|
+
- pattern-regex: (?i)cmstp(.exe)?\s+\S+
|
|
29
|
+
- pattern-regex: (?i)InstallUtil(.exe)?\s+\S+
|
|
30
|
+
- pattern-regex: (?i)mshta(.exe)?\s+\S+
|
|
31
|
+
- pattern-regex: (?i)msiexec(.exe)?\s+\S+
|
|
32
|
+
- pattern-regex: (?i)odbcconf(.exe)?\s+.*{\s*REGSVR\s+\S+\s*}
|
|
33
|
+
- pattern-regex: (?i)regsvcs(.exe)?\s+\S+
|
|
34
|
+
- pattern-regex: (?i)regasm(.exe)?\s+\S+
|
|
35
|
+
- pattern-regex: (?i)regsvr32(.exe)?\s+\S+
|
|
36
|
+
- pattern-regex: (?i)rundll32(.exe)?\s+\S+
|
|
37
|
+
- pattern-regex: (?i)verclsid(.exe)?\s+.*{\s*\S+\s*}
|
|
38
|
+
- pattern-regex: (?i)mavinject(.exe)?\s+\d+\s+/INJECTRUNNING\s+\S+
|
|
39
|
+
- pattern-regex: (?i)mmc(.exe)?\s+-Embedding\s+\S+.ms
|
|
23
40
|
- patterns:
|
|
24
41
|
- pattern: $FN($EXE,...,$DLL)
|
|
25
42
|
- metavariable-pattern:
|
{guarddog-1.11.2 → guarddog-2.0.1}/guarddog/analyzer/sourcecode/exfiltrate-sensitive-data.yml
RENAMED
|
@@ -31,6 +31,18 @@ rules:
|
|
|
31
31
|
- metavariable-regex:
|
|
32
32
|
metavariable: $ENVVAR
|
|
33
33
|
regex: ([\"\'](AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_SESSION_TOKEN)[\"\'])
|
|
34
|
+
- patterns:
|
|
35
|
+
- pattern-inside: |
|
|
36
|
+
$CONNECT = sqlite3.connect(...)
|
|
37
|
+
...
|
|
38
|
+
$CURSOR = $CONNECT.cursor(...)
|
|
39
|
+
...
|
|
40
|
+
- pattern: $CURSOR.execute($QUERY, ...)
|
|
41
|
+
- metavariable-pattern:
|
|
42
|
+
metavariable: $QUERY
|
|
43
|
+
patterns:
|
|
44
|
+
- pattern: "..."
|
|
45
|
+
- pattern-regex: (?i)(cookies|credit_cards|logins|moz_cookies|moz_formhistory|moz_logins)
|
|
34
46
|
pattern-sinks:
|
|
35
47
|
- pattern-either:
|
|
36
48
|
- pattern-inside: requests.$METHOD(...)
|
|
@@ -2,7 +2,7 @@ rules:
|
|
|
2
2
|
- id: npm-dll-hijacking
|
|
3
3
|
languages:
|
|
4
4
|
- javascript
|
|
5
|
-
message: This package manipulates a trusted application into loading a malicious
|
|
5
|
+
message: This package manipulates a trusted application into loading a malicious DLL
|
|
6
6
|
metadata:
|
|
7
7
|
description: Identifies when a malicious package manipulates a trusted application into loading a malicious DLL
|
|
8
8
|
pattern-either:
|
|
@@ -20,6 +20,21 @@ rules:
|
|
|
20
20
|
- pattern-regex: (?i).*?\/bin/.+\s+.*?\.so
|
|
21
21
|
# environment preload
|
|
22
22
|
- pattern-regex: LD_PRELOAD
|
|
23
|
+
# MITRE ATT&CK "System Binary Proxy Execution" techniques
|
|
24
|
+
# https://attack.mitre.org/techniques/T1218/
|
|
25
|
+
- pattern-regex: (?i)control(.exe)?\s+\S+.cpl
|
|
26
|
+
- pattern-regex: (?i)cmstp(.exe)?\s+\S+
|
|
27
|
+
- pattern-regex: (?i)InstallUtil(.exe)?\s+\S+
|
|
28
|
+
- pattern-regex: (?i)mshta(.exe)?\s+\S+
|
|
29
|
+
- pattern-regex: (?i)msiexec(.exe)?\s+\S+
|
|
30
|
+
- pattern-regex: (?i)odbcconf(.exe)?\s+.*{\s*REGSVR\s+\S+\s*}
|
|
31
|
+
- pattern-regex: (?i)regsvcs(.exe)?\s+\S+
|
|
32
|
+
- pattern-regex: (?i)regasm(.exe)?\s+\S+
|
|
33
|
+
- pattern-regex: (?i)regsvr32(.exe)?\s+\S+
|
|
34
|
+
- pattern-regex: (?i)rundll32(.exe)?\s+\S+
|
|
35
|
+
- pattern-regex: (?i)verclsid(.exe)?\s+.*{\s*\S+\s*}
|
|
36
|
+
- pattern-regex: (?i)mavinject(.exe)?\s+\d+\s+/INJECTRUNNING\s+\S+
|
|
37
|
+
- pattern-regex: (?i)mmc(.exe)?\s+-Embedding\s+\S+.ms
|
|
23
38
|
- patterns:
|
|
24
39
|
- pattern: $FN($EXE,...,$DLL)
|
|
25
40
|
- metavariable-pattern:
|
|
@@ -58,7 +73,7 @@ rules:
|
|
|
58
73
|
- pattern: ....appendFile
|
|
59
74
|
- metavariable-pattern:
|
|
60
75
|
metavariable: $EXE
|
|
61
|
-
patterns:
|
|
76
|
+
patterns:
|
|
62
77
|
# a string with .exe or /bin/[whatever] in it
|
|
63
78
|
- pattern: "..."
|
|
64
79
|
- pattern-regex: (?i).*?(\.exe|\/bin/.+)
|
|
@@ -10,6 +10,17 @@ rules:
|
|
|
10
10
|
# (typically when a dependency is a git repository, see https://github.com/npm/cli/issues/6031#issuecomment-1449119423)
|
|
11
11
|
# however this happens pretty rarely so reporting every package with a "prepare" script would be too noisy;
|
|
12
12
|
# see https://github.com/DataDog/guarddog/issues/308
|
|
13
|
+
- pattern-not: |
|
|
14
|
+
"...": "npx only-allow pnpm"
|
|
15
|
+
- pattern-not: |
|
|
16
|
+
"...": ""
|
|
17
|
+
- pattern-not: |
|
|
18
|
+
"...": "patch-package"
|
|
19
|
+
- pattern-not: |
|
|
20
|
+
"...": "husky"
|
|
21
|
+
- pattern-not: |
|
|
22
|
+
"preinstall": "echo \"preinstall script\""
|
|
23
|
+
|
|
13
24
|
- pattern-either:
|
|
14
25
|
- pattern: |
|
|
15
26
|
"preinstall": "..."
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# TODO: Detects these links well, but lots of legitimate packages seem to use these domain extensions
rules:
  - id: shady-links
    message: This package contains an URL to a domain with a suspicious extension
    metadata:
      description: Identify when a package contains an URL to a domain with a suspicious extension
    patterns:

      # ignore comments
      - pattern-not-regex: ^\s*\# .*
      - pattern-not-regex: ^\s*\/\*(.|\n)*?\*\/\s*$
      - pattern-not-regex: ^\s*\/\/.*$

      # ignore docstring
      - pattern-not-regex: ^\s*"""(.|\n)*?"""\s*$

      # Exclude local IPv4 sometimes used in tests
      # NOTE(review): `0\.0\.0\.0` and `localhost` are top-level alternatives of this
      # group, so they match without the `http(s)://` prefix; this may also suppress
      # the `localhost.run` URLs targeted below - confirm this is intended
      - pattern-not-regex: (http[s]?:\/\/[^\n\[\/\?#"']*?(?:192\.168|10\.\d{1,3}|172\.(?:1[6-9]|2\d|3[0-1])|127\.\d{1,3})\.\d{1,3}\.\d{1,3}|0\.0\.0\.0|localhost)

      # Exclude public IPv4 sometimes used in tests
      - pattern-not-regex: (http[s]?:\/\/[^\n\[\/\?#"']*?(?:1\.1\.1\.1|8\.8\.8\.8))

      - patterns:
          # only string literals are inspected
          - pattern: ("...")
          - pattern-either:
              # complete domains
              # (the bar between backblazeb2.com and paste.ee must stay unescaped,
              # otherwise neither domain is matched on its own)
              - pattern-regex: (http[s]?:\/\/[^\n\[\/\?#"']*?(bit\.ly|discord\.com|workers\.dev|transfer\.sh|filetransfer\.io|sendspace\.com|appdomain\.cloud|backblazeb2\.com|paste\.ee|ngrok\.io|termbin\.com|localhost\.run|webhook\.site|oastify\.com|burpcollaborator\.me)\/)
              # top-level domains
              - pattern-regex: (http[s]?:\/\/[^\n\[\/\?#"']*?\.(link|xyz|tk|ml|ga|cf|gq|pw|top|club|mw|bd|ke|am|sbs|date|quest|cd|bid|ws|icu|cam|uno|email|stream)\/)
              # IPv4
              - pattern-regex: (http[s]?:\/\/[^\n\[\/\?#"']*?(?:\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}))
              # IPv6
              - pattern-regex: (http[s]?:\/\/[^\n\[\/\?#"']*?(?:\[(([A-Fa-f0-9]{1,4}:){0,7}|:):?[A-Fa-f0-9]{1,4}(:[A-Fa-f0-9]{1,4}){0,7})\])
    paths:
      exclude:
        - "*/test/*"
        - "*/tests/*"
        - "*/test_*"
    languages:
      - javascript
      - python
      - typescript
      - go
    severity: WARNING
|