guarddog 2.5.0__py3-none-any.whl → 2.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guarddog/analyzer/analyzer.py +58 -20
- guarddog/analyzer/metadata/__init__.py +2 -0
- guarddog/analyzer/metadata/bundled_binary.py +6 -6
- guarddog/analyzer/metadata/deceptive_author.py +3 -1
- guarddog/analyzer/metadata/detector.py +7 -2
- guarddog/analyzer/metadata/empty_information.py +8 -3
- guarddog/analyzer/metadata/go/typosquatting.py +4 -3
- guarddog/analyzer/metadata/npm/bundled_binary.py +7 -2
- guarddog/analyzer/metadata/npm/deceptive_author.py +1 -1
- guarddog/analyzer/metadata/npm/direct_url_dependency.py +2 -1
- guarddog/analyzer/metadata/npm/empty_information.py +10 -7
- guarddog/analyzer/metadata/npm/potentially_compromised_email_domain.py +4 -3
- guarddog/analyzer/metadata/npm/release_zero.py +13 -5
- guarddog/analyzer/metadata/npm/typosquatting.py +1 -1
- guarddog/analyzer/metadata/npm/unclaimed_maintainer_email_domain.py +3 -2
- guarddog/analyzer/metadata/npm/utils.py +4 -5
- guarddog/analyzer/metadata/potentially_compromised_email_domain.py +8 -4
- guarddog/analyzer/metadata/pypi/__init__.py +12 -6
- guarddog/analyzer/metadata/pypi/bundled_binary.py +7 -2
- guarddog/analyzer/metadata/pypi/deceptive_author.py +1 -1
- guarddog/analyzer/metadata/pypi/empty_information.py +16 -5
- guarddog/analyzer/metadata/pypi/potentially_compromised_email_domain.py +4 -3
- guarddog/analyzer/metadata/pypi/release_zero.py +16 -6
- guarddog/analyzer/metadata/pypi/repository_integrity_mismatch.py +53 -27
- guarddog/analyzer/metadata/pypi/single_python_file.py +9 -4
- guarddog/analyzer/metadata/pypi/typosquatting.py +21 -8
- guarddog/analyzer/metadata/pypi/unclaimed_maintainer_email_domain.py +6 -2
- guarddog/analyzer/metadata/pypi/utils.py +1 -4
- guarddog/analyzer/metadata/release_zero.py +1 -1
- guarddog/analyzer/metadata/repository_integrity_mismatch.py +10 -3
- guarddog/analyzer/metadata/resources/top_pypi_packages.json +43984 -15984
- guarddog/analyzer/metadata/typosquatting.py +12 -8
- guarddog/analyzer/metadata/unclaimed_maintainer_email_domain.py +7 -2
- guarddog/analyzer/sourcecode/__init__.py +34 -7
- guarddog/analyzer/sourcecode/api-obfuscation.yml +42 -0
- guarddog/analyzer/sourcecode/code-execution.yml +1 -0
- guarddog/analyzer/sourcecode/dll-hijacking.yml +5 -0
- guarddog/analyzer/sourcecode/go-exec-base64.yml +40 -0
- guarddog/analyzer/sourcecode/go-exec-download.yml +85 -0
- guarddog/analyzer/sourcecode/go-exfiltrate-sensitive-data.yml +85 -0
- guarddog/analyzer/sourcecode/npm-obfuscation.yml +2 -1
- guarddog/analyzer/sourcecode/shady-links.yml +2 -0
- guarddog/analyzer/sourcecode/suspicious_passwd_access_linux.yar +12 -0
- guarddog/analyzer/sourcecode/unicode.yml +75 -0
- guarddog/cli.py +33 -107
- guarddog/ecosystems.py +3 -0
- guarddog/reporters/__init__.py +28 -0
- guarddog/reporters/human_readable.py +138 -0
- guarddog/reporters/json.py +28 -0
- guarddog/reporters/reporter_factory.py +50 -0
- guarddog/reporters/sarif.py +179 -173
- guarddog/scanners/__init__.py +5 -0
- guarddog/scanners/extension_scanner.py +152 -0
- guarddog/scanners/github_action_project_scanner.py +47 -8
- guarddog/scanners/github_action_scanner.py +6 -2
- guarddog/scanners/go_project_scanner.py +42 -5
- guarddog/scanners/npm_package_scanner.py +12 -4
- guarddog/scanners/npm_project_scanner.py +54 -10
- guarddog/scanners/pypi_package_scanner.py +9 -3
- guarddog/scanners/pypi_project_scanner.py +67 -29
- guarddog/scanners/scanner.py +247 -164
- guarddog/utils/archives.py +2 -1
- guarddog/utils/package_info.py +3 -1
- {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info}/METADATA +11 -10
- guarddog-2.7.0.dist-info/RECORD +100 -0
- {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info}/WHEEL +1 -1
- guarddog-2.5.0.dist-info/RECORD +0 -90
- {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info}/entry_points.txt +0 -0
- {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info/licenses}/LICENSE +0 -0
- {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info/licenses}/LICENSE-3rdparty.csv +0 -0
- {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info/licenses}/NOTICE +0 -0
guarddog/analyzer/analyzer.py
CHANGED
|
@@ -13,9 +13,10 @@ from guarddog.analyzer.sourcecode import get_sourcecode_rules, SempgrepRule, Yar
|
|
|
13
13
|
from guarddog.utils.config import YARA_EXT_EXCLUDE
|
|
14
14
|
from guarddog.ecosystems import ECOSYSTEM
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
|
|
16
|
+
MAX_BYTES_DEFAULT = 10_000_000
|
|
17
|
+
SEMGREP_TIMEOUT_DEFAULT = 10
|
|
18
18
|
|
|
19
|
+
SOURCECODE_RULES_PATH = os.path.join(os.path.dirname(__file__), "sourcecode")
|
|
19
20
|
log = logging.getLogger("guarddog")
|
|
20
21
|
|
|
21
22
|
|
|
@@ -64,7 +65,14 @@ class Analyzer:
|
|
|
64
65
|
".semgrep_logs",
|
|
65
66
|
]
|
|
66
67
|
|
|
67
|
-
def analyze(
|
|
68
|
+
def analyze(
|
|
69
|
+
self,
|
|
70
|
+
path,
|
|
71
|
+
info=None,
|
|
72
|
+
rules=None,
|
|
73
|
+
name: Optional[str] = None,
|
|
74
|
+
version: Optional[str] = None,
|
|
75
|
+
) -> dict:
|
|
68
76
|
"""
|
|
69
77
|
Analyzes a package in the given path
|
|
70
78
|
|
|
@@ -94,8 +102,14 @@ class Analyzer:
|
|
|
94
102
|
|
|
95
103
|
return {"issues": issues, "errors": errors, "results": results, "path": path}
|
|
96
104
|
|
|
97
|
-
def analyze_metadata(
|
|
98
|
-
|
|
105
|
+
def analyze_metadata(
|
|
106
|
+
self,
|
|
107
|
+
path: str,
|
|
108
|
+
info,
|
|
109
|
+
rules=None,
|
|
110
|
+
name: Optional[str] = None,
|
|
111
|
+
version: Optional[str] = None,
|
|
112
|
+
) -> dict:
|
|
99
113
|
"""
|
|
100
114
|
Analyzes the metadata of a given package
|
|
101
115
|
|
|
@@ -124,7 +138,9 @@ class Analyzer:
|
|
|
124
138
|
for rule in all_rules:
|
|
125
139
|
try:
|
|
126
140
|
log.debug(f"Running rule {rule} against package '{name}'")
|
|
127
|
-
rule_matches, message = self.metadata_detectors[rule].detect(
|
|
141
|
+
rule_matches, message = self.metadata_detectors[rule].detect(
|
|
142
|
+
info, path, name, version
|
|
143
|
+
)
|
|
128
144
|
results[rule] = None
|
|
129
145
|
if rule_matches:
|
|
130
146
|
issues += 1
|
|
@@ -199,16 +215,21 @@ class Analyzer:
|
|
|
199
215
|
continue
|
|
200
216
|
|
|
201
217
|
scan_file_target_abspath = os.path.join(root, f)
|
|
202
|
-
scan_file_target_relpath = os.path.relpath(
|
|
218
|
+
scan_file_target_relpath = os.path.relpath(
|
|
219
|
+
scan_file_target_abspath, path
|
|
220
|
+
)
|
|
203
221
|
|
|
204
222
|
matches = scan_rules.match(scan_file_target_abspath)
|
|
205
223
|
for m in matches:
|
|
224
|
+
|
|
206
225
|
for s in m.strings:
|
|
207
226
|
for i in s.instances:
|
|
208
227
|
finding = {
|
|
209
228
|
"location": f"{scan_file_target_relpath}:{i.offset}",
|
|
210
229
|
"code": self.trim_code_snippet(str(i.matched_data)),
|
|
211
|
-
|
|
230
|
+
"message": m.meta.get(
|
|
231
|
+
"description", f"{m.rule} rule matched"
|
|
232
|
+
),
|
|
212
233
|
}
|
|
213
234
|
|
|
214
235
|
# since yara can match the multiple times in the same file
|
|
@@ -251,10 +272,14 @@ class Analyzer:
|
|
|
251
272
|
errors = {}
|
|
252
273
|
issues = 0
|
|
253
274
|
|
|
254
|
-
rules_path = list(
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
275
|
+
rules_path = list(
|
|
276
|
+
map(
|
|
277
|
+
lambda rule_name: os.path.join(
|
|
278
|
+
SOURCECODE_RULES_PATH, f"{rule_name}.yml"
|
|
279
|
+
),
|
|
280
|
+
all_rules,
|
|
281
|
+
)
|
|
282
|
+
)
|
|
258
283
|
|
|
259
284
|
if len(rules_path) == 0:
|
|
260
285
|
log.debug("No semgrep code rules to run")
|
|
@@ -263,7 +288,9 @@ class Analyzer:
|
|
|
263
288
|
try:
|
|
264
289
|
log.debug(f"Running semgrep code rules against {path}")
|
|
265
290
|
response = self._invoke_semgrep(target=path, rules=rules_path)
|
|
266
|
-
rule_results = self._format_semgrep_response(
|
|
291
|
+
rule_results = self._format_semgrep_response(
|
|
292
|
+
response, targetpath=targetpath
|
|
293
|
+
)
|
|
267
294
|
issues += sum(len(res) for res in rule_results.values())
|
|
268
295
|
|
|
269
296
|
results = results | rule_results
|
|
@@ -274,12 +301,19 @@ class Analyzer:
|
|
|
274
301
|
|
|
275
302
|
def _invoke_semgrep(self, target: str, rules: Iterable[str]):
|
|
276
303
|
try:
|
|
304
|
+
SEMGREP_MAX_TARGET_BYTES = int(
|
|
305
|
+
os.getenv("GUARDDOG_SEMGREP_MAX_TARGET_BYTES", MAX_BYTES_DEFAULT)
|
|
306
|
+
)
|
|
307
|
+
SEMGREP_TIMEOUT = int(
|
|
308
|
+
os.getenv("GUARDDOG_SEMGREP_TIMEOUT", SEMGREP_TIMEOUT_DEFAULT)
|
|
309
|
+
)
|
|
277
310
|
cmd = ["semgrep"]
|
|
278
311
|
for rule in rules:
|
|
279
312
|
cmd.extend(["--config", rule])
|
|
280
313
|
|
|
281
314
|
for excluded in self.exclude:
|
|
282
315
|
cmd.append(f"--exclude='{excluded}'")
|
|
316
|
+
cmd.append(f"--timeout={SEMGREP_TIMEOUT}")
|
|
283
317
|
cmd.append("--no-git-ignore")
|
|
284
318
|
cmd.append("--json")
|
|
285
319
|
cmd.append("--quiet")
|
|
@@ -287,7 +321,9 @@ class Analyzer:
|
|
|
287
321
|
cmd.append(f"--max-target-bytes={SEMGREP_MAX_TARGET_BYTES}")
|
|
288
322
|
cmd.append(target)
|
|
289
323
|
log.debug(f"Invoking semgrep with command line: {' '.join(cmd)}")
|
|
290
|
-
result = subprocess.run(
|
|
324
|
+
result = subprocess.run(
|
|
325
|
+
cmd, capture_output=True, check=True, encoding="utf-8"
|
|
326
|
+
)
|
|
291
327
|
return json.loads(str(result.stdout))
|
|
292
328
|
except FileNotFoundError:
|
|
293
329
|
raise Exception("unable to find semgrep binary")
|
|
@@ -302,6 +338,8 @@ output: {e.output}
|
|
|
302
338
|
raise Exception(error_message)
|
|
303
339
|
except json.JSONDecodeError as e:
|
|
304
340
|
raise Exception("unable to parse semgrep JSON output: " + str(e))
|
|
341
|
+
except ValueError as e:
|
|
342
|
+
raise Exception("Invalid environment variable value: " + str(e))
|
|
305
343
|
|
|
306
344
|
def _format_semgrep_response(self, response, rule=None, targetpath=None):
|
|
307
345
|
"""
|
|
@@ -348,9 +386,9 @@ output: {e.output}
|
|
|
348
386
|
location = file_path + ":" + str(start_line)
|
|
349
387
|
|
|
350
388
|
finding = {
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
389
|
+
"location": location,
|
|
390
|
+
"code": code,
|
|
391
|
+
"message": result["extra"]["message"],
|
|
354
392
|
}
|
|
355
393
|
|
|
356
394
|
rule_results = results[rule_name]
|
|
@@ -374,7 +412,7 @@ output: {e.output}
|
|
|
374
412
|
"""
|
|
375
413
|
snippet = []
|
|
376
414
|
try:
|
|
377
|
-
with open(file_path,
|
|
415
|
+
with open(file_path, "r") as file:
|
|
378
416
|
for current_line_number, line in enumerate(file, start=1):
|
|
379
417
|
if start_line <= current_line_number <= end_line:
|
|
380
418
|
snippet.append(line)
|
|
@@ -385,12 +423,12 @@ output: {e.output}
|
|
|
385
423
|
except Exception as e:
|
|
386
424
|
log.error(f"Error reading file {file_path}: {str(e)}")
|
|
387
425
|
|
|
388
|
-
return
|
|
426
|
+
return "".join(snippet)
|
|
389
427
|
|
|
390
428
|
# Makes sure the matching code to be displayed isn't too long
|
|
391
429
|
def trim_code_snippet(self, code):
|
|
392
430
|
THRESHOLD = 250
|
|
393
431
|
if len(code) > THRESHOLD:
|
|
394
|
-
return code[: THRESHOLD - 10] +
|
|
432
|
+
return code[: THRESHOLD - 10] + "..." + code[len(code) - 10 :]
|
|
395
433
|
else:
|
|
396
434
|
return code
|
|
@@ -15,10 +15,10 @@ class BundledBinary(Detector):
|
|
|
15
15
|
# magic bytes are the first few bytes of a file that can be used to identify the file type
|
|
16
16
|
# regardless of their extension
|
|
17
17
|
magic_bytes = {
|
|
18
|
-
"exe": b"\
|
|
19
|
-
"elf": b"\
|
|
20
|
-
"macho32": b"\
|
|
21
|
-
"macho64": b"\
|
|
18
|
+
"exe": b"\x4d\x5a",
|
|
19
|
+
"elf": b"\x7f\x45\x4c\x46",
|
|
20
|
+
"macho32": b"\xfe\xed\xfa\xce",
|
|
21
|
+
"macho64": b"\xfe\xed\xfa\xcf",
|
|
22
22
|
}
|
|
23
23
|
|
|
24
24
|
def __init__(self):
|
|
@@ -40,7 +40,7 @@ class BundledBinary(Detector):
|
|
|
40
40
|
def sha256(file: str) -> str:
|
|
41
41
|
with open(file, "rb") as f:
|
|
42
42
|
hasher = hashlib.sha256()
|
|
43
|
-
while
|
|
43
|
+
while chunk := f.read(4096):
|
|
44
44
|
hasher.update(chunk)
|
|
45
45
|
return hasher.hexdigest()
|
|
46
46
|
|
|
@@ -65,7 +65,7 @@ class BundledBinary(Detector):
|
|
|
65
65
|
if not bin_files:
|
|
66
66
|
return False, ""
|
|
67
67
|
|
|
68
|
-
output_lines =
|
|
68
|
+
output_lines = "\n".join(
|
|
69
69
|
f"{digest}: {', '.join(files)}" for digest, files in bin_files.items()
|
|
70
70
|
)
|
|
71
71
|
return True, f"Binary file/s detected in package:\n{output_lines}"
|
|
@@ -41,7 +41,9 @@ class DeceptiveAuthorDetector(Detector):
|
|
|
41
41
|
# read internal maintained list of placeholder email domains
|
|
42
42
|
# this domains are usually used by authors who want to don't want to reveal their real email
|
|
43
43
|
placeholder_email_domains_data = None
|
|
44
|
-
with open(
|
|
44
|
+
with open(
|
|
45
|
+
placeholder_email_domains_path, "r"
|
|
46
|
+
) as placeholder_email_domains_file:
|
|
45
47
|
placeholder_email_domains_data = set(
|
|
46
48
|
placeholder_email_domains_file.read().split("\n")
|
|
47
49
|
)
|
|
@@ -11,8 +11,13 @@ class Detector:
|
|
|
11
11
|
|
|
12
12
|
# returns (ruleMatches, message)
|
|
13
13
|
@abstractmethod
|
|
14
|
-
def detect(
|
|
15
|
-
|
|
14
|
+
def detect(
|
|
15
|
+
self,
|
|
16
|
+
package_info,
|
|
17
|
+
path: Optional[str] = None,
|
|
18
|
+
name: Optional[str] = None,
|
|
19
|
+
version: Optional[str] = None,
|
|
20
|
+
) -> tuple[bool, Optional[str]]:
|
|
16
21
|
pass # pragma: no cover
|
|
17
22
|
|
|
18
23
|
def get_name(self) -> str:
|
|
@@ -15,12 +15,17 @@ class EmptyInfoDetector(Detector):
|
|
|
15
15
|
def __init__(self):
|
|
16
16
|
super().__init__(
|
|
17
17
|
name="empty_information",
|
|
18
|
-
description="Identify packages with an empty description field"
|
|
18
|
+
description="Identify packages with an empty description field",
|
|
19
19
|
)
|
|
20
20
|
|
|
21
21
|
@abstractmethod
|
|
22
|
-
def detect(
|
|
23
|
-
|
|
22
|
+
def detect(
|
|
23
|
+
self,
|
|
24
|
+
package_info,
|
|
25
|
+
path: Optional[str] = None,
|
|
26
|
+
name: Optional[str] = None,
|
|
27
|
+
version: Optional[str] = None,
|
|
28
|
+
) -> tuple[bool, str]:
|
|
24
29
|
"""
|
|
25
30
|
Uses a package's information from PyPI's JSON API to determine
|
|
26
31
|
if the package has an empty description
|
|
@@ -34,7 +34,8 @@ class GoTyposquatDetector(TyposquatDetector):
|
|
|
34
34
|
|
|
35
35
|
if top_packages_information is None:
|
|
36
36
|
raise Exception(
|
|
37
|
-
f"Could not retrieve top Go packages from {top_packages_path}"
|
|
37
|
+
f"Could not retrieve top Go packages from {top_packages_path}"
|
|
38
|
+
)
|
|
38
39
|
|
|
39
40
|
return set(top_packages_information)
|
|
40
41
|
|
|
@@ -104,8 +105,8 @@ class GoTyposquatDetector(TyposquatDetector):
|
|
|
104
105
|
continue
|
|
105
106
|
|
|
106
107
|
# Get form when replacing or removing go/golang term
|
|
107
|
-
replaced_form = terms[:i] + [confused_term] + terms[i + 1:]
|
|
108
|
-
removed_form = terms[:i] + terms[i + 1:]
|
|
108
|
+
replaced_form = terms[:i] + [confused_term] + terms[i + 1 :]
|
|
109
|
+
removed_form = terms[:i] + terms[i + 1 :]
|
|
109
110
|
|
|
110
111
|
for form in (replaced_form, removed_form):
|
|
111
112
|
confused_forms.append("-".join(form))
|
|
@@ -3,6 +3,11 @@ from typing import Optional
|
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
class NPMBundledBinary(BundledBinary):
|
|
6
|
-
def detect(
|
|
7
|
-
|
|
6
|
+
def detect(
|
|
7
|
+
self,
|
|
8
|
+
package_info,
|
|
9
|
+
path: Optional[str] = None,
|
|
10
|
+
name: Optional[str] = None,
|
|
11
|
+
version: Optional[str] = None,
|
|
12
|
+
) -> tuple[bool, str]:
|
|
8
13
|
return super().detect(package_info, path, name, version)
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Empty Information Detector
|
|
2
2
|
|
|
3
3
|
Detects if a package contains an empty description
|
|
4
4
|
"""
|
|
5
|
+
|
|
5
6
|
import os.path
|
|
6
7
|
from typing import Optional
|
|
7
8
|
|
|
@@ -12,13 +13,15 @@ MESSAGE = "This package has an empty description on PyPi"
|
|
|
12
13
|
|
|
13
14
|
class NPMEmptyInfoDetector(EmptyInfoDetector):
|
|
14
15
|
|
|
15
|
-
def detect(
|
|
16
|
-
|
|
16
|
+
def detect(
|
|
17
|
+
self,
|
|
18
|
+
package_info,
|
|
19
|
+
path: Optional[str] = None,
|
|
20
|
+
name: Optional[str] = None,
|
|
21
|
+
version: Optional[str] = None,
|
|
22
|
+
) -> tuple[bool, str]:
|
|
17
23
|
if path is None:
|
|
18
24
|
raise TypeError("path must be a string")
|
|
19
25
|
package_path = os.path.join(path, "package")
|
|
20
|
-
content = map(
|
|
21
|
-
lambda x: x.lower(),
|
|
22
|
-
os.listdir(package_path)
|
|
23
|
-
)
|
|
26
|
+
content = map(lambda x: x.lower(), os.listdir(package_path))
|
|
24
27
|
return "readme.md" not in content, EmptyInfoDetector.MESSAGE_TEMPLATE % "npm"
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Compromised Email Detector
|
|
2
2
|
|
|
3
3
|
Detects if a maintainer's email domain might have been compromised.
|
|
4
4
|
"""
|
|
@@ -8,8 +8,9 @@ from typing import Optional
|
|
|
8
8
|
|
|
9
9
|
from dateutil import parser
|
|
10
10
|
|
|
11
|
-
from guarddog.analyzer.metadata.potentially_compromised_email_domain import
|
|
12
|
-
PotentiallyCompromisedEmailDomainDetector
|
|
11
|
+
from guarddog.analyzer.metadata.potentially_compromised_email_domain import (
|
|
12
|
+
PotentiallyCompromisedEmailDomainDetector,
|
|
13
|
+
)
|
|
13
14
|
|
|
14
15
|
from .utils import NPM_API_MAINTAINER_EMAIL_WARNING, get_email_addresses
|
|
15
16
|
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Empty Information Detector
|
|
2
2
|
|
|
3
3
|
Detects when a package has its latest release version to 0.0.0
|
|
4
4
|
"""
|
|
5
|
+
|
|
5
6
|
from typing import Optional
|
|
6
7
|
|
|
7
8
|
from guarddog.analyzer.metadata.release_zero import ReleaseZeroDetector
|
|
@@ -9,7 +10,14 @@ from guarddog.analyzer.metadata.release_zero import ReleaseZeroDetector
|
|
|
9
10
|
|
|
10
11
|
class NPMReleaseZeroDetector(ReleaseZeroDetector):
|
|
11
12
|
|
|
12
|
-
def detect(
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
13
|
+
def detect(
|
|
14
|
+
self,
|
|
15
|
+
package_info,
|
|
16
|
+
path: Optional[str] = None,
|
|
17
|
+
name: Optional[str] = None,
|
|
18
|
+
version: Optional[str] = None,
|
|
19
|
+
) -> tuple[bool, str]:
|
|
20
|
+
return (
|
|
21
|
+
package_info["dist-tags"]["latest"] in ["0.0.0", "0.0", "0"],
|
|
22
|
+
ReleaseZeroDetector.MESSAGE_TEMPLATE % package_info["dist-tags"]["latest"],
|
|
23
|
+
)
|
|
@@ -79,7 +79,7 @@ class NPMTyposquatDetector(TyposquatDetector):
|
|
|
79
79
|
return False, None
|
|
80
80
|
|
|
81
81
|
def _get_confused_forms(self, package_name) -> list:
|
|
82
|
-
"""
|
|
82
|
+
"""Gets confused terms for npm packages.
|
|
83
83
|
Currently, there are no confused terms for npm packages.
|
|
84
84
|
"""
|
|
85
85
|
return []
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
2
|
|
|
3
|
-
from guarddog.analyzer.metadata.unclaimed_maintainer_email_domain import
|
|
4
|
-
UnclaimedMaintainerEmailDomainDetector
|
|
3
|
+
from guarddog.analyzer.metadata.unclaimed_maintainer_email_domain import (
|
|
4
|
+
UnclaimedMaintainerEmailDomainDetector,
|
|
5
|
+
)
|
|
5
6
|
|
|
6
7
|
from .utils import NPM_API_MAINTAINER_EMAIL_WARNING, get_email_addresses
|
|
7
8
|
|
|
@@ -6,8 +6,7 @@ NPM_API_MAINTAINER_EMAIL_WARNING = (
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def get_email_addresses(package_info: dict) -> set[str]:
|
|
9
|
-
return {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
} - {None, ""}
|
|
9
|
+
return {m["email"] for m in package_info.get("maintainers", []) if "email" in m} - {
|
|
10
|
+
None,
|
|
11
|
+
"",
|
|
12
|
+
}
|
|
@@ -60,10 +60,14 @@ class PotentiallyCompromisedEmailDomainDetector(Detector):
|
|
|
60
60
|
has_issues = True
|
|
61
61
|
|
|
62
62
|
messages.append(
|
|
63
|
-
f"The domain name of the maintainer's email address ({email}) was"
|
|
64
|
-
"
|
|
65
|
-
"
|
|
66
|
-
|
|
63
|
+
f"The domain name of the maintainer's email address ({email}) was"
|
|
64
|
+
" re-registered after"
|
|
65
|
+
" the latest release of this "
|
|
66
|
+
"package. This can be an indicator that this is a"
|
|
67
|
+
" custom domain that expired, and was leveraged by"
|
|
68
|
+
" an attacker to compromise the"
|
|
69
|
+
f" package owner's {self.ecosystem}"
|
|
70
|
+
" account."
|
|
67
71
|
)
|
|
68
72
|
|
|
69
73
|
return has_issues, "\n".join(messages)
|
|
@@ -1,11 +1,17 @@
|
|
|
1
1
|
from guarddog.analyzer.metadata.pypi.empty_information import PypiEmptyInfoDetector
|
|
2
|
-
from guarddog.analyzer.metadata.pypi.potentially_compromised_email_domain import
|
|
3
|
-
PypiPotentiallyCompromisedEmailDomainDetector
|
|
4
|
-
|
|
5
|
-
|
|
2
|
+
from guarddog.analyzer.metadata.pypi.potentially_compromised_email_domain import (
|
|
3
|
+
PypiPotentiallyCompromisedEmailDomainDetector,
|
|
4
|
+
)
|
|
5
|
+
from guarddog.analyzer.metadata.pypi.unclaimed_maintainer_email_domain import (
|
|
6
|
+
PypiUnclaimedMaintainerEmailDomainDetector,
|
|
7
|
+
)
|
|
6
8
|
from guarddog.analyzer.metadata.pypi.release_zero import PypiReleaseZeroDetector
|
|
7
|
-
from guarddog.analyzer.metadata.pypi.repository_integrity_mismatch import
|
|
8
|
-
|
|
9
|
+
from guarddog.analyzer.metadata.pypi.repository_integrity_mismatch import (
|
|
10
|
+
PypiIntegrityMismatchDetector,
|
|
11
|
+
)
|
|
12
|
+
from guarddog.analyzer.metadata.pypi.single_python_file import (
|
|
13
|
+
PypiSinglePythonFileDetector,
|
|
14
|
+
)
|
|
9
15
|
from guarddog.analyzer.metadata.pypi.typosquatting import PypiTyposquatDetector
|
|
10
16
|
from guarddog.analyzer.metadata.pypi.bundled_binary import PypiBundledBinary
|
|
11
17
|
from guarddog.analyzer.metadata.pypi.deceptive_author import PypiDeceptiveAuthor
|
|
@@ -3,6 +3,11 @@ from typing import Optional
|
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
class PypiBundledBinary(BundledBinary):
|
|
6
|
-
def detect(
|
|
7
|
-
|
|
6
|
+
def detect(
|
|
7
|
+
self,
|
|
8
|
+
package_info,
|
|
9
|
+
path: Optional[str] = None,
|
|
10
|
+
name: Optional[str] = None,
|
|
11
|
+
version: Optional[str] = None,
|
|
12
|
+
) -> tuple[bool, str]:
|
|
8
13
|
return super().detect(package_info, path, name, version)
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Empty Information Detector
|
|
2
2
|
|
|
3
3
|
Detects if a package contains an empty description
|
|
4
4
|
"""
|
|
5
|
+
|
|
5
6
|
import logging
|
|
6
7
|
from typing import Optional
|
|
7
8
|
|
|
@@ -13,7 +14,17 @@ log = logging.getLogger("guarddog")
|
|
|
13
14
|
|
|
14
15
|
|
|
15
16
|
class PypiEmptyInfoDetector(EmptyInfoDetector):
|
|
16
|
-
def detect(
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
17
|
+
def detect(
|
|
18
|
+
self,
|
|
19
|
+
package_info,
|
|
20
|
+
path: Optional[str] = None,
|
|
21
|
+
name: Optional[str] = None,
|
|
22
|
+
version: Optional[str] = None,
|
|
23
|
+
) -> tuple[bool, str]:
|
|
24
|
+
log.debug(
|
|
25
|
+
f"Running PyPI empty description heuristic on package {name} version {version}"
|
|
26
|
+
)
|
|
27
|
+
return (
|
|
28
|
+
len(package_info["info"]["description"].strip()) == 0,
|
|
29
|
+
EmptyInfoDetector.MESSAGE_TEMPLATE % "PyPI",
|
|
30
|
+
)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Compromised Email Detector
|
|
2
2
|
|
|
3
3
|
Detects if a maintainer's email domain might have been compromised.
|
|
4
4
|
"""
|
|
@@ -9,8 +9,9 @@ from typing import Optional
|
|
|
9
9
|
from dateutil import parser
|
|
10
10
|
from packaging import version
|
|
11
11
|
|
|
12
|
-
from guarddog.analyzer.metadata.potentially_compromised_email_domain import
|
|
13
|
-
PotentiallyCompromisedEmailDomainDetector
|
|
12
|
+
from guarddog.analyzer.metadata.potentially_compromised_email_domain import (
|
|
13
|
+
PotentiallyCompromisedEmailDomainDetector,
|
|
14
|
+
)
|
|
14
15
|
|
|
15
16
|
from .utils import get_email_addresses
|
|
16
17
|
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Empty Information Detector
|
|
2
2
|
|
|
3
3
|
Detects when a package has its latest release version to 0.0.0
|
|
4
4
|
"""
|
|
5
|
+
|
|
5
6
|
import logging
|
|
6
7
|
from typing import Optional
|
|
7
8
|
|
|
@@ -12,8 +13,17 @@ log = logging.getLogger("guarddog")
|
|
|
12
13
|
|
|
13
14
|
class PypiReleaseZeroDetector(ReleaseZeroDetector):
|
|
14
15
|
|
|
15
|
-
def detect(
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
16
|
+
def detect(
|
|
17
|
+
self,
|
|
18
|
+
package_info,
|
|
19
|
+
path: Optional[str] = None,
|
|
20
|
+
name: Optional[str] = None,
|
|
21
|
+
version: Optional[str] = None,
|
|
22
|
+
) -> tuple[bool, str]:
|
|
23
|
+
log.debug(
|
|
24
|
+
f"Running zero version heuristic on PyPI package {name} version {version}"
|
|
25
|
+
)
|
|
26
|
+
return (
|
|
27
|
+
package_info["info"]["version"] in ["0.0.0", "0.0"],
|
|
28
|
+
ReleaseZeroDetector.MESSAGE_TEMPLATE % package_info["info"]["version"],
|
|
29
|
+
)
|