guarddog 2.6.0__py3-none-any.whl → 2.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guarddog/analyzer/analyzer.py +51 -23
- guarddog/analyzer/metadata/__init__.py +2 -0
- guarddog/analyzer/metadata/bundled_binary.py +6 -6
- guarddog/analyzer/metadata/deceptive_author.py +3 -1
- guarddog/analyzer/metadata/detector.py +7 -2
- guarddog/analyzer/metadata/empty_information.py +8 -3
- guarddog/analyzer/metadata/go/typosquatting.py +17 -9
- guarddog/analyzer/metadata/npm/bundled_binary.py +7 -2
- guarddog/analyzer/metadata/npm/deceptive_author.py +1 -1
- guarddog/analyzer/metadata/npm/direct_url_dependency.py +2 -1
- guarddog/analyzer/metadata/npm/empty_information.py +10 -7
- guarddog/analyzer/metadata/npm/potentially_compromised_email_domain.py +4 -3
- guarddog/analyzer/metadata/npm/release_zero.py +13 -5
- guarddog/analyzer/metadata/npm/typosquatting.py +44 -13
- guarddog/analyzer/metadata/npm/unclaimed_maintainer_email_domain.py +3 -2
- guarddog/analyzer/metadata/npm/utils.py +4 -5
- guarddog/analyzer/metadata/potentially_compromised_email_domain.py +8 -4
- guarddog/analyzer/metadata/pypi/__init__.py +12 -6
- guarddog/analyzer/metadata/pypi/bundled_binary.py +7 -2
- guarddog/analyzer/metadata/pypi/deceptive_author.py +1 -1
- guarddog/analyzer/metadata/pypi/empty_information.py +16 -5
- guarddog/analyzer/metadata/pypi/potentially_compromised_email_domain.py +4 -3
- guarddog/analyzer/metadata/pypi/release_zero.py +16 -6
- guarddog/analyzer/metadata/pypi/repository_integrity_mismatch.py +53 -27
- guarddog/analyzer/metadata/pypi/single_python_file.py +9 -4
- guarddog/analyzer/metadata/pypi/typosquatting.py +73 -26
- guarddog/analyzer/metadata/pypi/unclaimed_maintainer_email_domain.py +6 -2
- guarddog/analyzer/metadata/pypi/utils.py +1 -4
- guarddog/analyzer/metadata/release_zero.py +1 -1
- guarddog/analyzer/metadata/repository_integrity_mismatch.py +10 -3
- guarddog/analyzer/metadata/resources/top_pypi_packages.json +29998 -29986
- guarddog/analyzer/metadata/typosquatting.py +12 -8
- guarddog/analyzer/metadata/unclaimed_maintainer_email_domain.py +7 -2
- guarddog/analyzer/metadata/utils.py +1 -1
- guarddog/analyzer/sourcecode/__init__.py +34 -7
- guarddog/analyzer/sourcecode/api-obfuscation.yml +42 -0
- guarddog/analyzer/sourcecode/code-execution.yml +1 -0
- guarddog/analyzer/sourcecode/dll-hijacking.yml +5 -0
- guarddog/analyzer/sourcecode/shady-links.yml +1 -1
- guarddog/analyzer/sourcecode/suspicious_passwd_access_linux.yar +12 -0
- guarddog/analyzer/sourcecode/unicode.yml +75 -0
- guarddog/ecosystems.py +3 -0
- guarddog/scanners/__init__.py +5 -0
- guarddog/scanners/extension_scanner.py +152 -0
- guarddog/scanners/github_action_scanner.py +6 -2
- guarddog/scanners/go_project_scanner.py +1 -1
- guarddog/scanners/npm_package_scanner.py +12 -4
- guarddog/scanners/pypi_package_scanner.py +9 -3
- guarddog/scanners/pypi_project_scanner.py +9 -12
- guarddog/scanners/scanner.py +1 -0
- guarddog/utils/archives.py +134 -9
- guarddog/utils/config.py +24 -2
- guarddog/utils/package_info.py +3 -1
- {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info}/METADATA +10 -9
- guarddog-2.7.1.dist-info/RECORD +100 -0
- {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info}/WHEEL +1 -1
- guarddog-2.6.0.dist-info/RECORD +0 -96
- {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info}/entry_points.txt +0 -0
- {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info/licenses}/LICENSE +0 -0
- {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info/licenses}/LICENSE-3rdparty.csv +0 -0
- {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info/licenses}/NOTICE +0 -0
guarddog/analyzer/analyzer.py
CHANGED
|
@@ -16,9 +16,7 @@ from guarddog.ecosystems import ECOSYSTEM
|
|
|
16
16
|
MAX_BYTES_DEFAULT = 10_000_000
|
|
17
17
|
SEMGREP_TIMEOUT_DEFAULT = 10
|
|
18
18
|
|
|
19
|
-
SOURCECODE_RULES_PATH = os.path.join(
|
|
20
|
-
os.path.dirname(__file__), "sourcecode"
|
|
21
|
-
)
|
|
19
|
+
SOURCECODE_RULES_PATH = os.path.join(os.path.dirname(__file__), "sourcecode")
|
|
22
20
|
log = logging.getLogger("guarddog")
|
|
23
21
|
|
|
24
22
|
|
|
@@ -67,7 +65,14 @@ class Analyzer:
|
|
|
67
65
|
".semgrep_logs",
|
|
68
66
|
]
|
|
69
67
|
|
|
70
|
-
def analyze(
|
|
68
|
+
def analyze(
|
|
69
|
+
self,
|
|
70
|
+
path,
|
|
71
|
+
info=None,
|
|
72
|
+
rules=None,
|
|
73
|
+
name: Optional[str] = None,
|
|
74
|
+
version: Optional[str] = None,
|
|
75
|
+
) -> dict:
|
|
71
76
|
"""
|
|
72
77
|
Analyzes a package in the given path
|
|
73
78
|
|
|
@@ -97,8 +102,14 @@ class Analyzer:
|
|
|
97
102
|
|
|
98
103
|
return {"issues": issues, "errors": errors, "results": results, "path": path}
|
|
99
104
|
|
|
100
|
-
def analyze_metadata(
|
|
101
|
-
|
|
105
|
+
def analyze_metadata(
|
|
106
|
+
self,
|
|
107
|
+
path: str,
|
|
108
|
+
info,
|
|
109
|
+
rules=None,
|
|
110
|
+
name: Optional[str] = None,
|
|
111
|
+
version: Optional[str] = None,
|
|
112
|
+
) -> dict:
|
|
102
113
|
"""
|
|
103
114
|
Analyzes the metadata of a given package
|
|
104
115
|
|
|
@@ -127,7 +138,9 @@ class Analyzer:
|
|
|
127
138
|
for rule in all_rules:
|
|
128
139
|
try:
|
|
129
140
|
log.debug(f"Running rule {rule} against package '{name}'")
|
|
130
|
-
rule_matches, message = self.metadata_detectors[rule].detect(
|
|
141
|
+
rule_matches, message = self.metadata_detectors[rule].detect(
|
|
142
|
+
info, path, name, version
|
|
143
|
+
)
|
|
131
144
|
results[rule] = None
|
|
132
145
|
if rule_matches:
|
|
133
146
|
issues += 1
|
|
@@ -202,16 +215,21 @@ class Analyzer:
|
|
|
202
215
|
continue
|
|
203
216
|
|
|
204
217
|
scan_file_target_abspath = os.path.join(root, f)
|
|
205
|
-
scan_file_target_relpath = os.path.relpath(
|
|
218
|
+
scan_file_target_relpath = os.path.relpath(
|
|
219
|
+
scan_file_target_abspath, path
|
|
220
|
+
)
|
|
206
221
|
|
|
207
222
|
matches = scan_rules.match(scan_file_target_abspath)
|
|
208
223
|
for m in matches:
|
|
224
|
+
|
|
209
225
|
for s in m.strings:
|
|
210
226
|
for i in s.instances:
|
|
211
227
|
finding = {
|
|
212
228
|
"location": f"{scan_file_target_relpath}:{i.offset}",
|
|
213
229
|
"code": self.trim_code_snippet(str(i.matched_data)),
|
|
214
|
-
|
|
230
|
+
"message": m.meta.get(
|
|
231
|
+
"description", f"{m.rule} rule matched"
|
|
232
|
+
),
|
|
215
233
|
}
|
|
216
234
|
|
|
217
235
|
# since yara can match the multiple times in the same file
|
|
@@ -254,10 +272,14 @@ class Analyzer:
|
|
|
254
272
|
errors = {}
|
|
255
273
|
issues = 0
|
|
256
274
|
|
|
257
|
-
rules_path = list(
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
275
|
+
rules_path = list(
|
|
276
|
+
map(
|
|
277
|
+
lambda rule_name: os.path.join(
|
|
278
|
+
SOURCECODE_RULES_PATH, f"{rule_name}.yml"
|
|
279
|
+
),
|
|
280
|
+
all_rules,
|
|
281
|
+
)
|
|
282
|
+
)
|
|
261
283
|
|
|
262
284
|
if len(rules_path) == 0:
|
|
263
285
|
log.debug("No semgrep code rules to run")
|
|
@@ -266,7 +288,9 @@ class Analyzer:
|
|
|
266
288
|
try:
|
|
267
289
|
log.debug(f"Running semgrep code rules against {path}")
|
|
268
290
|
response = self._invoke_semgrep(target=path, rules=rules_path)
|
|
269
|
-
rule_results = self._format_semgrep_response(
|
|
291
|
+
rule_results = self._format_semgrep_response(
|
|
292
|
+
response, targetpath=targetpath
|
|
293
|
+
)
|
|
270
294
|
issues += sum(len(res) for res in rule_results.values())
|
|
271
295
|
|
|
272
296
|
results = results | rule_results
|
|
@@ -278,9 +302,11 @@ class Analyzer:
|
|
|
278
302
|
def _invoke_semgrep(self, target: str, rules: Iterable[str]):
|
|
279
303
|
try:
|
|
280
304
|
SEMGREP_MAX_TARGET_BYTES = int(
|
|
281
|
-
os.getenv("GUARDDOG_SEMGREP_MAX_TARGET_BYTES", MAX_BYTES_DEFAULT)
|
|
305
|
+
os.getenv("GUARDDOG_SEMGREP_MAX_TARGET_BYTES", MAX_BYTES_DEFAULT)
|
|
306
|
+
)
|
|
282
307
|
SEMGREP_TIMEOUT = int(
|
|
283
|
-
os.getenv("GUARDDOG_SEMGREP_TIMEOUT", SEMGREP_TIMEOUT_DEFAULT)
|
|
308
|
+
os.getenv("GUARDDOG_SEMGREP_TIMEOUT", SEMGREP_TIMEOUT_DEFAULT)
|
|
309
|
+
)
|
|
284
310
|
cmd = ["semgrep"]
|
|
285
311
|
for rule in rules:
|
|
286
312
|
cmd.extend(["--config", rule])
|
|
@@ -295,7 +321,9 @@ class Analyzer:
|
|
|
295
321
|
cmd.append(f"--max-target-bytes={SEMGREP_MAX_TARGET_BYTES}")
|
|
296
322
|
cmd.append(target)
|
|
297
323
|
log.debug(f"Invoking semgrep with command line: {' '.join(cmd)}")
|
|
298
|
-
result = subprocess.run(
|
|
324
|
+
result = subprocess.run(
|
|
325
|
+
cmd, capture_output=True, check=True, encoding="utf-8"
|
|
326
|
+
)
|
|
299
327
|
return json.loads(str(result.stdout))
|
|
300
328
|
except FileNotFoundError:
|
|
301
329
|
raise Exception("unable to find semgrep binary")
|
|
@@ -358,9 +386,9 @@ output: {e.output}
|
|
|
358
386
|
location = file_path + ":" + str(start_line)
|
|
359
387
|
|
|
360
388
|
finding = {
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
389
|
+
"location": location,
|
|
390
|
+
"code": code,
|
|
391
|
+
"message": result["extra"]["message"],
|
|
364
392
|
}
|
|
365
393
|
|
|
366
394
|
rule_results = results[rule_name]
|
|
@@ -384,7 +412,7 @@ output: {e.output}
|
|
|
384
412
|
"""
|
|
385
413
|
snippet = []
|
|
386
414
|
try:
|
|
387
|
-
with open(file_path,
|
|
415
|
+
with open(file_path, "r") as file:
|
|
388
416
|
for current_line_number, line in enumerate(file, start=1):
|
|
389
417
|
if start_line <= current_line_number <= end_line:
|
|
390
418
|
snippet.append(line)
|
|
@@ -395,12 +423,12 @@ output: {e.output}
|
|
|
395
423
|
except Exception as e:
|
|
396
424
|
log.error(f"Error reading file {file_path}: {str(e)}")
|
|
397
425
|
|
|
398
|
-
return
|
|
426
|
+
return "".join(snippet)
|
|
399
427
|
|
|
400
428
|
# Makes sure the matching code to be displayed isn't too long
|
|
401
429
|
def trim_code_snippet(self, code):
|
|
402
430
|
THRESHOLD = 250
|
|
403
431
|
if len(code) > THRESHOLD:
|
|
404
|
-
return code[: THRESHOLD - 10] +
|
|
432
|
+
return code[: THRESHOLD - 10] + "..." + code[len(code) - 10 :]
|
|
405
433
|
else:
|
|
406
434
|
return code
|
|
@@ -15,10 +15,10 @@ class BundledBinary(Detector):
|
|
|
15
15
|
# magic bytes are the first few bytes of a file that can be used to identify the file type
|
|
16
16
|
# regardless of their extension
|
|
17
17
|
magic_bytes = {
|
|
18
|
-
"exe": b"\
|
|
19
|
-
"elf": b"\
|
|
20
|
-
"macho32": b"\
|
|
21
|
-
"macho64": b"\
|
|
18
|
+
"exe": b"\x4d\x5a",
|
|
19
|
+
"elf": b"\x7f\x45\x4c\x46",
|
|
20
|
+
"macho32": b"\xfe\xed\xfa\xce",
|
|
21
|
+
"macho64": b"\xfe\xed\xfa\xcf",
|
|
22
22
|
}
|
|
23
23
|
|
|
24
24
|
def __init__(self):
|
|
@@ -40,7 +40,7 @@ class BundledBinary(Detector):
|
|
|
40
40
|
def sha256(file: str) -> str:
|
|
41
41
|
with open(file, "rb") as f:
|
|
42
42
|
hasher = hashlib.sha256()
|
|
43
|
-
while
|
|
43
|
+
while chunk := f.read(4096):
|
|
44
44
|
hasher.update(chunk)
|
|
45
45
|
return hasher.hexdigest()
|
|
46
46
|
|
|
@@ -65,7 +65,7 @@ class BundledBinary(Detector):
|
|
|
65
65
|
if not bin_files:
|
|
66
66
|
return False, ""
|
|
67
67
|
|
|
68
|
-
output_lines =
|
|
68
|
+
output_lines = "\n".join(
|
|
69
69
|
f"{digest}: {', '.join(files)}" for digest, files in bin_files.items()
|
|
70
70
|
)
|
|
71
71
|
return True, f"Binary file/s detected in package:\n{output_lines}"
|
|
@@ -41,7 +41,9 @@ class DeceptiveAuthorDetector(Detector):
|
|
|
41
41
|
# read internal maintained list of placeholder email domains
|
|
42
42
|
# this domains are usually used by authors who want to don't want to reveal their real email
|
|
43
43
|
placeholder_email_domains_data = None
|
|
44
|
-
with open(
|
|
44
|
+
with open(
|
|
45
|
+
placeholder_email_domains_path, "r"
|
|
46
|
+
) as placeholder_email_domains_file:
|
|
45
47
|
placeholder_email_domains_data = set(
|
|
46
48
|
placeholder_email_domains_file.read().split("\n")
|
|
47
49
|
)
|
|
@@ -11,8 +11,13 @@ class Detector:
|
|
|
11
11
|
|
|
12
12
|
# returns (ruleMatches, message)
|
|
13
13
|
@abstractmethod
|
|
14
|
-
def detect(
|
|
15
|
-
|
|
14
|
+
def detect(
|
|
15
|
+
self,
|
|
16
|
+
package_info,
|
|
17
|
+
path: Optional[str] = None,
|
|
18
|
+
name: Optional[str] = None,
|
|
19
|
+
version: Optional[str] = None,
|
|
20
|
+
) -> tuple[bool, Optional[str]]:
|
|
16
21
|
pass # pragma: no cover
|
|
17
22
|
|
|
18
23
|
def get_name(self) -> str:
|
|
@@ -15,12 +15,17 @@ class EmptyInfoDetector(Detector):
|
|
|
15
15
|
def __init__(self):
|
|
16
16
|
super().__init__(
|
|
17
17
|
name="empty_information",
|
|
18
|
-
description="Identify packages with an empty description field"
|
|
18
|
+
description="Identify packages with an empty description field",
|
|
19
19
|
)
|
|
20
20
|
|
|
21
21
|
@abstractmethod
|
|
22
|
-
def detect(
|
|
23
|
-
|
|
22
|
+
def detect(
|
|
23
|
+
self,
|
|
24
|
+
package_info,
|
|
25
|
+
path: Optional[str] = None,
|
|
26
|
+
name: Optional[str] = None,
|
|
27
|
+
version: Optional[str] = None,
|
|
28
|
+
) -> tuple[bool, str]:
|
|
24
29
|
"""
|
|
25
30
|
Uses a package's information from PyPI's JSON API to determine
|
|
26
31
|
if the package has an empty description
|
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
import json
|
|
2
|
+
import logging
|
|
2
3
|
import os
|
|
3
4
|
from typing import Optional
|
|
4
5
|
|
|
5
6
|
from guarddog.analyzer.metadata.typosquatting import TyposquatDetector
|
|
6
7
|
from guarddog.utils.config import TOP_PACKAGES_CACHE_LOCATION
|
|
7
8
|
|
|
9
|
+
log = logging.getLogger("guarddog")
|
|
10
|
+
|
|
8
11
|
|
|
9
12
|
class GoTyposquatDetector(TyposquatDetector):
|
|
10
13
|
"""Detector for typosquatting attacks for go modules. Checks for distance one Levenshtein,
|
|
@@ -25,19 +28,24 @@ class GoTyposquatDetector(TyposquatDetector):
|
|
|
25
28
|
)
|
|
26
29
|
|
|
27
30
|
top_packages_path = os.path.join(resources_dir, top_packages_filename)
|
|
28
|
-
|
|
29
|
-
top_packages_information = None
|
|
30
|
-
|
|
31
|
-
if top_packages_filename in os.listdir(resources_dir):
|
|
32
|
-
with open(top_packages_path, "r") as top_packages_file:
|
|
33
|
-
top_packages_information = json.load(top_packages_file)
|
|
31
|
+
top_packages_information = self._get_top_packages_local(top_packages_path)
|
|
34
32
|
|
|
35
33
|
if top_packages_information is None:
|
|
36
34
|
raise Exception(
|
|
37
|
-
f"Could not retrieve top Go packages from {top_packages_path}"
|
|
35
|
+
f"Could not retrieve top Go packages from {top_packages_path}"
|
|
36
|
+
)
|
|
38
37
|
|
|
39
38
|
return set(top_packages_information)
|
|
40
39
|
|
|
40
|
+
def _get_top_packages_local(self, path: str) -> list[dict] | None:
|
|
41
|
+
try:
|
|
42
|
+
with open(path, "r") as f:
|
|
43
|
+
result = json.load(f)
|
|
44
|
+
return result
|
|
45
|
+
except FileNotFoundError:
|
|
46
|
+
log.error(f"File not found: {path}")
|
|
47
|
+
return None
|
|
48
|
+
|
|
41
49
|
def detect(
|
|
42
50
|
self,
|
|
43
51
|
package_info,
|
|
@@ -104,8 +112,8 @@ class GoTyposquatDetector(TyposquatDetector):
|
|
|
104
112
|
continue
|
|
105
113
|
|
|
106
114
|
# Get form when replacing or removing go/golang term
|
|
107
|
-
replaced_form = terms[:i] + [confused_term] + terms[i + 1:]
|
|
108
|
-
removed_form = terms[:i] + terms[i + 1:]
|
|
115
|
+
replaced_form = terms[:i] + [confused_term] + terms[i + 1 :]
|
|
116
|
+
removed_form = terms[:i] + terms[i + 1 :]
|
|
109
117
|
|
|
110
118
|
for form in (replaced_form, removed_form):
|
|
111
119
|
confused_forms.append("-".join(form))
|
|
@@ -3,6 +3,11 @@ from typing import Optional
|
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
class NPMBundledBinary(BundledBinary):
|
|
6
|
-
def detect(
|
|
7
|
-
|
|
6
|
+
def detect(
|
|
7
|
+
self,
|
|
8
|
+
package_info,
|
|
9
|
+
path: Optional[str] = None,
|
|
10
|
+
name: Optional[str] = None,
|
|
11
|
+
version: Optional[str] = None,
|
|
12
|
+
) -> tuple[bool, str]:
|
|
8
13
|
return super().detect(package_info, path, name, version)
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Empty Information Detector
|
|
2
2
|
|
|
3
3
|
Detects if a package contains an empty description
|
|
4
4
|
"""
|
|
5
|
+
|
|
5
6
|
import os.path
|
|
6
7
|
from typing import Optional
|
|
7
8
|
|
|
@@ -12,13 +13,15 @@ MESSAGE = "This package has an empty description on PyPi"
|
|
|
12
13
|
|
|
13
14
|
class NPMEmptyInfoDetector(EmptyInfoDetector):
|
|
14
15
|
|
|
15
|
-
def detect(
|
|
16
|
-
|
|
16
|
+
def detect(
|
|
17
|
+
self,
|
|
18
|
+
package_info,
|
|
19
|
+
path: Optional[str] = None,
|
|
20
|
+
name: Optional[str] = None,
|
|
21
|
+
version: Optional[str] = None,
|
|
22
|
+
) -> tuple[bool, str]:
|
|
17
23
|
if path is None:
|
|
18
24
|
raise TypeError("path must be a string")
|
|
19
25
|
package_path = os.path.join(path, "package")
|
|
20
|
-
content = map(
|
|
21
|
-
lambda x: x.lower(),
|
|
22
|
-
os.listdir(package_path)
|
|
23
|
-
)
|
|
26
|
+
content = map(lambda x: x.lower(), os.listdir(package_path))
|
|
24
27
|
return "readme.md" not in content, EmptyInfoDetector.MESSAGE_TEMPLATE % "npm"
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Compromised Email Detector
|
|
2
2
|
|
|
3
3
|
Detects if a maintainer's email domain might have been compromised.
|
|
4
4
|
"""
|
|
@@ -8,8 +8,9 @@ from typing import Optional
|
|
|
8
8
|
|
|
9
9
|
from dateutil import parser
|
|
10
10
|
|
|
11
|
-
from guarddog.analyzer.metadata.potentially_compromised_email_domain import
|
|
12
|
-
PotentiallyCompromisedEmailDomainDetector
|
|
11
|
+
from guarddog.analyzer.metadata.potentially_compromised_email_domain import (
|
|
12
|
+
PotentiallyCompromisedEmailDomainDetector,
|
|
13
|
+
)
|
|
13
14
|
|
|
14
15
|
from .utils import NPM_API_MAINTAINER_EMAIL_WARNING, get_email_addresses
|
|
15
16
|
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Empty Information Detector
|
|
2
2
|
|
|
3
3
|
Detects when a package has its latest release version to 0.0.0
|
|
4
4
|
"""
|
|
5
|
+
|
|
5
6
|
from typing import Optional
|
|
6
7
|
|
|
7
8
|
from guarddog.analyzer.metadata.release_zero import ReleaseZeroDetector
|
|
@@ -9,7 +10,14 @@ from guarddog.analyzer.metadata.release_zero import ReleaseZeroDetector
|
|
|
9
10
|
|
|
10
11
|
class NPMReleaseZeroDetector(ReleaseZeroDetector):
|
|
11
12
|
|
|
12
|
-
def detect(
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
13
|
+
def detect(
|
|
14
|
+
self,
|
|
15
|
+
package_info,
|
|
16
|
+
path: Optional[str] = None,
|
|
17
|
+
name: Optional[str] = None,
|
|
18
|
+
version: Optional[str] = None,
|
|
19
|
+
) -> tuple[bool, str]:
|
|
20
|
+
return (
|
|
21
|
+
package_info["dist-tags"]["latest"] in ["0.0.0", "0.0", "0"],
|
|
22
|
+
ReleaseZeroDetector.MESSAGE_TEMPLATE % package_info["dist-tags"]["latest"],
|
|
23
|
+
)
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import json
|
|
2
|
+
import logging
|
|
2
3
|
import os
|
|
3
4
|
from datetime import datetime, timedelta
|
|
4
5
|
from typing import Optional
|
|
@@ -7,6 +8,8 @@ from guarddog.analyzer.metadata.typosquatting import TyposquatDetector
|
|
|
7
8
|
from guarddog.utils.config import TOP_PACKAGES_CACHE_LOCATION
|
|
8
9
|
import requests
|
|
9
10
|
|
|
11
|
+
log = logging.getLogger("guarddog")
|
|
12
|
+
|
|
10
13
|
|
|
11
14
|
class NPMTyposquatDetector(TyposquatDetector):
|
|
12
15
|
"""Detector for typosquatting attacks. Detects if a package name is a typosquat of one of the top 5000 packages.
|
|
@@ -32,24 +35,52 @@ class NPMTyposquatDetector(TyposquatDetector):
|
|
|
32
35
|
)
|
|
33
36
|
|
|
34
37
|
top_packages_path = os.path.join(resources_dir, top_packages_filename)
|
|
38
|
+
top_packages_information = self._get_top_packages_local(top_packages_path)
|
|
35
39
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
+
if self._file_is_expired(top_packages_path, days=30):
|
|
41
|
+
new_information = self._get_top_packages_network(popular_packages_url)
|
|
42
|
+
if new_information is not None:
|
|
43
|
+
top_packages_information = new_information
|
|
40
44
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
top_packages_information = json.load(top_packages_file)
|
|
45
|
+
with open(top_packages_path, "w+") as f:
|
|
46
|
+
json.dump(new_information, f, ensure_ascii=False, indent=4)
|
|
44
47
|
|
|
45
48
|
if top_packages_information is None:
|
|
46
|
-
|
|
47
|
-
top_packages_information = list([i["name"] for i in response[0:8000]])
|
|
48
|
-
with open(top_packages_path, "w+") as f:
|
|
49
|
-
json.dump(top_packages_information, f, ensure_ascii=False, indent=4)
|
|
50
|
-
|
|
49
|
+
return set()
|
|
51
50
|
return set(top_packages_information)
|
|
52
51
|
|
|
52
|
+
def _file_is_expired(self, path: str, days: int) -> bool:
|
|
53
|
+
try:
|
|
54
|
+
update_time = datetime.fromtimestamp(os.path.getmtime(path))
|
|
55
|
+
return datetime.now() - update_time > timedelta(days=days)
|
|
56
|
+
except FileNotFoundError:
|
|
57
|
+
return True
|
|
58
|
+
|
|
59
|
+
def _get_top_packages_local(self, path: str) -> list[dict] | None:
|
|
60
|
+
try:
|
|
61
|
+
with open(path, "r") as f:
|
|
62
|
+
result = json.load(f)
|
|
63
|
+
return result
|
|
64
|
+
except FileNotFoundError:
|
|
65
|
+
log.error(f"File not found: {path}")
|
|
66
|
+
return None
|
|
67
|
+
|
|
68
|
+
def _get_top_packages_network(self, url: str) -> list[dict] | None:
|
|
69
|
+
try:
|
|
70
|
+
response = requests.get(url)
|
|
71
|
+
response.raise_for_status()
|
|
72
|
+
|
|
73
|
+
response_data = response.json()
|
|
74
|
+
result = list([i["name"] for i in response_data[0:8000]])
|
|
75
|
+
|
|
76
|
+
return result
|
|
77
|
+
except json.JSONDecodeError:
|
|
78
|
+
log.error(f'Couldn`t convert to json: "{response.text}"')
|
|
79
|
+
return None
|
|
80
|
+
except requests.exceptions.RequestException as e:
|
|
81
|
+
log.error(f"Network error: {e}")
|
|
82
|
+
return None
|
|
83
|
+
|
|
53
84
|
def detect(
|
|
54
85
|
self,
|
|
55
86
|
package_info,
|
|
@@ -79,7 +110,7 @@ class NPMTyposquatDetector(TyposquatDetector):
|
|
|
79
110
|
return False, None
|
|
80
111
|
|
|
81
112
|
def _get_confused_forms(self, package_name) -> list:
|
|
82
|
-
"""
|
|
113
|
+
"""Gets confused terms for npm packages.
|
|
83
114
|
Currently, there are no confused terms for npm packages.
|
|
84
115
|
"""
|
|
85
116
|
return []
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
2
|
|
|
3
|
-
from guarddog.analyzer.metadata.unclaimed_maintainer_email_domain import
|
|
4
|
-
UnclaimedMaintainerEmailDomainDetector
|
|
3
|
+
from guarddog.analyzer.metadata.unclaimed_maintainer_email_domain import (
|
|
4
|
+
UnclaimedMaintainerEmailDomainDetector,
|
|
5
|
+
)
|
|
5
6
|
|
|
6
7
|
from .utils import NPM_API_MAINTAINER_EMAIL_WARNING, get_email_addresses
|
|
7
8
|
|
|
@@ -6,8 +6,7 @@ NPM_API_MAINTAINER_EMAIL_WARNING = (
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def get_email_addresses(package_info: dict) -> set[str]:
|
|
9
|
-
return {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
} - {None, ""}
|
|
9
|
+
return {m["email"] for m in package_info.get("maintainers", []) if "email" in m} - {
|
|
10
|
+
None,
|
|
11
|
+
"",
|
|
12
|
+
}
|
|
@@ -60,10 +60,14 @@ class PotentiallyCompromisedEmailDomainDetector(Detector):
|
|
|
60
60
|
has_issues = True
|
|
61
61
|
|
|
62
62
|
messages.append(
|
|
63
|
-
f"The domain name of the maintainer's email address ({email}) was"
|
|
64
|
-
"
|
|
65
|
-
"
|
|
66
|
-
|
|
63
|
+
f"The domain name of the maintainer's email address ({email}) was"
|
|
64
|
+
" re-registered after"
|
|
65
|
+
" the latest release of this "
|
|
66
|
+
"package. This can be an indicator that this is a"
|
|
67
|
+
" custom domain that expired, and was leveraged by"
|
|
68
|
+
" an attacker to compromise the"
|
|
69
|
+
f" package owner's {self.ecosystem}"
|
|
70
|
+
" account."
|
|
67
71
|
)
|
|
68
72
|
|
|
69
73
|
return has_issues, "\n".join(messages)
|
|
@@ -1,11 +1,17 @@
|
|
|
1
1
|
from guarddog.analyzer.metadata.pypi.empty_information import PypiEmptyInfoDetector
|
|
2
|
-
from guarddog.analyzer.metadata.pypi.potentially_compromised_email_domain import
|
|
3
|
-
PypiPotentiallyCompromisedEmailDomainDetector
|
|
4
|
-
|
|
5
|
-
|
|
2
|
+
from guarddog.analyzer.metadata.pypi.potentially_compromised_email_domain import (
|
|
3
|
+
PypiPotentiallyCompromisedEmailDomainDetector,
|
|
4
|
+
)
|
|
5
|
+
from guarddog.analyzer.metadata.pypi.unclaimed_maintainer_email_domain import (
|
|
6
|
+
PypiUnclaimedMaintainerEmailDomainDetector,
|
|
7
|
+
)
|
|
6
8
|
from guarddog.analyzer.metadata.pypi.release_zero import PypiReleaseZeroDetector
|
|
7
|
-
from guarddog.analyzer.metadata.pypi.repository_integrity_mismatch import
|
|
8
|
-
|
|
9
|
+
from guarddog.analyzer.metadata.pypi.repository_integrity_mismatch import (
|
|
10
|
+
PypiIntegrityMismatchDetector,
|
|
11
|
+
)
|
|
12
|
+
from guarddog.analyzer.metadata.pypi.single_python_file import (
|
|
13
|
+
PypiSinglePythonFileDetector,
|
|
14
|
+
)
|
|
9
15
|
from guarddog.analyzer.metadata.pypi.typosquatting import PypiTyposquatDetector
|
|
10
16
|
from guarddog.analyzer.metadata.pypi.bundled_binary import PypiBundledBinary
|
|
11
17
|
from guarddog.analyzer.metadata.pypi.deceptive_author import PypiDeceptiveAuthor
|
|
@@ -3,6 +3,11 @@ from typing import Optional
|
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
class PypiBundledBinary(BundledBinary):
|
|
6
|
-
def detect(
|
|
7
|
-
|
|
6
|
+
def detect(
|
|
7
|
+
self,
|
|
8
|
+
package_info,
|
|
9
|
+
path: Optional[str] = None,
|
|
10
|
+
name: Optional[str] = None,
|
|
11
|
+
version: Optional[str] = None,
|
|
12
|
+
) -> tuple[bool, str]:
|
|
8
13
|
return super().detect(package_info, path, name, version)
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Empty Information Detector
|
|
2
2
|
|
|
3
3
|
Detects if a package contains an empty description
|
|
4
4
|
"""
|
|
5
|
+
|
|
5
6
|
import logging
|
|
6
7
|
from typing import Optional
|
|
7
8
|
|
|
@@ -13,7 +14,17 @@ log = logging.getLogger("guarddog")
|
|
|
13
14
|
|
|
14
15
|
|
|
15
16
|
class PypiEmptyInfoDetector(EmptyInfoDetector):
|
|
16
|
-
def detect(
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
17
|
+
def detect(
|
|
18
|
+
self,
|
|
19
|
+
package_info,
|
|
20
|
+
path: Optional[str] = None,
|
|
21
|
+
name: Optional[str] = None,
|
|
22
|
+
version: Optional[str] = None,
|
|
23
|
+
) -> tuple[bool, str]:
|
|
24
|
+
log.debug(
|
|
25
|
+
f"Running PyPI empty description heuristic on package {name} version {version}"
|
|
26
|
+
)
|
|
27
|
+
return (
|
|
28
|
+
len(package_info["info"]["description"].strip()) == 0,
|
|
29
|
+
EmptyInfoDetector.MESSAGE_TEMPLATE % "PyPI",
|
|
30
|
+
)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Compromised Email Detector
|
|
2
2
|
|
|
3
3
|
Detects if a maintainer's email domain might have been compromised.
|
|
4
4
|
"""
|
|
@@ -9,8 +9,9 @@ from typing import Optional
|
|
|
9
9
|
from dateutil import parser
|
|
10
10
|
from packaging import version
|
|
11
11
|
|
|
12
|
-
from guarddog.analyzer.metadata.potentially_compromised_email_domain import
|
|
13
|
-
PotentiallyCompromisedEmailDomainDetector
|
|
12
|
+
from guarddog.analyzer.metadata.potentially_compromised_email_domain import (
|
|
13
|
+
PotentiallyCompromisedEmailDomainDetector,
|
|
14
|
+
)
|
|
14
15
|
|
|
15
16
|
from .utils import get_email_addresses
|
|
16
17
|
|