guarddog 2.6.0__py3-none-any.whl → 2.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guarddog/analyzer/analyzer.py +51 -23
- guarddog/analyzer/metadata/__init__.py +2 -0
- guarddog/analyzer/metadata/bundled_binary.py +6 -6
- guarddog/analyzer/metadata/deceptive_author.py +3 -1
- guarddog/analyzer/metadata/detector.py +7 -2
- guarddog/analyzer/metadata/empty_information.py +8 -3
- guarddog/analyzer/metadata/go/typosquatting.py +17 -9
- guarddog/analyzer/metadata/npm/bundled_binary.py +7 -2
- guarddog/analyzer/metadata/npm/deceptive_author.py +1 -1
- guarddog/analyzer/metadata/npm/direct_url_dependency.py +2 -1
- guarddog/analyzer/metadata/npm/empty_information.py +10 -7
- guarddog/analyzer/metadata/npm/potentially_compromised_email_domain.py +4 -3
- guarddog/analyzer/metadata/npm/release_zero.py +13 -5
- guarddog/analyzer/metadata/npm/typosquatting.py +44 -13
- guarddog/analyzer/metadata/npm/unclaimed_maintainer_email_domain.py +3 -2
- guarddog/analyzer/metadata/npm/utils.py +4 -5
- guarddog/analyzer/metadata/potentially_compromised_email_domain.py +8 -4
- guarddog/analyzer/metadata/pypi/__init__.py +12 -6
- guarddog/analyzer/metadata/pypi/bundled_binary.py +7 -2
- guarddog/analyzer/metadata/pypi/deceptive_author.py +1 -1
- guarddog/analyzer/metadata/pypi/empty_information.py +16 -5
- guarddog/analyzer/metadata/pypi/potentially_compromised_email_domain.py +4 -3
- guarddog/analyzer/metadata/pypi/release_zero.py +16 -6
- guarddog/analyzer/metadata/pypi/repository_integrity_mismatch.py +53 -27
- guarddog/analyzer/metadata/pypi/single_python_file.py +9 -4
- guarddog/analyzer/metadata/pypi/typosquatting.py +73 -26
- guarddog/analyzer/metadata/pypi/unclaimed_maintainer_email_domain.py +6 -2
- guarddog/analyzer/metadata/pypi/utils.py +1 -4
- guarddog/analyzer/metadata/release_zero.py +1 -1
- guarddog/analyzer/metadata/repository_integrity_mismatch.py +10 -3
- guarddog/analyzer/metadata/resources/top_pypi_packages.json +29998 -29986
- guarddog/analyzer/metadata/typosquatting.py +12 -8
- guarddog/analyzer/metadata/unclaimed_maintainer_email_domain.py +7 -2
- guarddog/analyzer/metadata/utils.py +1 -1
- guarddog/analyzer/sourcecode/__init__.py +34 -7
- guarddog/analyzer/sourcecode/api-obfuscation.yml +42 -0
- guarddog/analyzer/sourcecode/code-execution.yml +1 -0
- guarddog/analyzer/sourcecode/dll-hijacking.yml +5 -0
- guarddog/analyzer/sourcecode/shady-links.yml +1 -1
- guarddog/analyzer/sourcecode/suspicious_passwd_access_linux.yar +12 -0
- guarddog/analyzer/sourcecode/unicode.yml +75 -0
- guarddog/ecosystems.py +3 -0
- guarddog/scanners/__init__.py +5 -0
- guarddog/scanners/extension_scanner.py +152 -0
- guarddog/scanners/github_action_scanner.py +6 -2
- guarddog/scanners/go_project_scanner.py +1 -1
- guarddog/scanners/npm_package_scanner.py +12 -4
- guarddog/scanners/pypi_package_scanner.py +9 -3
- guarddog/scanners/pypi_project_scanner.py +9 -12
- guarddog/scanners/scanner.py +1 -0
- guarddog/utils/archives.py +134 -9
- guarddog/utils/config.py +24 -2
- guarddog/utils/package_info.py +3 -1
- {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info}/METADATA +10 -9
- guarddog-2.7.1.dist-info/RECORD +100 -0
- {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info}/WHEEL +1 -1
- guarddog-2.6.0.dist-info/RECORD +0 -96
- {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info}/entry_points.txt +0 -0
- {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info/licenses}/LICENSE +0 -0
- {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info/licenses}/LICENSE-3rdparty.csv +0 -0
- {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info/licenses}/NOTICE +0 -0
|
@@ -5,14 +5,16 @@ from guarddog.analyzer.metadata.detector import Detector
|
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class TyposquatDetector(Detector):
|
|
8
|
-
MESSAGE_TEMPLATE =
|
|
9
|
-
|
|
8
|
+
MESSAGE_TEMPLATE = (
|
|
9
|
+
"This package closely resembles the following package names, and might be a typosquatting "
|
|
10
|
+
"attempt: %s"
|
|
11
|
+
)
|
|
10
12
|
|
|
11
13
|
def __init__(self) -> None:
|
|
12
14
|
self.popular_packages = self._get_top_packages() # Find top PyPI packages
|
|
13
15
|
super().__init__(
|
|
14
16
|
name="typosquatting",
|
|
15
|
-
description="Identify packages that are named closely to an highly popular package"
|
|
17
|
+
description="Identify packages that are named closely to an highly popular package",
|
|
16
18
|
)
|
|
17
19
|
|
|
18
20
|
@abc.abstractmethod
|
|
@@ -37,19 +39,19 @@ class TyposquatDetector(Detector):
|
|
|
37
39
|
# Addition to name2
|
|
38
40
|
if len(name1) > len(name2):
|
|
39
41
|
for i in range(len(name1)):
|
|
40
|
-
if name1[:i] + name1[i + 1:] == name2:
|
|
42
|
+
if name1[:i] + name1[i + 1 :] == name2:
|
|
41
43
|
return True
|
|
42
44
|
|
|
43
45
|
# Addition to name1
|
|
44
46
|
elif len(name2) > len(name1):
|
|
45
47
|
for i in range(len(name2)):
|
|
46
|
-
if name2[:i] + name2[i + 1:] == name1:
|
|
48
|
+
if name2[:i] + name2[i + 1 :] == name1:
|
|
47
49
|
return True
|
|
48
50
|
|
|
49
51
|
# Edit character
|
|
50
52
|
else:
|
|
51
53
|
for i in range(len(name1)):
|
|
52
|
-
if name1[:i] + name1[i + 1:] == name2[:i] + name2[i + 1:]:
|
|
54
|
+
if name1[:i] + name1[i + 1 :] == name2[:i] + name2[i + 1 :]:
|
|
53
55
|
return True
|
|
54
56
|
|
|
55
57
|
return False
|
|
@@ -68,7 +70,7 @@ class TyposquatDetector(Detector):
|
|
|
68
70
|
|
|
69
71
|
if len(name1) == len(name2):
|
|
70
72
|
for i in range(len(name1) - 1):
|
|
71
|
-
swapped_name1 = name1[:i] + name1[i + 1] + name1[i] + name1[i + 2:]
|
|
73
|
+
swapped_name1 = name1[:i] + name1[i + 1] + name1[i] + name1[i + 2 :]
|
|
72
74
|
if swapped_name1 == name2:
|
|
73
75
|
return True
|
|
74
76
|
|
|
@@ -106,7 +108,9 @@ class TyposquatDetector(Detector):
|
|
|
106
108
|
bool: True
|
|
107
109
|
"""
|
|
108
110
|
|
|
109
|
-
return self._is_distance_one_Levenshtein(
|
|
111
|
+
return self._is_distance_one_Levenshtein(
|
|
112
|
+
package1, package2
|
|
113
|
+
) or self._is_swapped_typo(package1, package2)
|
|
110
114
|
|
|
111
115
|
@abc.abstractmethod
|
|
112
116
|
def _get_confused_forms(self, package_name) -> list:
|
|
@@ -20,8 +20,13 @@ class UnclaimedMaintainerEmailDomainDetector(Detector):
|
|
|
20
20
|
)
|
|
21
21
|
self.ecosystem = ecosystem
|
|
22
22
|
|
|
23
|
-
def detect(
|
|
24
|
-
|
|
23
|
+
def detect(
|
|
24
|
+
self,
|
|
25
|
+
package_info,
|
|
26
|
+
path: Optional[str] = None,
|
|
27
|
+
name: Optional[str] = None,
|
|
28
|
+
version: Optional[str] = None,
|
|
29
|
+
) -> tuple[bool, str]:
|
|
25
30
|
"""
|
|
26
31
|
Uses a package's information to determine
|
|
27
32
|
if the maintainer's email domain is unclaimed and thus exposed to hijacking
|
|
@@ -26,7 +26,7 @@ def get_domain_creation_date(domain) -> tuple[Optional[datetime], bool]:
|
|
|
26
26
|
|
|
27
27
|
try:
|
|
28
28
|
domain_information = whois.whois(domain)
|
|
29
|
-
except whois.
|
|
29
|
+
except whois.exceptions.PywhoisError as e:
|
|
30
30
|
# The domain doesn't exist at all, if that's the case we consider it vulnerable
|
|
31
31
|
# since someone could register it
|
|
32
32
|
return None, (not str(e).lower().startswith("no match for"))
|
|
@@ -11,17 +11,23 @@ from guarddog.ecosystems import ECOSYSTEM
|
|
|
11
11
|
|
|
12
12
|
current_dir = pathlib.Path(__file__).parent.resolve()
|
|
13
13
|
|
|
14
|
+
EXTENSION_YARA_PREFIX = "extension_"
|
|
14
15
|
|
|
15
16
|
# These data class aim to reduce the spreading of the logic
|
|
16
|
-
# Instead of using the a dict as a structure and parse it difffently
|
|
17
|
+
# Instead of using a dict as a structure and parsing it differently
|
|
18
|
+
# depending on the type
|
|
19
|
+
|
|
20
|
+
|
|
17
21
|
@dataclass
|
|
18
22
|
class SourceCodeRule:
|
|
19
23
|
"""
|
|
20
24
|
Base class for source code rules
|
|
21
25
|
"""
|
|
26
|
+
|
|
22
27
|
id: str
|
|
23
28
|
file: str
|
|
24
29
|
description: str
|
|
30
|
+
ecosystem: Optional[ECOSYSTEM] # None means "any ecosystem"
|
|
25
31
|
|
|
26
32
|
|
|
27
33
|
@dataclass
|
|
@@ -29,6 +35,7 @@ class YaraRule(SourceCodeRule):
|
|
|
29
35
|
"""
|
|
30
36
|
Yara rule just reimplements base
|
|
31
37
|
"""
|
|
38
|
+
|
|
32
39
|
pass
|
|
33
40
|
|
|
34
41
|
|
|
@@ -38,7 +45,7 @@ class SempgrepRule(SourceCodeRule):
|
|
|
38
45
|
Semgrep rule are language specific
|
|
39
46
|
Content of rule in yaml format is accessible through rule_content
|
|
40
47
|
"""
|
|
41
|
-
|
|
48
|
+
|
|
42
49
|
rule_content: dict
|
|
43
50
|
|
|
44
51
|
|
|
@@ -54,7 +61,8 @@ def get_sourcecode_rules(
|
|
|
54
61
|
for rule in SOURCECODE_RULES:
|
|
55
62
|
if kind and not isinstance(rule, kind):
|
|
56
63
|
continue
|
|
57
|
-
|
|
64
|
+
# Include rules that match the specific ecosystem OR rules that apply to any ecosystem (None)
|
|
65
|
+
if rule.ecosystem is not None and rule.ecosystem != ecosystem:
|
|
58
66
|
continue
|
|
59
67
|
yield rule
|
|
60
68
|
|
|
@@ -78,13 +86,15 @@ for file_name in semgrep_rule_file_names:
|
|
|
78
86
|
case "javascript" | "typescript" | "json":
|
|
79
87
|
ecosystems.add(ECOSYSTEM.NPM)
|
|
80
88
|
ecosystems.add(ECOSYSTEM.GITHUB_ACTION)
|
|
89
|
+
ecosystems.add(ECOSYSTEM.EXTENSION)
|
|
81
90
|
case "go":
|
|
82
91
|
ecosystems.add(ECOSYSTEM.GO)
|
|
83
92
|
case _:
|
|
84
93
|
continue
|
|
85
94
|
|
|
86
95
|
for ecosystem in ecosystems:
|
|
87
|
-
# avoids duplicates when multiple languages are supported
|
|
96
|
+
# avoids duplicates when multiple languages are supported
|
|
97
|
+
# by a rule
|
|
88
98
|
if not next(
|
|
89
99
|
filter(
|
|
90
100
|
lambda r: r.id == rule["id"],
|
|
@@ -96,7 +106,9 @@ for file_name in semgrep_rule_file_names:
|
|
|
96
106
|
SempgrepRule(
|
|
97
107
|
id=rule["id"],
|
|
98
108
|
ecosystem=ecosystem,
|
|
99
|
-
description=rule.get("metadata", {}).get(
|
|
109
|
+
description=rule.get("metadata", {}).get(
|
|
110
|
+
"description", ""
|
|
111
|
+
),
|
|
100
112
|
file=file_name,
|
|
101
113
|
rule_content=rule,
|
|
102
114
|
)
|
|
@@ -109,11 +121,26 @@ yara_rule_file_names = list(
|
|
|
109
121
|
# refer to README.md for more information
|
|
110
122
|
for file_name in yara_rule_file_names:
|
|
111
123
|
rule_id = pathlib.Path(file_name).stem
|
|
112
|
-
description_regex =
|
|
124
|
+
description_regex = (
|
|
125
|
+
rf"\s*rule\s+{rule_id}[^}}]+meta:[^}}]+description\s*=\s*\"(.+?)\""
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
# Determine ecosystem based on filename prefix
|
|
129
|
+
rule_ecosystem: Optional[ECOSYSTEM] = (
|
|
130
|
+
ECOSYSTEM.EXTENSION if file_name.startswith(EXTENSION_YARA_PREFIX) else None
|
|
131
|
+
)
|
|
113
132
|
|
|
114
133
|
with open(os.path.join(current_dir, file_name), "r") as fd:
|
|
115
134
|
match = re.search(description_regex, fd.read())
|
|
116
135
|
rule_description = ""
|
|
117
136
|
if match:
|
|
118
137
|
rule_description = match.group(1)
|
|
119
|
-
|
|
138
|
+
|
|
139
|
+
SOURCECODE_RULES.append(
|
|
140
|
+
YaraRule(
|
|
141
|
+
id=rule_id,
|
|
142
|
+
file=file_name,
|
|
143
|
+
description=rule_description,
|
|
144
|
+
ecosystem=rule_ecosystem,
|
|
145
|
+
)
|
|
146
|
+
)
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
rules:
|
|
2
|
+
- id: api-obfuscation
|
|
3
|
+
languages:
|
|
4
|
+
- python
|
|
5
|
+
message: This package uses obfuscated API calls that may evade static analysis detection
|
|
6
|
+
metadata:
|
|
7
|
+
description: Identify obfuscated API calls using alternative Python syntax patterns
|
|
8
|
+
severity: WARNING
|
|
9
|
+
patterns:
|
|
10
|
+
- pattern-either:
|
|
11
|
+
# Covered cases:
|
|
12
|
+
# 1) __dict__ access patterns: $MODULE.__dict__[$METHOD](...) / .__call__(...)
|
|
13
|
+
# 2) __getattribute__ patterns: $MODULE.__getattribute__($METHOD)(...) / .__call__(...)
|
|
14
|
+
# 3) getattr patterns: getattr($MODULE, $METHOD)(...) / .__call__(...)
|
|
15
|
+
# It also covers the case where $MODULE is imported as __import__('mod')
|
|
16
|
+
- patterns:
|
|
17
|
+
- pattern-either:
|
|
18
|
+
- pattern: $MODULE.__dict__[$METHOD]($...ARGS)
|
|
19
|
+
- pattern: $MODULE.__dict__[$METHOD].__call__($...ARGS)
|
|
20
|
+
- pattern: $MODULE.__getattribute__($METHOD)($...ARGS)
|
|
21
|
+
- pattern: $MODULE.__getattribute__($METHOD).__call__($...ARGS)
|
|
22
|
+
- pattern: getattr($MODULE, $METHOD)($...ARGS)
|
|
23
|
+
- pattern: getattr($MODULE, $METHOD).__call__($...ARGS)
|
|
24
|
+
- metavariable-regex:
|
|
25
|
+
metavariable: $MODULE
|
|
26
|
+
regex: "^[A-Za-z_][A-Za-z0-9_\\.]*$|^__import__\\([\"'][A-Za-z_][A-Za-z0-9_]*[\"']\\)$"
|
|
27
|
+
- metavariable-regex:
|
|
28
|
+
metavariable: $METHOD
|
|
29
|
+
regex: "^[\"'][A-Za-z_][A-Za-z0-9_]*[\"']$"
|
|
30
|
+
|
|
31
|
+
# --- Additional Cases: __import__('mod').method(...) / .__call__(...)
|
|
32
|
+
- patterns:
|
|
33
|
+
- pattern-either:
|
|
34
|
+
- pattern: __import__($MODULE).$METHOD($...ARGS)
|
|
35
|
+
- pattern: __import__($MODULE).$METHOD.__call__($...ARGS)
|
|
36
|
+
- metavariable-regex:
|
|
37
|
+
metavariable: $MODULE
|
|
38
|
+
regex: "^[\"'][A-Za-z_][A-Za-z0-9_]*[\"']$"
|
|
39
|
+
- metavariable-regex:
|
|
40
|
+
metavariable: $METHOD
|
|
41
|
+
# avoid matching __getattribute__
|
|
42
|
+
regex: "[^(__getattribute__)][A-Za-z_][A-Za-z0-9_]*"
|
|
@@ -55,8 +55,13 @@ rules:
|
|
|
55
55
|
# dll injection
|
|
56
56
|
- pattern-either:
|
|
57
57
|
- pattern: ....WriteProcessMemory
|
|
58
|
+
- pattern: getattr(..., "WriteProcessMemory")
|
|
58
59
|
- pattern: ....CreateRemoteThread
|
|
60
|
+
- pattern: getattr(..., "CreateRemoteThread")
|
|
59
61
|
- pattern: ....LoadLibraryA
|
|
62
|
+
- pattern: getattr(..., "LoadLibraryA")
|
|
63
|
+
- pattern: ....CDLL
|
|
64
|
+
- pattern: getattr(..., "CDLL")
|
|
60
65
|
|
|
61
66
|
# phantom dll
|
|
62
67
|
- patterns:
|
|
@@ -43,7 +43,7 @@ rules:
|
|
|
43
43
|
- pattern-regex: ((?:https?:\/\/)?[^\n\[\/\?#"']*?(files\.catbox\.moe)\b)
|
|
44
44
|
|
|
45
45
|
# top-level domains
|
|
46
|
-
- pattern-regex: (https?:\/\/[^\n\[\/\?#"']*?\.(link|xyz|tk|ml|ga|cf|gq|pw|top|club|mw|bd|ke|am|sbs|date|quest|cd|bid|cd|ws|icu|cam|uno|email|stream|zip)
|
|
46
|
+
- pattern-regex: (https?:\/\/[^\n\[\/\?#"']*?\.(link|xyz|tk|ml|ga|cf|gq|pw|top|club|mw|bd|ke|am|sbs|date|quest|cd|bid|cd|ws|icu|cam|uno|email|stream|zip)\b)
|
|
47
47
|
# IPv4
|
|
48
48
|
- pattern-regex: (https?:\/\/[^\n\[\/\?#"']*?(?:\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}))
|
|
49
49
|
# IPv6
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
rule suspicious_passwd_access_linux
|
|
2
|
+
{
|
|
3
|
+
meta:
|
|
4
|
+
author = "T HAMDOUNI, Datadog"
|
|
5
|
+
description = "Detects suspicious read access to /etc/passwd file, which is often targeted by malware for credential harvesting"
|
|
6
|
+
|
|
7
|
+
strings:
|
|
8
|
+
$cli = /(cat|less|more|head|tail)\s+.{0,100}\/etc\/passwd/ nocase
|
|
9
|
+
$read = /(readFile|readFileSync)\(\s*['"]\/etc\/passwd/ nocase
|
|
10
|
+
condition:
|
|
11
|
+
$cli or $read
|
|
12
|
+
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Ignores string contents to reduce false positives!
|
|
2
|
+
|
|
3
|
+
rules:
|
|
4
|
+
- id: unicode
|
|
5
|
+
message:
|
|
6
|
+
This package uses uncommon unicode characters in its code, it may try to
|
|
7
|
+
avoid detection.
|
|
8
|
+
metadata:
|
|
9
|
+
description: Identify suspicious unicode characters
|
|
10
|
+
languages:
|
|
11
|
+
- python
|
|
12
|
+
severity: WARNING
|
|
13
|
+
patterns:
|
|
14
|
+
# ignore comments
|
|
15
|
+
- pattern-not-regex: \#(.*)$
|
|
16
|
+
|
|
17
|
+
# ignore strings
|
|
18
|
+
- pattern-not-regex: (["'].*?["'])
|
|
19
|
+
- pattern-not-regex: ("""(.|\n)*?""")
|
|
20
|
+
- pattern-not-regex: ('''(.|\n)*?''')
|
|
21
|
+
|
|
22
|
+
- pattern-either:
|
|
23
|
+
- pattern-regex: ([ªᵃₐⓐa𝐚𝑎𝒂𝒶𝓪𝔞𝕒𝖆𝖺𝗮𝘢𝙖𝚊])
|
|
24
|
+
- pattern-regex: ([ᵇⓑb𝐛𝑏𝒃𝒷𝓫𝔟𝕓𝖇𝖻𝗯𝘣𝙗𝚋])
|
|
25
|
+
- pattern-regex: ([ᶜⅽⓒc𝐜𝑐𝒄𝒸𝓬𝔠𝕔𝖈𝖼𝗰𝘤𝙘𝚌])
|
|
26
|
+
- pattern-regex: ([ᵈⅆⅾⓓd𝐝𝑑𝒅𝒹𝓭𝔡𝕕𝖉𝖽𝗱𝘥𝙙𝚍])
|
|
27
|
+
- pattern-regex: ([ᵉₑℯⅇⓔe𝐞𝑒𝒆𝓮𝔢𝕖𝖊𝖾𝗲𝘦𝙚𝚎])
|
|
28
|
+
- pattern-regex: ([ᶠⓕf𝐟𝑓𝒇𝒻𝓯𝔣𝕗𝖋𝖿𝗳𝘧𝙛𝚏])
|
|
29
|
+
- pattern-regex: ([ᵍℊⓖg𝐠𝑔𝒈𝓰𝔤𝕘𝖌𝗀𝗴𝘨𝙜𝚐])
|
|
30
|
+
- pattern-regex: ([ʰₕℎⓗh𝐡𝒉𝒽𝓱𝔥𝕙𝖍𝗁𝗵𝘩𝙝𝚑])
|
|
31
|
+
- pattern-regex: ([ᵢⁱℹⅈⅰⓘi𝐢𝑖𝒊𝒾𝓲𝔦𝕚𝖎𝗂𝗶𝘪𝙞𝚒])
|
|
32
|
+
- pattern-regex: ([ʲⅉⓙⱼj𝐣𝑗𝒋𝒿𝓳𝔧𝕛𝖏𝗃𝗷𝘫𝙟𝚓])
|
|
33
|
+
- pattern-regex: ([ᵏₖⓚk𝐤𝑘𝒌𝓀𝓴𝔨𝕜𝖐𝗄𝗸𝘬𝙠𝚔])
|
|
34
|
+
- pattern-regex: ([ˡₗℓⅼⓛl𝐥𝑙𝒍𝓁𝓵𝔩𝕝𝖑𝗅𝗹𝘭𝙡𝚕])
|
|
35
|
+
- pattern-regex: ([ᵐₘⅿⓜm𝐦𝑚𝒎𝓂𝓶𝔪𝕞𝖒𝗆𝗺𝘮𝙢𝚖])
|
|
36
|
+
- pattern-regex: ([ⁿₙⓝn𝐧𝑛𝒏𝓃𝓷𝔫𝕟𝖓𝗇𝗻𝘯𝙣𝚗])
|
|
37
|
+
- pattern-regex: ([ºᵒₒℴⓞo𝐨𝑜𝒐𝓸𝔬𝕠𝖔𝗈𝗼𝘰𝙤𝚘])
|
|
38
|
+
- pattern-regex: ([ᵖₚⓟp𝐩𝑝𝒑𝓅𝓹𝔭𝕡𝖕𝗉𝗽𝘱𝙥𝚙])
|
|
39
|
+
- pattern-regex: ([ⓠq𐞥𝐪𝑞𝒒𝓆𝓺𝔮𝕢𝖖𝗊𝗾𝘲𝙦𝚚])
|
|
40
|
+
- pattern-regex: ([ʳᵣⓡr𝐫𝑟𝒓𝓇𝓻𝔯𝕣𝖗𝗋𝗿𝘳𝙧𝚛])
|
|
41
|
+
- pattern-regex: ([ſˢₛⓢs𝐬𝑠𝒔𝓈𝓼𝔰𝕤𝖘𝗌𝘀𝘴𝙨𝚜])
|
|
42
|
+
- pattern-regex: ([ᵗₜⓣt𝐭𝑡𝒕𝓉𝓽𝔱𝕥𝖙𝗍𝘁𝘵𝙩𝚝])
|
|
43
|
+
- pattern-regex: ([ᵘᵤⓤu𝐮𝑢𝒖𝓊𝓾𝔲𝕦𝖚𝗎𝘂𝘶𝙪𝚞])
|
|
44
|
+
- pattern-regex: ([ᵛᵥⅴⓥv𝐯𝑣𝒗𝓋𝓿𝔳𝕧𝖛𝗏𝘃𝘷𝙫𝚟])
|
|
45
|
+
- pattern-regex: ([ʷⓦw𝐰𝑤𝒘𝓌𝔀𝔴𝕨𝖜𝗐𝘄𝘸𝙬𝚠])
|
|
46
|
+
- pattern-regex: ([ˣₓⅹⓧx𝐱𝑥𝒙𝓍𝔁𝔵𝕩𝖝𝗑𝘅𝘹𝙭𝚡])
|
|
47
|
+
- pattern-regex: ([ʸⓨy𝐲𝑦𝒚𝓎𝔂𝔶𝕪𝖞𝗒𝘆𝘺𝙮𝚢])
|
|
48
|
+
- pattern-regex: ([ᶻⓩz𝐳𝑧𝒛𝓏𝔃𝔷𝕫𝖟𝗓𝘇𝘻𝙯𝚣])
|
|
49
|
+
|
|
50
|
+
- pattern-regex: ([ᴬⒶA𝐀𝐴𝑨𝒜𝓐𝔄𝔸𝕬𝖠𝗔𝘈𝘼𝙰🄰])
|
|
51
|
+
- pattern-regex: ([ᴮℬⒷB𝐁𝐵𝑩𝓑𝔅𝔹𝕭𝖡𝗕𝘉𝘽𝙱🄱])
|
|
52
|
+
- pattern-regex: ([ℂℭⅭⒸꟲC𝐂𝐶𝑪𝒞𝓒𝕮𝖢𝗖𝘊𝘾𝙲🄫🄲])
|
|
53
|
+
- pattern-regex: ([ᴰⅅⅮⒹD𝐃𝐷𝑫𝒟𝓓𝔇𝔻𝕯𝖣𝗗𝘋𝘿𝙳🄳])
|
|
54
|
+
- pattern-regex: ([ᴱℰⒺE𝐄𝐸𝑬𝓔𝔈𝔼𝕰𝖤𝗘𝘌𝙀𝙴🄴])
|
|
55
|
+
- pattern-regex: ([ℱⒻꟳF𝐅𝐹𝑭𝓕𝔉𝔽𝕱𝖥𝗙𝘍𝙁𝙵🄵])
|
|
56
|
+
- pattern-regex: ([ᴳⒼG𝐆𝐺𝑮𝒢𝓖𝔊𝔾𝕲𝖦𝗚𝘎𝙂𝙶🄶])
|
|
57
|
+
- pattern-regex: ([ᴴℋℌℍⒽH𝐇𝐻𝑯𝓗𝕳𝖧𝗛𝘏𝙃𝙷🄷])
|
|
58
|
+
- pattern-regex: ([ᴵℐℑⅠⒾI𝐈𝐼𝑰𝓘𝕀𝕴𝖨𝗜𝘐𝙄𝙸🄸])
|
|
59
|
+
- pattern-regex: ([ᴶⒿJ𝐉𝐽𝑱𝒥𝓙𝔍𝕁𝕵𝖩𝗝𝘑𝙅𝙹🄹])
|
|
60
|
+
- pattern-regex: ([ᴷKⓀK𝐊𝐾𝑲𝒦𝓚𝔎𝕂𝕶𝖪𝗞𝘒𝙆𝙺🄺])
|
|
61
|
+
- pattern-regex: ([ᴸℒⅬⓁL𝐋𝐿𝑳𝓛𝔏𝕃𝕷𝖫𝗟𝘓𝙇𝙻🄻])
|
|
62
|
+
- pattern-regex: ([ᴹℳⅯⓂM𝐌𝑀𝑴𝓜𝔐𝕄𝕸𝖬𝗠𝘔𝙈𝙼🄼])
|
|
63
|
+
- pattern-regex: ([ᴺℕⓃN𝐍𝑁𝑵𝒩𝓝𝔑𝕹𝖭𝗡𝘕𝙉𝙽🄽])
|
|
64
|
+
- pattern-regex: ([ᴼⓄO𝐎𝑂𝑶𝒪𝓞𝔒𝕆𝕺𝖮𝗢𝘖𝙊𝙾🄾])
|
|
65
|
+
- pattern-regex: ([ᴾℙⓅP𝐏𝑃𝑷𝒫𝓟𝔓𝕻𝖯𝗣𝘗𝙋𝙿🄿])
|
|
66
|
+
- pattern-regex: ([ℚⓆꟴQ𝐐𝑄𝑸𝒬𝓠𝔔𝕼𝖰𝗤𝘘𝙌𝚀🅀])
|
|
67
|
+
- pattern-regex: ([ᴿℛℜℝⓇR𝐑𝑅𝑹𝓡𝕽𝖱𝗥𝘙𝙍𝚁🄬🅁])
|
|
68
|
+
- pattern-regex: ([ⓈS𝐒𝑆𝑺𝒮𝓢𝔖𝕊𝕾𝖲𝗦𝘚𝙎𝚂🅂])
|
|
69
|
+
- pattern-regex: ([ᵀⓉT𝐓𝑇𝑻𝒯𝓣𝔗𝕋𝕿𝖳𝗧𝘛𝙏𝚃🅃])
|
|
70
|
+
- pattern-regex: ([ᵁⓊU𝐔𝑈𝑼𝒰𝓤𝔘𝕌𝖀𝖴𝗨𝘜𝙐𝚄🅄])
|
|
71
|
+
- pattern-regex: ([ⅤⓋⱽV𝐕𝑉𝑽𝒱𝓥𝔙𝕍𝖁𝖵𝗩𝘝𝙑𝚅🅅])
|
|
72
|
+
- pattern-regex: ([ᵂⓌW𝐖𝑊𝑾𝒲𝓦𝔚𝕎𝖂𝖶𝗪𝘞𝙒𝚆🅆])
|
|
73
|
+
- pattern-regex: ([ⅩⓍX𝐗𝑋𝑿𝒳𝓧𝔛𝕏𝖃𝖷𝗫𝘟𝙓𝚇🅇])
|
|
74
|
+
- pattern-regex: ([ⓎY𝐘𝑌𝒀𝒴𝓨𝔜𝕐𝖄𝖸𝗬𝘠𝙔𝚈🅈])
|
|
75
|
+
- pattern-regex: ([ℤℨⓏZ𝐙𝑍𝒁𝒵𝓩𝖅𝖹𝗭𝘡𝙕𝚉🅉])
|
guarddog/ecosystems.py
CHANGED
|
@@ -6,6 +6,7 @@ class ECOSYSTEM(Enum):
|
|
|
6
6
|
NPM = "npm"
|
|
7
7
|
GO = "go"
|
|
8
8
|
GITHUB_ACTION = "github-action"
|
|
9
|
+
EXTENSION = "extension"
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
def get_friendly_name(ecosystem: ECOSYSTEM) -> str:
|
|
@@ -18,5 +19,7 @@ def get_friendly_name(ecosystem: ECOSYSTEM) -> str:
|
|
|
18
19
|
return "go"
|
|
19
20
|
case ECOSYSTEM.GITHUB_ACTION:
|
|
20
21
|
return "GitHub Action"
|
|
22
|
+
case ECOSYSTEM.EXTENSION:
|
|
23
|
+
return "Extension"
|
|
21
24
|
case _:
|
|
22
25
|
return ecosystem.value
|
guarddog/scanners/__init__.py
CHANGED
|
@@ -8,6 +8,7 @@ from .pypi_project_scanner import PypiRequirementsScanner
|
|
|
8
8
|
from .go_package_scanner import GoModuleScanner
|
|
9
9
|
from .go_project_scanner import GoDependenciesScanner
|
|
10
10
|
from .github_action_scanner import GithubActionScanner
|
|
11
|
+
from .extension_scanner import ExtensionScanner
|
|
11
12
|
from .scanner import PackageScanner, ProjectScanner
|
|
12
13
|
from ..ecosystems import ECOSYSTEM
|
|
13
14
|
|
|
@@ -33,6 +34,8 @@ def get_package_scanner(ecosystem: ECOSYSTEM) -> Optional[PackageScanner]:
|
|
|
33
34
|
return GoModuleScanner()
|
|
34
35
|
case ECOSYSTEM.GITHUB_ACTION:
|
|
35
36
|
return GithubActionScanner()
|
|
37
|
+
case ECOSYSTEM.EXTENSION:
|
|
38
|
+
return ExtensionScanner()
|
|
36
39
|
return None
|
|
37
40
|
|
|
38
41
|
|
|
@@ -57,4 +60,6 @@ def get_project_scanner(ecosystem: ECOSYSTEM) -> Optional[ProjectScanner]:
|
|
|
57
60
|
return GoDependenciesScanner()
|
|
58
61
|
case ECOSYSTEM.GITHUB_ACTION:
|
|
59
62
|
return GitHubActionDependencyScanner()
|
|
63
|
+
case ECOSYSTEM.EXTENSION:
|
|
64
|
+
return None # we're not including dependency scanning for this PR
|
|
60
65
|
return None
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import typing
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
|
|
7
|
+
from guarddog.analyzer.analyzer import Analyzer
|
|
8
|
+
from guarddog.ecosystems import ECOSYSTEM
|
|
9
|
+
from guarddog.scanners.scanner import PackageScanner, noop
|
|
10
|
+
|
|
11
|
+
log = logging.getLogger("guarddog")
|
|
12
|
+
|
|
13
|
+
MARKETPLACE_URL = (
|
|
14
|
+
"https://marketplace.visualstudio.com/_apis/public/gallery/extensionquery"
|
|
15
|
+
)
|
|
16
|
+
MARKETPLACE_HEADERS = {
|
|
17
|
+
"Content-Type": "application/json",
|
|
18
|
+
"Accept": "application/json;api-version=3.0-preview.1",
|
|
19
|
+
}
|
|
20
|
+
MARKETPLACE_DOWNLOAD_LINK_ASSET_TYPE = "Microsoft.VisualStudio.Services.VSIXPackage"
|
|
21
|
+
VSIX_FILE_EXTENSION = ".vsix"
|
|
22
|
+
|
|
23
|
+
# VSCode Marketplace API filter types
|
|
24
|
+
# FilterType 7 = publisherName.extensionName (search by exact extension identifier)
|
|
25
|
+
MARKETPLACE_FILTER_TYPE_EXTENSION_NAME = 7
|
|
26
|
+
|
|
27
|
+
# VSCode Marketplace API flags (bitwise combination)
|
|
28
|
+
# 446 = IncludeVersions | IncludeFiles | IncludeMetadata
|
|
29
|
+
MARKETPLACE_FLAGS_FULL_METADATA = 446
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ExtensionScanner(PackageScanner):
|
|
33
|
+
def __init__(self) -> None:
|
|
34
|
+
super().__init__(Analyzer(ECOSYSTEM.EXTENSION))
|
|
35
|
+
|
|
36
|
+
def download_and_get_package_info(
|
|
37
|
+
self, directory: str, package_name: str, version=None
|
|
38
|
+
) -> typing.Tuple[dict, str]:
|
|
39
|
+
"""
|
|
40
|
+
Downloads a VSCode extension from the marketplace and extracts it
|
|
41
|
+
|
|
42
|
+
Args:
|
|
43
|
+
directory: Directory to download to
|
|
44
|
+
package_name: Extension identifier (publisher.extension format)
|
|
45
|
+
version: Specific version or default to latest
|
|
46
|
+
|
|
47
|
+
Returns:
|
|
48
|
+
Tuple of (marketplace API response, extracted_path)
|
|
49
|
+
"""
|
|
50
|
+
marketplace_data, vsix_url = self._get_marketplace_info_and_url(
|
|
51
|
+
package_name, version
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
vsix_path = os.path.join(
|
|
55
|
+
directory, package_name.replace("/", "-") + VSIX_FILE_EXTENSION
|
|
56
|
+
)
|
|
57
|
+
extracted_path = vsix_path.removesuffix(VSIX_FILE_EXTENSION)
|
|
58
|
+
|
|
59
|
+
log.debug(f"Downloading VSCode extension from {vsix_url}")
|
|
60
|
+
|
|
61
|
+
self.download_compressed(vsix_url, vsix_path, extracted_path)
|
|
62
|
+
|
|
63
|
+
return marketplace_data, extracted_path
|
|
64
|
+
|
|
65
|
+
def _get_marketplace_info_and_url(
|
|
66
|
+
self, package_name: str, version: typing.Optional[str] = None
|
|
67
|
+
) -> typing.Tuple[dict, str]:
|
|
68
|
+
"""Get marketplace metadata and VSIX download URL"""
|
|
69
|
+
payload = {
|
|
70
|
+
"filters": [
|
|
71
|
+
{
|
|
72
|
+
"criteria": [
|
|
73
|
+
{
|
|
74
|
+
"filterType": MARKETPLACE_FILTER_TYPE_EXTENSION_NAME,
|
|
75
|
+
"value": package_name,
|
|
76
|
+
}
|
|
77
|
+
]
|
|
78
|
+
}
|
|
79
|
+
],
|
|
80
|
+
"flags": MARKETPLACE_FLAGS_FULL_METADATA,
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
response = requests.post(
|
|
84
|
+
MARKETPLACE_URL, headers=MARKETPLACE_HEADERS, json=payload
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
response.raise_for_status()
|
|
88
|
+
|
|
89
|
+
data = response.json()
|
|
90
|
+
|
|
91
|
+
if not data.get("results") or not data["results"][0].get("extensions"):
|
|
92
|
+
raise ValueError(f"Extension {package_name} not found in marketplace")
|
|
93
|
+
|
|
94
|
+
extension_info = data["results"][0]["extensions"][0]
|
|
95
|
+
versions = extension_info.get("versions", [])
|
|
96
|
+
|
|
97
|
+
if not versions:
|
|
98
|
+
raise ValueError(
|
|
99
|
+
f"No versions available for this extension: {package_name}"
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
target_version = None
|
|
103
|
+
if version is None:
|
|
104
|
+
# if no version is provided, default to latest
|
|
105
|
+
target_version = versions[0]
|
|
106
|
+
else:
|
|
107
|
+
for v in versions:
|
|
108
|
+
if v.get("version") == version:
|
|
109
|
+
target_version = v
|
|
110
|
+
break
|
|
111
|
+
if target_version is None:
|
|
112
|
+
raise ValueError(
|
|
113
|
+
f"Version {version} not found for extension: {package_name}"
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
# Extract download URL
|
|
117
|
+
files = target_version.get("files", [])
|
|
118
|
+
vsix_url = None
|
|
119
|
+
for file_info in files:
|
|
120
|
+
if file_info.get("assetType") == MARKETPLACE_DOWNLOAD_LINK_ASSET_TYPE:
|
|
121
|
+
vsix_url = file_info.get("source")
|
|
122
|
+
break
|
|
123
|
+
|
|
124
|
+
if not vsix_url:
|
|
125
|
+
raise ValueError(
|
|
126
|
+
f"No VSIX download link available for this extension: {package_name}"
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
return data, vsix_url
|
|
130
|
+
|
|
131
|
+
def scan_local(
|
|
132
|
+
self, path: str, rules=None, callback: typing.Callable[[dict], None] = noop
|
|
133
|
+
) -> dict:
|
|
134
|
+
"""
|
|
135
|
+
Scan a local VSCode extension directory
|
|
136
|
+
|
|
137
|
+
Args:
|
|
138
|
+
path: Path to extension directory containing package.json
|
|
139
|
+
rules: Set of rules to use
|
|
140
|
+
callback: Callback to apply to analyzer output
|
|
141
|
+
|
|
142
|
+
Returns:
|
|
143
|
+
Scan results
|
|
144
|
+
"""
|
|
145
|
+
if rules is not None:
|
|
146
|
+
rules = set(rules)
|
|
147
|
+
|
|
148
|
+
# Use only sourcecode analysis for local scans, consistent with other ecosystems
|
|
149
|
+
results = self.analyzer.analyze_sourcecode(path, rules=rules)
|
|
150
|
+
callback(results)
|
|
151
|
+
|
|
152
|
+
return results
|
|
@@ -15,7 +15,9 @@ class GithubActionScanner(PackageScanner):
|
|
|
15
15
|
def __init__(self) -> None:
|
|
16
16
|
super().__init__(Analyzer(ECOSYSTEM.GITHUB_ACTION))
|
|
17
17
|
|
|
18
|
-
def download_and_get_package_info(
|
|
18
|
+
def download_and_get_package_info(
|
|
19
|
+
self, directory: str, package_name: str, version=None
|
|
20
|
+
) -> typing.Tuple[dict, str]:
|
|
19
21
|
repo = self._get_repo(package_name)
|
|
20
22
|
tarball_url = self._get_git_tarball_url(repo, version)
|
|
21
23
|
|
|
@@ -25,7 +27,9 @@ class GithubActionScanner(PackageScanner):
|
|
|
25
27
|
if file_extension == "":
|
|
26
28
|
file_extension = ".zip"
|
|
27
29
|
|
|
28
|
-
zippath = os.path.join(
|
|
30
|
+
zippath = os.path.join(
|
|
31
|
+
directory, package_name.replace("/", "-") + file_extension
|
|
32
|
+
)
|
|
29
33
|
unzippedpath = zippath.removesuffix(file_extension)
|
|
30
34
|
self.download_compressed(tarball_url, zippath, unzippedpath)
|
|
31
35
|
|
|
@@ -17,9 +17,13 @@ class NPMPackageScanner(PackageScanner):
|
|
|
17
17
|
def __init__(self) -> None:
|
|
18
18
|
super().__init__(Analyzer(ECOSYSTEM.NPM))
|
|
19
19
|
|
|
20
|
-
def download_and_get_package_info(
|
|
20
|
+
def download_and_get_package_info(
|
|
21
|
+
self, directory: str, package_name: str, version=None
|
|
22
|
+
) -> typing.Tuple[dict, str]:
|
|
21
23
|
git_target = None
|
|
22
|
-
if urlparse(package_name).hostname is not None and package_name.endswith(
|
|
24
|
+
if urlparse(package_name).hostname is not None and package_name.endswith(
|
|
25
|
+
".git"
|
|
26
|
+
):
|
|
23
27
|
git_target = package_name
|
|
24
28
|
|
|
25
29
|
if not package_name.startswith("@") and package_name.count("/") == 1:
|
|
@@ -33,7 +37,9 @@ class NPMPackageScanner(PackageScanner):
|
|
|
33
37
|
response = requests.get(url)
|
|
34
38
|
|
|
35
39
|
if response.status_code != 200:
|
|
36
|
-
raise Exception(
|
|
40
|
+
raise Exception(
|
|
41
|
+
"Received status code: " + str(response.status_code) + " from npm"
|
|
42
|
+
)
|
|
37
43
|
data = response.json()
|
|
38
44
|
if "name" not in data:
|
|
39
45
|
raise Exception(f"Error retrieving package: {package_name}")
|
|
@@ -45,7 +51,9 @@ class NPMPackageScanner(PackageScanner):
|
|
|
45
51
|
|
|
46
52
|
tarball_url = details["dist"]["tarball"]
|
|
47
53
|
file_extension = pathlib.Path(tarball_url).suffix
|
|
48
|
-
zippath = os.path.join(
|
|
54
|
+
zippath = os.path.join(
|
|
55
|
+
directory, package_name.replace("/", "-") + file_extension
|
|
56
|
+
)
|
|
49
57
|
unzippedpath = zippath.removesuffix(file_extension)
|
|
50
58
|
self.download_compressed(tarball_url, zippath, unzippedpath)
|
|
51
59
|
|
|
@@ -12,7 +12,9 @@ class PypiPackageScanner(PackageScanner):
|
|
|
12
12
|
def __init__(self) -> None:
|
|
13
13
|
super().__init__(Analyzer(ECOSYSTEM.PYPI))
|
|
14
14
|
|
|
15
|
-
def download_and_get_package_info(
|
|
15
|
+
def download_and_get_package_info(
|
|
16
|
+
self, directory: str, package_name: str, version=None
|
|
17
|
+
) -> typing.Tuple[dict, str]:
|
|
16
18
|
extract_dir = self.download_package(package_name, directory, version)
|
|
17
19
|
return get_package_info(package_name), extract_dir
|
|
18
20
|
|
|
@@ -40,7 +42,9 @@ class PypiPackageScanner(PackageScanner):
|
|
|
40
42
|
version = data["info"]["version"]
|
|
41
43
|
|
|
42
44
|
if version not in releases:
|
|
43
|
-
raise Exception(
|
|
45
|
+
raise Exception(
|
|
46
|
+
f"Version {version} for package {package_name} doesn't exist."
|
|
47
|
+
)
|
|
44
48
|
|
|
45
49
|
files = releases[version]
|
|
46
50
|
url, file_extension = None, None
|
|
@@ -52,7 +56,9 @@ class PypiPackageScanner(PackageScanner):
|
|
|
52
56
|
break
|
|
53
57
|
|
|
54
58
|
if not (url and file_extension):
|
|
55
|
-
raise Exception(
|
|
59
|
+
raise Exception(
|
|
60
|
+
f"Compressed file for {package_name} does not exist on PyPI."
|
|
61
|
+
)
|
|
56
62
|
|
|
57
63
|
# Path to compressed package
|
|
58
64
|
zippath = os.path.join(directory, package_name + file_extension)
|