guarddog 2.0.6__py3-none-any.whl → 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guarddog/analyzer/analyzer.py +13 -1
- guarddog/analyzer/metadata/go/__init__.py +6 -1
- guarddog/analyzer/metadata/go/typosquatting.py +118 -0
- guarddog/analyzer/metadata/npm/typosquatting.py +6 -0
- guarddog/analyzer/metadata/pypi/typosquatting.py +38 -0
- guarddog/analyzer/metadata/resources/top_go_packages.json +2923 -0
- guarddog/analyzer/metadata/resources/top_npm_packages.json +7383 -7383
- guarddog/analyzer/metadata/resources/top_pypi_packages.json +15973 -15973
- guarddog/analyzer/metadata/typosquatting.py +2 -36
- guarddog/analyzer/sourcecode/npm-exfiltrate-sensitive-data.yml +7 -0
- guarddog/analyzer/sourcecode/obfuscation.yml +5 -0
- guarddog/analyzer/sourcecode/shady-links.yml +10 -3
- guarddog/cli.py +7 -7
- guarddog/scanners/pypi_project_scanner.py +1 -2
- guarddog/scanners/scanner.py +4 -5
- {guarddog-2.0.6.dist-info → guarddog-2.2.0.dist-info}/METADATA +5 -5
- {guarddog-2.0.6.dist-info → guarddog-2.2.0.dist-info}/RECORD +22 -20
- {guarddog-2.0.6.dist-info → guarddog-2.2.0.dist-info}/WHEEL +1 -1
- {guarddog-2.0.6.dist-info → guarddog-2.2.0.dist-info}/LICENSE +0 -0
- {guarddog-2.0.6.dist-info → guarddog-2.2.0.dist-info}/LICENSE-3rdparty.csv +0 -0
- {guarddog-2.0.6.dist-info → guarddog-2.2.0.dist-info}/NOTICE +0 -0
- {guarddog-2.0.6.dist-info → guarddog-2.2.0.dist-info}/entry_points.txt +0 -0
guarddog/analyzer/analyzer.py
CHANGED
|
@@ -178,7 +178,7 @@ class Analyzer:
|
|
|
178
178
|
errors: Dict[str, str] = {}
|
|
179
179
|
issues = 0
|
|
180
180
|
|
|
181
|
-
rule_results = defaultdict(list)
|
|
181
|
+
rule_results: defaultdict[dict, list[dict]] = defaultdict(list)
|
|
182
182
|
|
|
183
183
|
rules_path = {
|
|
184
184
|
rule_name: os.path.join(SOURCECODE_RULES_PATH, f"{rule_name}.yar")
|
|
@@ -210,6 +210,17 @@ class Analyzer:
|
|
|
210
210
|
"code": self.trim_code_snippet(str(i.matched_data)),
|
|
211
211
|
'message': m.meta.get("description", f"{m.rule} rule matched")
|
|
212
212
|
}
|
|
213
|
+
|
|
214
|
+
# since yara can match multiple times in the same file
|
|
215
|
+
# leading to finding several times the same word or pattern
|
|
216
|
+
# this deduplicates the matches
|
|
217
|
+
if [
|
|
218
|
+
f
|
|
219
|
+
for f in rule_results[m.rule]
|
|
220
|
+
if finding["code"] == f["code"]
|
|
221
|
+
]:
|
|
222
|
+
continue
|
|
223
|
+
|
|
213
224
|
issues += len(m.strings)
|
|
214
225
|
rule_results[m.rule].append(finding)
|
|
215
226
|
except Exception as e:
|
|
@@ -272,6 +283,7 @@ class Analyzer:
|
|
|
272
283
|
cmd.append("--no-git-ignore")
|
|
273
284
|
cmd.append("--json")
|
|
274
285
|
cmd.append("--quiet")
|
|
286
|
+
cmd.append("--disable-nosem")
|
|
275
287
|
cmd.append(f"--max-target-bytes={SEMGREP_MAX_TARGET_BYTES}")
|
|
276
288
|
cmd.append(target)
|
|
277
289
|
log.debug(f"Invoking semgrep with command line: {' '.join(cmd)}")
|
|
@@ -1,8 +1,13 @@
|
|
|
1
|
+
from typing import Type
|
|
2
|
+
|
|
1
3
|
from guarddog.analyzer.metadata import Detector
|
|
4
|
+
from guarddog.analyzer.metadata.go.typosquatting import GoTyposquatDetector
|
|
2
5
|
|
|
3
6
|
GO_METADATA_RULES = {}
|
|
4
7
|
|
|
5
|
-
classes: list[Detector] = [
|
|
8
|
+
classes: list[Type[Detector]] = [
|
|
9
|
+
GoTyposquatDetector,
|
|
10
|
+
]
|
|
6
11
|
|
|
7
12
|
for detectorClass in classes:
|
|
8
13
|
detectorInstance = detectorClass() # type: ignore
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from guarddog.analyzer.metadata.typosquatting import TyposquatDetector
|
|
6
|
+
from guarddog.utils.config import TOP_PACKAGES_CACHE_LOCATION
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class GoTyposquatDetector(TyposquatDetector):
|
|
10
|
+
"""Detector for typosquatting attacks for go modules. Checks for distance one Levenshtein,
|
|
11
|
+
one-off character swaps, permutations around hyphens, and substrings.
|
|
12
|
+
|
|
13
|
+
Attributes:
|
|
14
|
+
popular_packages (set): set of top 500 most popular Go packages,
|
|
15
|
+
as determined by count of references across top starred repositories
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
def _get_top_packages(self) -> set:
|
|
19
|
+
top_packages_filename = "top_go_packages.json"
|
|
20
|
+
|
|
21
|
+
resources_dir = TOP_PACKAGES_CACHE_LOCATION
|
|
22
|
+
if resources_dir is None:
|
|
23
|
+
resources_dir = os.path.abspath(
|
|
24
|
+
os.path.join(os.path.dirname(__file__), "..", "resources")
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
top_packages_path = os.path.join(resources_dir, top_packages_filename)
|
|
28
|
+
|
|
29
|
+
top_packages_information = None
|
|
30
|
+
|
|
31
|
+
if top_packages_filename in os.listdir(resources_dir):
|
|
32
|
+
with open(top_packages_path, "r") as top_packages_file:
|
|
33
|
+
top_packages_information = json.load(top_packages_file)
|
|
34
|
+
|
|
35
|
+
if top_packages_information is None:
|
|
36
|
+
raise Exception(
|
|
37
|
+
f"Could not retrieve top Go packages from {top_packages_path}")
|
|
38
|
+
|
|
39
|
+
return set(top_packages_information)
|
|
40
|
+
|
|
41
|
+
def detect(
|
|
42
|
+
self,
|
|
43
|
+
package_info,
|
|
44
|
+
path: Optional[str] = None,
|
|
45
|
+
name: Optional[str] = None,
|
|
46
|
+
version: Optional[str] = None,
|
|
47
|
+
) -> tuple[bool, Optional[str]]:
|
|
48
|
+
"""
|
|
49
|
+
Uses a Go package's name to determine whether the
|
|
50
|
+
package is attempting a typosquatting attack
|
|
51
|
+
|
|
52
|
+
Args:
|
|
53
|
+
name (str): The name of the package,
|
|
54
|
+
also known as the import path
|
|
55
|
+
|
|
56
|
+
Returns:
|
|
57
|
+
Tuple[bool, Optional[str]]: True if package is typosquatted,
|
|
58
|
+
along with a message indicating the similar package name.
|
|
59
|
+
False and None if not typosquatted
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
similar_package_names = self.get_typosquatted_package(name)
|
|
63
|
+
if len(similar_package_names) > 0:
|
|
64
|
+
return True, TyposquatDetector.MESSAGE_TEMPLATE % ", ".join(
|
|
65
|
+
similar_package_names
|
|
66
|
+
)
|
|
67
|
+
return False, None
|
|
68
|
+
|
|
69
|
+
def _get_confused_forms(self, package_name) -> list:
|
|
70
|
+
"""
|
|
71
|
+
Gets confused terms for Go packages
|
|
72
|
+
Confused terms are:
|
|
73
|
+
- golang to go swaps (or vice versa)
|
|
74
|
+
- the removal of go/golang terms
|
|
75
|
+
- gitlab.com to github.com swaps (or vice versa)
|
|
76
|
+
|
|
77
|
+
Args:
|
|
78
|
+
package_name (str): name of the package
|
|
79
|
+
|
|
80
|
+
Returns:
|
|
81
|
+
list: list of confused terms
|
|
82
|
+
"""
|
|
83
|
+
|
|
84
|
+
confused_forms = []
|
|
85
|
+
|
|
86
|
+
if package_name.startswith("github.com/"):
|
|
87
|
+
replaced = package_name.replace("github.com/", "gitlab.com/", 1)
|
|
88
|
+
confused_forms.append(replaced)
|
|
89
|
+
elif package_name.startswith("gitlab.com/"):
|
|
90
|
+
replaced = package_name.replace("gitlab.com/", "github.com/", 1)
|
|
91
|
+
confused_forms.append(replaced)
|
|
92
|
+
|
|
93
|
+
terms = package_name.split("-")
|
|
94
|
+
|
|
95
|
+
# Detect swaps like golang-package -> go-package
|
|
96
|
+
for i in range(len(terms)):
|
|
97
|
+
confused_term = None
|
|
98
|
+
|
|
99
|
+
if "golang" in terms[i]:
|
|
100
|
+
confused_term = terms[i].replace("golang", "go")
|
|
101
|
+
elif "go" in terms[i]:
|
|
102
|
+
confused_term = terms[i].replace("go", "golang")
|
|
103
|
+
else:
|
|
104
|
+
continue
|
|
105
|
+
|
|
106
|
+
# Get form when replacing or removing go/golang term
|
|
107
|
+
replaced_form = terms[:i] + [confused_term] + terms[i + 1:]
|
|
108
|
+
removed_form = terms[:i] + terms[i + 1:]
|
|
109
|
+
|
|
110
|
+
for form in (replaced_form, removed_form):
|
|
111
|
+
confused_forms.append("-".join(form))
|
|
112
|
+
|
|
113
|
+
return confused_forms
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
if __name__ == "__main__":
|
|
117
|
+
# load top_go_packages.json
|
|
118
|
+
GoTyposquatDetector()._get_top_packages()
|
|
@@ -78,6 +78,12 @@ class NPMTyposquatDetector(TyposquatDetector):
|
|
|
78
78
|
)
|
|
79
79
|
return False, None
|
|
80
80
|
|
|
81
|
+
def _get_confused_forms(self, package_name) -> list:
|
|
82
|
+
""" Gets confused terms for npm packages.
|
|
83
|
+
Currently, there are no confused terms for npm packages.
|
|
84
|
+
"""
|
|
85
|
+
return []
|
|
86
|
+
|
|
81
87
|
|
|
82
88
|
if __name__ == "__main__":
|
|
83
89
|
# update top_npm_packages.json
|
|
@@ -91,6 +91,44 @@ class PypiTyposquatDetector(TyposquatDetector):
|
|
|
91
91
|
return True, TyposquatDetector.MESSAGE_TEMPLATE % ", ".join(similar_package_names)
|
|
92
92
|
return False, None
|
|
93
93
|
|
|
94
|
+
def _get_confused_forms(self, package_name) -> list:
|
|
95
|
+
"""
|
|
96
|
+
Gets confused terms for python packages
|
|
97
|
+
Confused terms are:
|
|
98
|
+
- py to python swaps (or vice versa)
|
|
99
|
+
- the removal of py/python terms
|
|
100
|
+
|
|
101
|
+
Args:
|
|
102
|
+
package_name (str): name of the package
|
|
103
|
+
|
|
104
|
+
Returns:
|
|
105
|
+
list: list of confused terms
|
|
106
|
+
"""
|
|
107
|
+
|
|
108
|
+
confused_forms = []
|
|
109
|
+
|
|
110
|
+
terms = package_name.split("-")
|
|
111
|
+
|
|
112
|
+
# Detect swaps like python-package -> py-package
|
|
113
|
+
for i in range(len(terms)):
|
|
114
|
+
confused_term = None
|
|
115
|
+
|
|
116
|
+
if "python" in terms[i]:
|
|
117
|
+
confused_term = terms[i].replace("python", "py")
|
|
118
|
+
elif "py" in terms[i]:
|
|
119
|
+
confused_term = terms[i].replace("py", "python")
|
|
120
|
+
else:
|
|
121
|
+
continue
|
|
122
|
+
|
|
123
|
+
# Get form when replacing or removing py/python term
|
|
124
|
+
replaced_form = terms[:i] + [confused_term] + terms[i + 1:]
|
|
125
|
+
removed_form = terms[:i] + terms[i + 1:]
|
|
126
|
+
|
|
127
|
+
for form in (replaced_form, removed_form):
|
|
128
|
+
confused_forms.append("-".join(form))
|
|
129
|
+
|
|
130
|
+
return confused_forms
|
|
131
|
+
|
|
94
132
|
|
|
95
133
|
if __name__ == "__main__":
|
|
96
134
|
# update top_pypi_packages.json
|