guarddog 2.0.6__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -178,7 +178,7 @@ class Analyzer:
178
178
  errors: Dict[str, str] = {}
179
179
  issues = 0
180
180
 
181
- rule_results = defaultdict(list)
181
+ rule_results: defaultdict[dict, list[dict]] = defaultdict(list)
182
182
 
183
183
  rules_path = {
184
184
  rule_name: os.path.join(SOURCECODE_RULES_PATH, f"{rule_name}.yar")
@@ -210,6 +210,17 @@ class Analyzer:
210
210
  "code": self.trim_code_snippet(str(i.matched_data)),
211
211
  'message': m.meta.get("description", f"{m.rule} rule matched")
212
212
  }
213
+
214
+ # yara can match multiple times in the same file,
215
+ # reporting the same word or pattern several times;
216
+ # deduplicate those matches
217
+ if [
218
+ f
219
+ for f in rule_results[m.rule]
220
+ if finding["code"] == f["code"]
221
+ ]:
222
+ continue
223
+
213
224
  issues += len(m.strings)
214
225
  rule_results[m.rule].append(finding)
215
226
  except Exception as e:
@@ -272,6 +283,7 @@ class Analyzer:
272
283
  cmd.append("--no-git-ignore")
273
284
  cmd.append("--json")
274
285
  cmd.append("--quiet")
286
+ cmd.append("--disable-nosem")
275
287
  cmd.append(f"--max-target-bytes={SEMGREP_MAX_TARGET_BYTES}")
276
288
  cmd.append(target)
277
289
  log.debug(f"Invoking semgrep with command line: {' '.join(cmd)}")
@@ -1,8 +1,13 @@
1
+ from typing import Type
2
+
1
3
  from guarddog.analyzer.metadata import Detector
4
+ from guarddog.analyzer.metadata.go.typosquatting import GoTyposquatDetector
2
5
 
3
6
  GO_METADATA_RULES = {}
4
7
 
5
- classes: list[Detector] = []
8
+ classes: list[Type[Detector]] = [
9
+ GoTyposquatDetector,
10
+ ]
6
11
 
7
12
  for detectorClass in classes:
8
13
  detectorInstance = detectorClass() # type: ignore
@@ -0,0 +1,118 @@
1
+ import json
2
+ import os
3
+ from typing import Optional
4
+
5
+ from guarddog.analyzer.metadata.typosquatting import TyposquatDetector
6
+ from guarddog.utils.config import TOP_PACKAGES_CACHE_LOCATION
7
+
8
+
9
class GoTyposquatDetector(TyposquatDetector):
    """Detector for typosquatting attacks for go modules. Checks for distance one Levenshtein,
    one-off character swaps, permutations around hyphens, and substrings.

    Attributes:
        popular_packages (set): set of top 500 most popular Go packages,
            as determined by count of references across top starred repositories
    """

    def _get_top_packages(self) -> set:
        """
        Loads the popular Go packages from ``top_go_packages.json``.

        The file is looked up in ``TOP_PACKAGES_CACHE_LOCATION`` when that
        value is not None, otherwise in the ``resources`` directory located
        one level above this module's directory.

        Returns:
            set: the entries of the JSON document

        Raises:
            Exception: if the file does not exist or holds a JSON ``null``
        """

        top_packages_filename = "top_go_packages.json"

        resources_dir = TOP_PACKAGES_CACHE_LOCATION
        if resources_dir is None:
            resources_dir = os.path.abspath(
                os.path.join(os.path.dirname(__file__), "..", "resources")
            )

        top_packages_path = os.path.join(resources_dir, top_packages_filename)

        top_packages_information = None

        # Test for the file directly instead of listing the whole directory;
        # a missing directory is then reported through the error below.
        if os.path.isfile(top_packages_path):
            # JSON is UTF-8 by specification; do not rely on the locale default.
            with open(top_packages_path, "r", encoding="utf-8") as top_packages_file:
                top_packages_information = json.load(top_packages_file)

        if top_packages_information is None:
            raise Exception(
                f"Could not retrieve top Go packages from {top_packages_path}")

        return set(top_packages_information)

    def detect(
        self,
        package_info,
        path: Optional[str] = None,
        name: Optional[str] = None,
        version: Optional[str] = None,
    ) -> tuple[bool, Optional[str]]:
        """
        Uses a Go package's name to determine whether the
        package is attempting a typosquatting attack

        Args:
            package_info: not used by this detector
            path (Optional[str]): not used by this detector
            name (str): The name of the package,
                also known as the import path
            version (Optional[str]): not used by this detector

        Returns:
            Tuple[bool, Optional[str]]: True if package is typosquatted,
               along with a message indicating the similar package name.
               False if not typosquatted and None
        """

        similar_package_names = self.get_typosquatted_package(name)
        if similar_package_names:
            return True, TyposquatDetector.MESSAGE_TEMPLATE % ", ".join(
                similar_package_names
            )
        return False, None

    def _get_confused_forms(self, package_name) -> list:
        """
        Gets confused terms for Go packages
        Confused terms are:
            - golang to go swaps (or vice versa)
            - the removal of go/golang terms
            - gitlab.com to github.com swaps (or vice versa)

        Args:
            package_name (str): name of the package

        Returns:
            list: list of confused terms; never contains the empty string
        """

        confused_forms = []

        # Only the leading host prefix is swapped (count=1).
        if package_name.startswith("github.com/"):
            confused_forms.append(
                package_name.replace("github.com/", "gitlab.com/", 1)
            )
        elif package_name.startswith("gitlab.com/"):
            confused_forms.append(
                package_name.replace("gitlab.com/", "github.com/", 1)
            )

        terms = package_name.split("-")

        # Detect swaps like golang-package -> go-package
        for i, term in enumerate(terms):
            # "golang" must be tested first because it contains "go".
            if "golang" in term:
                confused_term = term.replace("golang", "go")
            elif "go" in term:
                confused_term = term.replace("go", "golang")
            else:
                continue

            # Get form when replacing or removing go/golang term
            replaced_form = terms[:i] + [confused_term] + terms[i + 1:]
            removed_form = terms[:i] + terms[i + 1:]

            for form in (replaced_form, removed_form):
                joined = "-".join(form)
                # Removing the only term leaves nothing; an empty string is
                # not a package name, so it is not reported as a confused form.
                if joined:
                    confused_forms.append(joined)

        return confused_forms
114
+
115
+
116
+ if __name__ == "__main__":
117
+ # load top_go_packages.json
118
+ GoTyposquatDetector()._get_top_packages()
@@ -78,6 +78,12 @@ class NPMTyposquatDetector(TyposquatDetector):
78
78
  )
79
79
  return False, None
80
80
 
81
+ def _get_confused_forms(self, package_name) -> list:
82
+ """ Gets confused terms for npm packages.
83
+ Currently, there are no confused terms for npm packages.
84
+ """
85
+ return []
86
+
81
87
 
82
88
  if __name__ == "__main__":
83
89
  # update top_npm_packages.json
@@ -91,6 +91,44 @@ class PypiTyposquatDetector(TyposquatDetector):
91
91
  return True, TyposquatDetector.MESSAGE_TEMPLATE % ", ".join(similar_package_names)
92
92
  return False, None
93
93
 
94
+ def _get_confused_forms(self, package_name) -> list:
95
+ """
96
+ Gets confused terms for python packages
97
+ Confused terms are:
98
+ - py to python swaps (or vice versa)
99
+ - the removal of py/python terms
100
+
101
+ Args:
102
+ package_name (str): name of the package
103
+
104
+ Returns:
105
+ list: list of confused terms
106
+ """
107
+
108
+ confused_forms = []
109
+
110
+ terms = package_name.split("-")
111
+
112
+ # Detect swaps like python-package -> py-package
113
+ for i in range(len(terms)):
114
+ confused_term = None
115
+
116
+ if "python" in terms[i]:
117
+ confused_term = terms[i].replace("python", "py")
118
+ elif "py" in terms[i]:
119
+ confused_term = terms[i].replace("py", "python")
120
+ else:
121
+ continue
122
+
123
+ # Get form when replacing or removing py/python term
124
+ replaced_form = terms[:i] + [confused_term] + terms[i + 1:]
125
+ removed_form = terms[:i] + terms[i + 1:]
126
+
127
+ for form in (replaced_form, removed_form):
128
+ confused_forms.append("-".join(form))
129
+
130
+ return confused_forms
131
+
94
132
 
95
133
  if __name__ == "__main__":
96
134
  # update top_pypi_packages.json