guarddog 2.7.0__py3-none-any.whl → 2.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guarddog/analyzer/metadata/go/typosquatting.py +13 -6
- guarddog/analyzer/metadata/npm/typosquatting.py +43 -12
- guarddog/analyzer/metadata/pypi/typosquatting.py +51 -17
- guarddog/analyzer/metadata/resources/top_pypi_packages.json +29998 -29986
- guarddog/analyzer/metadata/utils.py +1 -1
- guarddog/analyzer/sourcecode/shady-links.yml +1 -1
- guarddog/utils/archives.py +133 -9
- guarddog/utils/config.py +24 -2
- {guarddog-2.7.0.dist-info → guarddog-2.7.1.dist-info}/METADATA +2 -2
- {guarddog-2.7.0.dist-info → guarddog-2.7.1.dist-info}/RECORD +15 -15
- {guarddog-2.7.0.dist-info → guarddog-2.7.1.dist-info}/WHEEL +0 -0
- {guarddog-2.7.0.dist-info → guarddog-2.7.1.dist-info}/entry_points.txt +0 -0
- {guarddog-2.7.0.dist-info → guarddog-2.7.1.dist-info}/licenses/LICENSE +0 -0
- {guarddog-2.7.0.dist-info → guarddog-2.7.1.dist-info}/licenses/LICENSE-3rdparty.csv +0 -0
- {guarddog-2.7.0.dist-info → guarddog-2.7.1.dist-info}/licenses/NOTICE +0 -0
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
import json
|
|
2
|
+
import logging
|
|
2
3
|
import os
|
|
3
4
|
from typing import Optional
|
|
4
5
|
|
|
5
6
|
from guarddog.analyzer.metadata.typosquatting import TyposquatDetector
|
|
6
7
|
from guarddog.utils.config import TOP_PACKAGES_CACHE_LOCATION
|
|
7
8
|
|
|
9
|
+
log = logging.getLogger("guarddog")
|
|
10
|
+
|
|
8
11
|
|
|
9
12
|
class GoTyposquatDetector(TyposquatDetector):
|
|
10
13
|
"""Detector for typosquatting attacks for go modules. Checks for distance one Levenshtein,
|
|
@@ -25,12 +28,7 @@ class GoTyposquatDetector(TyposquatDetector):
|
|
|
25
28
|
)
|
|
26
29
|
|
|
27
30
|
top_packages_path = os.path.join(resources_dir, top_packages_filename)
|
|
28
|
-
|
|
29
|
-
top_packages_information = None
|
|
30
|
-
|
|
31
|
-
if top_packages_filename in os.listdir(resources_dir):
|
|
32
|
-
with open(top_packages_path, "r") as top_packages_file:
|
|
33
|
-
top_packages_information = json.load(top_packages_file)
|
|
31
|
+
top_packages_information = self._get_top_packages_local(top_packages_path)
|
|
34
32
|
|
|
35
33
|
if top_packages_information is None:
|
|
36
34
|
raise Exception(
|
|
@@ -39,6 +37,15 @@ class GoTyposquatDetector(TyposquatDetector):
|
|
|
39
37
|
|
|
40
38
|
return set(top_packages_information)
|
|
41
39
|
|
|
40
|
+
def _get_top_packages_local(self, path: str) -> list[dict] | None:
|
|
41
|
+
try:
|
|
42
|
+
with open(path, "r") as f:
|
|
43
|
+
result = json.load(f)
|
|
44
|
+
return result
|
|
45
|
+
except FileNotFoundError:
|
|
46
|
+
log.error(f"File not found: {path}")
|
|
47
|
+
return None
|
|
48
|
+
|
|
42
49
|
def detect(
|
|
43
50
|
self,
|
|
44
51
|
package_info,
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import json
|
|
2
|
+
import logging
|
|
2
3
|
import os
|
|
3
4
|
from datetime import datetime, timedelta
|
|
4
5
|
from typing import Optional
|
|
@@ -7,6 +8,8 @@ from guarddog.analyzer.metadata.typosquatting import TyposquatDetector
|
|
|
7
8
|
from guarddog.utils.config import TOP_PACKAGES_CACHE_LOCATION
|
|
8
9
|
import requests
|
|
9
10
|
|
|
11
|
+
log = logging.getLogger("guarddog")
|
|
12
|
+
|
|
10
13
|
|
|
11
14
|
class NPMTyposquatDetector(TyposquatDetector):
|
|
12
15
|
"""Detector for typosquatting attacks. Detects if a package name is a typosquat of one of the top 5000 packages.
|
|
@@ -32,24 +35,52 @@ class NPMTyposquatDetector(TyposquatDetector):
|
|
|
32
35
|
)
|
|
33
36
|
|
|
34
37
|
top_packages_path = os.path.join(resources_dir, top_packages_filename)
|
|
38
|
+
top_packages_information = self._get_top_packages_local(top_packages_path)
|
|
35
39
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
+
if self._file_is_expired(top_packages_path, days=30):
|
|
41
|
+
new_information = self._get_top_packages_network(popular_packages_url)
|
|
42
|
+
if new_information is not None:
|
|
43
|
+
top_packages_information = new_information
|
|
40
44
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
top_packages_information = json.load(top_packages_file)
|
|
45
|
+
with open(top_packages_path, "w+") as f:
|
|
46
|
+
json.dump(new_information, f, ensure_ascii=False, indent=4)
|
|
44
47
|
|
|
45
48
|
if top_packages_information is None:
|
|
46
|
-
|
|
47
|
-
top_packages_information = list([i["name"] for i in response[0:8000]])
|
|
48
|
-
with open(top_packages_path, "w+") as f:
|
|
49
|
-
json.dump(top_packages_information, f, ensure_ascii=False, indent=4)
|
|
50
|
-
|
|
49
|
+
return set()
|
|
51
50
|
return set(top_packages_information)
|
|
52
51
|
|
|
52
|
+
def _file_is_expired(self, path: str, days: int) -> bool:
|
|
53
|
+
try:
|
|
54
|
+
update_time = datetime.fromtimestamp(os.path.getmtime(path))
|
|
55
|
+
return datetime.now() - update_time > timedelta(days=days)
|
|
56
|
+
except FileNotFoundError:
|
|
57
|
+
return True
|
|
58
|
+
|
|
59
|
+
def _get_top_packages_local(self, path: str) -> list[dict] | None:
|
|
60
|
+
try:
|
|
61
|
+
with open(path, "r") as f:
|
|
62
|
+
result = json.load(f)
|
|
63
|
+
return result
|
|
64
|
+
except FileNotFoundError:
|
|
65
|
+
log.error(f"File not found: {path}")
|
|
66
|
+
return None
|
|
67
|
+
|
|
68
|
+
def _get_top_packages_network(
    self, url: str, timeout: float = 30.0
) -> list[str] | None:
    """Download the popular-packages listing and extract the package names.

    Args:
        url: Endpoint expected to return a JSON array of objects that each
            carry a ``"name"`` key.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``"name"`` values of the first 8000 entries, or ``None`` when the
        request fails, the body is not valid JSON, or the payload does not
        have the expected shape.
    """
    try:
        # requests has no default timeout; without one a stalled connection
        # would block the scan forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        response_data = response.json()
        return [entry["name"] for entry in response_data[0:8000]]
    except json.JSONDecodeError:
        log.error(f'Couldn`t convert to json: "{response.text}"')
        return None
    except requests.exceptions.RequestException as e:
        log.error(f"Network error: {e}")
        return None
    except (KeyError, TypeError) as e:
        # Valid JSON that is not an array of {"name": ...} objects.
        log.error(f"Unexpected top packages payload from {url}: {e!r}")
        return None
|
|
83
|
+
|
|
53
84
|
def detect(
|
|
54
85
|
self,
|
|
55
86
|
package_info,
|
|
@@ -51,27 +51,61 @@ class PypiTyposquatDetector(TyposquatDetector):
|
|
|
51
51
|
)
|
|
52
52
|
|
|
53
53
|
top_packages_path = os.path.join(resources_dir, top_packages_filename)
|
|
54
|
+
top_packages_information = self._get_top_packages_local(top_packages_path)
|
|
54
55
|
|
|
55
|
-
|
|
56
|
+
if self._file_is_expired(top_packages_path, days=30):
|
|
57
|
+
new_information = self._get_top_packages_network(popular_packages_url)
|
|
58
|
+
if new_information is not None:
|
|
59
|
+
top_packages_information = new_information
|
|
56
60
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
if datetime.now() - update_time <= timedelta(days=30):
|
|
61
|
-
with open(top_packages_path, "r") as top_packages_file:
|
|
62
|
-
top_packages_information = json.load(top_packages_file)["rows"]
|
|
61
|
+
with open(top_packages_path, "w+") as f:
|
|
62
|
+
json.dump(new_information, f, ensure_ascii=False, indent=4)
|
|
63
63
|
|
|
64
64
|
if top_packages_information is None:
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
65
|
+
return set()
|
|
66
|
+
return set(map(self.get_safe_name, top_packages_information))
|
|
67
|
+
|
|
68
|
+
@staticmethod
def get_safe_name(package):
    """Return the canonicalized form of the ``"project"`` value of ``package``."""
    project_name = package["project"]
    return packaging.utils.canonicalize_name(project_name)
|
|
71
|
+
|
|
72
|
+
def _file_is_expired(self, path: str, days: int) -> bool:
|
|
73
|
+
try:
|
|
74
|
+
update_time = datetime.fromtimestamp(os.path.getmtime(path))
|
|
75
|
+
return datetime.now() - update_time > timedelta(days=days)
|
|
76
|
+
except FileNotFoundError:
|
|
77
|
+
return True
|
|
78
|
+
|
|
79
|
+
def _get_top_packages_local(self, path: str) -> list[dict] | None:
|
|
80
|
+
try:
|
|
81
|
+
with open(path, "r") as f:
|
|
82
|
+
result = json.load(f)
|
|
83
|
+
return self.extract_information(result)
|
|
84
|
+
except FileNotFoundError:
|
|
85
|
+
log.error(f"File not found: {path}")
|
|
86
|
+
return None
|
|
87
|
+
|
|
88
|
+
def _get_top_packages_network(
    self, url: str, timeout: float = 30.0
) -> list[dict] | None:
    """Download the popular-packages listing and return its rows.

    Args:
        url: Endpoint expected to return a JSON object with a ``"rows"`` key.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value stored under ``"rows"``, or ``None`` when the request
        fails, the body is not valid JSON, or the payload is not a JSON
        object.
    """
    try:
        # requests has no default timeout; without one a stalled connection
        # would block the scan forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        response_data = response.json()
    except json.JSONDecodeError:
        log.error(f'Couldn`t convert to json: "{response.text}"')
        return None
    except requests.exceptions.RequestException as e:
        log.error(f"Network error: {e}")
        return None

    if not isinstance(response_data, dict):
        # extract_information reads the "rows" key; any other JSON value
        # would fail with AttributeError instead of falling back to the
        # local data.
        log.error(f"Unexpected top packages payload from {url}")
        return None
    return self.extract_information(response_data)
|
|
103
|
+
|
|
104
|
+
@staticmethod
|
|
105
|
+
def extract_information(data: dict | None) -> list[dict] | None:
|
|
106
|
+
if data is not None:
|
|
107
|
+
return data.get("rows")
|
|
108
|
+
return None
|
|
75
109
|
|
|
76
110
|
def detect(
|
|
77
111
|
self,
|