guarddog 2.8.4__py3-none-any.whl → 2.9.0__py3-none-any.whl
This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- guarddog/analyzer/metadata/go/typosquatting.py +11 -28
- guarddog/analyzer/metadata/npm/direct_url_dependency.py +0 -1
- guarddog/analyzer/metadata/npm/typosquatting.py +24 -59
- guarddog/analyzer/metadata/pypi/typosquatting.py +20 -77
- guarddog/analyzer/metadata/resources/top_go_packages.json +2926 -2923
- guarddog/analyzer/metadata/resources/top_npm_packages.json +8005 -8002
- guarddog/analyzer/metadata/resources/top_pypi_packages.json +15003 -60021
- guarddog/analyzer/metadata/resources/top_rubygems_packages.json +979 -976
- guarddog/analyzer/metadata/rubygems/typosquatting.py +9 -58
- guarddog/analyzer/metadata/typosquatting.py +218 -0
- guarddog/analyzer/sourcecode/screenshot.yml +38 -0
- guarddog/scanners/npm_project_scanner.py +1 -1
- guarddog/scanners/scanner.py +2 -4
- guarddog/utils/archives.py +1 -1
- {guarddog-2.8.4.dist-info → guarddog-2.9.0.dist-info}/METADATA +3 -3
- {guarddog-2.8.4.dist-info → guarddog-2.9.0.dist-info}/RECORD +21 -20
- {guarddog-2.8.4.dist-info → guarddog-2.9.0.dist-info}/WHEEL +1 -1
- {guarddog-2.8.4.dist-info → guarddog-2.9.0.dist-info}/entry_points.txt +0 -0
- {guarddog-2.8.4.dist-info → guarddog-2.9.0.dist-info}/licenses/LICENSE +0 -0
- {guarddog-2.8.4.dist-info → guarddog-2.9.0.dist-info}/licenses/LICENSE-3rdparty.csv +0 -0
- {guarddog-2.8.4.dist-info → guarddog-2.9.0.dist-info}/licenses/NOTICE +0 -0
|
@@ -1,12 +1,6 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import logging
|
|
3
|
-
import os
|
|
4
1
|
from typing import Optional
|
|
5
2
|
|
|
6
3
|
from guarddog.analyzer.metadata.typosquatting import TyposquatDetector
|
|
7
|
-
from guarddog.utils.config import TOP_PACKAGES_CACHE_LOCATION
|
|
8
|
-
|
|
9
|
-
log = logging.getLogger("guarddog")
|
|
10
4
|
|
|
11
5
|
|
|
12
6
|
class GoTyposquatDetector(TyposquatDetector):
|
|
@@ -19,32 +13,21 @@ class GoTyposquatDetector(TyposquatDetector):
|
|
|
19
13
|
"""
|
|
20
14
|
|
|
21
15
|
def _get_top_packages(self) -> set:
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
top_packages_path = os.path.join(resources_dir, top_packages_filename)
|
|
31
|
-
top_packages_information = self._get_top_packages_local(top_packages_path)
|
|
16
|
+
"""
|
|
17
|
+
Gets the top Go packages from local cache.
|
|
18
|
+
Uses the base class implementation without network refresh.
|
|
19
|
+
"""
|
|
20
|
+
packages = self._get_top_packages_with_refresh(
|
|
21
|
+
packages_filename="top_go_packages.json",
|
|
22
|
+
popular_packages_url=None, # No URL = no auto-refresh
|
|
23
|
+
)
|
|
32
24
|
|
|
33
|
-
if
|
|
25
|
+
if not packages:
|
|
34
26
|
raise Exception(
|
|
35
|
-
|
|
27
|
+
"Could not retrieve top Go packages from top_go_packages.json"
|
|
36
28
|
)
|
|
37
29
|
|
|
38
|
-
return
|
|
39
|
-
|
|
40
|
-
def _get_top_packages_local(self, path: str) -> list[dict] | None:
|
|
41
|
-
try:
|
|
42
|
-
with open(path, "r") as f:
|
|
43
|
-
result = json.load(f)
|
|
44
|
-
return result
|
|
45
|
-
except FileNotFoundError:
|
|
46
|
-
log.error(f"File not found: {path}")
|
|
47
|
-
return None
|
|
30
|
+
return packages
|
|
48
31
|
|
|
49
32
|
def detect(
|
|
50
33
|
self,
|
|
@@ -1,14 +1,6 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import logging
|
|
3
|
-
import os
|
|
4
|
-
from datetime import datetime, timedelta
|
|
5
1
|
from typing import Optional
|
|
6
2
|
|
|
7
3
|
from guarddog.analyzer.metadata.typosquatting import TyposquatDetector
|
|
8
|
-
from guarddog.utils.config import TOP_PACKAGES_CACHE_LOCATION
|
|
9
|
-
import requests
|
|
10
|
-
|
|
11
|
-
log = logging.getLogger("guarddog")
|
|
12
4
|
|
|
13
5
|
|
|
14
6
|
class NPMTyposquatDetector(TyposquatDetector):
|
|
@@ -21,65 +13,38 @@ class NPMTyposquatDetector(TyposquatDetector):
|
|
|
21
13
|
"""
|
|
22
14
|
|
|
23
15
|
def _get_top_packages(self) -> set:
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
16
|
+
"""
|
|
17
|
+
Gets the top 8000 most popular NPM packages.
|
|
18
|
+
Uses the base class implementation with NPM-specific parameters.
|
|
19
|
+
"""
|
|
20
|
+
return self._get_top_packages_with_refresh(
|
|
21
|
+
packages_filename="top_npm_packages.json",
|
|
22
|
+
popular_packages_url="https://github.com/LeoDog896/npm-rank/releases/download/latest/raw.json",
|
|
23
|
+
refresh_days=30,
|
|
27
24
|
)
|
|
28
25
|
|
|
29
|
-
|
|
26
|
+
def _extract_package_names(self, data: dict | list | None) -> list | None:
|
|
27
|
+
"""
|
|
28
|
+
Extract package names from NPM data structure.
|
|
30
29
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
resources_dir = os.path.abspath(
|
|
34
|
-
os.path.join(os.path.dirname(__file__), "..", "resources")
|
|
35
|
-
)
|
|
30
|
+
Network response format: [{"name": "package-name", ...}, ...]
|
|
31
|
+
Local file format: ["package-name", "package-name", ...]
|
|
36
32
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
if self._file_is_expired(top_packages_path, days=30):
|
|
41
|
-
new_information = self._get_top_packages_network(popular_packages_url)
|
|
42
|
-
if new_information is not None:
|
|
43
|
-
top_packages_information = new_information
|
|
44
|
-
|
|
45
|
-
with open(top_packages_path, "w+") as f:
|
|
46
|
-
json.dump(new_information, f, ensure_ascii=False, indent=4)
|
|
47
|
-
|
|
48
|
-
if top_packages_information is None:
|
|
49
|
-
return set()
|
|
50
|
-
return set(top_packages_information)
|
|
51
|
-
|
|
52
|
-
def _file_is_expired(self, path: str, days: int) -> bool:
|
|
53
|
-
try:
|
|
54
|
-
update_time = datetime.fromtimestamp(os.path.getmtime(path))
|
|
55
|
-
return datetime.now() - update_time > timedelta(days=days)
|
|
56
|
-
except FileNotFoundError:
|
|
57
|
-
return True
|
|
58
|
-
|
|
59
|
-
def _get_top_packages_local(self, path: str) -> list[dict] | None:
|
|
60
|
-
try:
|
|
61
|
-
with open(path, "r") as f:
|
|
62
|
-
result = json.load(f)
|
|
63
|
-
return result
|
|
64
|
-
except FileNotFoundError:
|
|
65
|
-
log.error(f"File not found: {path}")
|
|
33
|
+
This method handles both formats and limits to top 8000 packages.
|
|
34
|
+
"""
|
|
35
|
+
if data is None:
|
|
66
36
|
return None
|
|
67
37
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
38
|
+
# If data is already a list of strings (local file format)
|
|
39
|
+
if isinstance(data, list) and len(data) > 0:
|
|
40
|
+
if isinstance(data[0], str):
|
|
41
|
+
return data
|
|
72
42
|
|
|
73
|
-
|
|
74
|
-
|
|
43
|
+
# If data is list of dicts (network response format)
|
|
44
|
+
if isinstance(data[0], dict) and "name" in data[0]:
|
|
45
|
+
return [item["name"] for item in data[0:8000]]
|
|
75
46
|
|
|
76
|
-
|
|
77
|
-
except json.JSONDecodeError:
|
|
78
|
-
log.error(f'Couldn`t convert to json: "{response.text}"')
|
|
79
|
-
return None
|
|
80
|
-
except requests.exceptions.RequestException as e:
|
|
81
|
-
log.error(f"Network error: {e}")
|
|
82
|
-
return None
|
|
47
|
+
return None
|
|
83
48
|
|
|
84
49
|
def detect(
|
|
85
50
|
self,
|
|
@@ -1,14 +1,9 @@
|
|
|
1
|
-
import json
|
|
2
1
|
import logging
|
|
3
|
-
import os
|
|
4
|
-
from datetime import datetime, timedelta
|
|
5
2
|
from typing import Optional
|
|
6
3
|
|
|
7
|
-
import requests
|
|
8
4
|
import packaging.utils
|
|
9
5
|
|
|
10
6
|
from guarddog.analyzer.metadata.typosquatting import TyposquatDetector
|
|
11
|
-
from guarddog.utils.config import TOP_PACKAGES_CACHE_LOCATION
|
|
12
7
|
|
|
13
8
|
log = logging.getLogger("guarddog")
|
|
14
9
|
|
|
@@ -25,87 +20,35 @@ class PypiTyposquatDetector(TyposquatDetector):
|
|
|
25
20
|
|
|
26
21
|
def _get_top_packages(self) -> set:
|
|
27
22
|
"""
|
|
28
|
-
Gets the package information of the top 5000 most downloaded PyPI packages
|
|
29
|
-
|
|
30
|
-
Returns:
|
|
31
|
-
set: set of package data in the format:
|
|
32
|
-
{
|
|
33
|
-
...
|
|
34
|
-
{
|
|
35
|
-
download_count: ...
|
|
36
|
-
project: <package-name>
|
|
37
|
-
}
|
|
38
|
-
...
|
|
39
|
-
}
|
|
23
|
+
Gets the package information of the top 5000 most downloaded PyPI packages.
|
|
24
|
+
Uses the base class implementation with PyPI-specific parameters.
|
|
40
25
|
"""
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
"https://hugovk.github.io/top-pypi-packages/top-pypi-packages.min.json"
|
|
26
|
+
packages = self._get_top_packages_with_refresh(
|
|
27
|
+
packages_filename="top_pypi_packages.json",
|
|
28
|
+
popular_packages_url="https://hugovk.github.io/top-pypi-packages/top-pypi-packages.min.json",
|
|
29
|
+
refresh_days=30,
|
|
44
30
|
)
|
|
45
31
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
if resources_dir is None:
|
|
49
|
-
resources_dir = os.path.abspath(
|
|
50
|
-
os.path.join(os.path.dirname(__file__), "..", "resources")
|
|
51
|
-
)
|
|
52
|
-
|
|
53
|
-
top_packages_path = os.path.join(resources_dir, top_packages_filename)
|
|
54
|
-
top_packages_information = self._get_top_packages_local(top_packages_path)
|
|
55
|
-
|
|
56
|
-
if self._file_is_expired(top_packages_path, days=30):
|
|
57
|
-
new_information = self._get_top_packages_network(popular_packages_url)
|
|
58
|
-
if new_information is not None:
|
|
59
|
-
top_packages_information = new_information
|
|
60
|
-
|
|
61
|
-
with open(top_packages_path, "w+") as f:
|
|
62
|
-
json.dump(new_information, f, ensure_ascii=False, indent=4)
|
|
63
|
-
|
|
64
|
-
if top_packages_information is None:
|
|
65
|
-
return set()
|
|
66
|
-
return set(map(self.get_safe_name, top_packages_information))
|
|
32
|
+
# Apply canonicalization to PyPI package names
|
|
33
|
+
return set(map(self._canonicalize_name, packages))
|
|
67
34
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
update_time = datetime.fromtimestamp(os.path.getmtime(path))
|
|
75
|
-
return datetime.now() - update_time > timedelta(days=days)
|
|
76
|
-
except FileNotFoundError:
|
|
77
|
-
return True
|
|
78
|
-
|
|
79
|
-
def _get_top_packages_local(self, path: str) -> list[dict] | None:
|
|
80
|
-
try:
|
|
81
|
-
with open(path, "r") as f:
|
|
82
|
-
result = json.load(f)
|
|
83
|
-
return self.extract_information(result)
|
|
84
|
-
except FileNotFoundError:
|
|
85
|
-
log.error(f"File not found: {path}")
|
|
35
|
+
def _extract_package_names(self, data: dict | list | None) -> list | None:
|
|
36
|
+
"""
|
|
37
|
+
Extract package names from PyPI data structure.
|
|
38
|
+
PyPI data has format: {"rows": [{"project": "name", "download_count": ...}, ...]}
|
|
39
|
+
"""
|
|
40
|
+
if data is None:
|
|
86
41
|
return None
|
|
87
42
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
response = requests.get(url)
|
|
91
|
-
response.raise_for_status()
|
|
92
|
-
|
|
93
|
-
response_data = response.json()
|
|
94
|
-
result = response_data
|
|
43
|
+
if isinstance(data, dict) and "rows" in data:
|
|
44
|
+
return [row["project"] for row in data["rows"]]
|
|
95
45
|
|
|
96
|
-
|
|
97
|
-
except json.JSONDecodeError:
|
|
98
|
-
log.error(f'Couldn`t convert to json: "{response.text}"')
|
|
99
|
-
return None
|
|
100
|
-
except requests.exceptions.RequestException as e:
|
|
101
|
-
log.error(f"Network error: {e}")
|
|
102
|
-
return None
|
|
46
|
+
return None
|
|
103
47
|
|
|
104
48
|
@staticmethod
|
|
105
|
-
def
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
return None
|
|
49
|
+
def _canonicalize_name(package_name: str) -> str:
|
|
50
|
+
"""Canonicalize PyPI package names according to PEP 503."""
|
|
51
|
+
return packaging.utils.canonicalize_name(package_name)
|
|
109
52
|
|
|
110
53
|
def detect(
|
|
111
54
|
self,
|