guarddog 2.8.4__py3-none-any.whl → 2.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,6 @@
1
- import json
2
- import logging
3
- import os
4
1
  from typing import Optional
5
2
 
6
3
  from guarddog.analyzer.metadata.typosquatting import TyposquatDetector
7
- from guarddog.utils.config import TOP_PACKAGES_CACHE_LOCATION
8
-
9
- log = logging.getLogger("guarddog")
10
4
 
11
5
 
12
6
  class GoTyposquatDetector(TyposquatDetector):
@@ -19,32 +13,21 @@ class GoTyposquatDetector(TyposquatDetector):
19
13
  """
20
14
 
21
15
  def _get_top_packages(self) -> set:
22
- top_packages_filename = "top_go_packages.json"
23
-
24
- resources_dir = TOP_PACKAGES_CACHE_LOCATION
25
- if resources_dir is None:
26
- resources_dir = os.path.abspath(
27
- os.path.join(os.path.dirname(__file__), "..", "resources")
28
- )
29
-
30
- top_packages_path = os.path.join(resources_dir, top_packages_filename)
31
- top_packages_information = self._get_top_packages_local(top_packages_path)
16
+ """
17
+ Gets the top Go packages from local cache.
18
+ Uses the base class implementation without network refresh.
19
+ """
20
+ packages = self._get_top_packages_with_refresh(
21
+ packages_filename="top_go_packages.json",
22
+ popular_packages_url=None, # No URL = no auto-refresh
23
+ )
32
24
 
33
- if top_packages_information is None:
25
+ if not packages:
34
26
  raise Exception(
35
- f"Could not retrieve top Go packages from {top_packages_path}"
27
+ "Could not retrieve top Go packages from top_go_packages.json"
36
28
  )
37
29
 
38
- return set(top_packages_information)
39
-
40
- def _get_top_packages_local(self, path: str) -> list[dict] | None:
41
- try:
42
- with open(path, "r") as f:
43
- result = json.load(f)
44
- return result
45
- except FileNotFoundError:
46
- log.error(f"File not found: {path}")
47
- return None
30
+ return packages
48
31
 
49
32
  def detect(
50
33
  self,
@@ -10,7 +10,6 @@ from guarddog.analyzer.metadata.detector import Detector
10
10
 
11
11
  from urllib.parse import urlparse
12
12
 
13
-
14
13
  github_project_pattern = re.compile(r"^([\w\-\.]+)/([\w\-\.]+)")
15
14
 
16
15
 
@@ -1,14 +1,6 @@
1
- import json
2
- import logging
3
- import os
4
- from datetime import datetime, timedelta
5
1
  from typing import Optional
6
2
 
7
3
  from guarddog.analyzer.metadata.typosquatting import TyposquatDetector
8
- from guarddog.utils.config import TOP_PACKAGES_CACHE_LOCATION
9
- import requests
10
-
11
- log = logging.getLogger("guarddog")
12
4
 
13
5
 
14
6
  class NPMTyposquatDetector(TyposquatDetector):
@@ -21,65 +13,38 @@ class NPMTyposquatDetector(TyposquatDetector):
21
13
  """
22
14
 
23
15
  def _get_top_packages(self) -> set:
24
-
25
- popular_packages_url = (
26
- "https://github.com/LeoDog896/npm-rank/releases/download/latest/raw.json"
16
+ """
17
+ Gets the top 8000 most popular NPM packages.
18
+ Uses the base class implementation with NPM-specific parameters.
19
+ """
20
+ return self._get_top_packages_with_refresh(
21
+ packages_filename="top_npm_packages.json",
22
+ popular_packages_url="https://github.com/LeoDog896/npm-rank/releases/download/latest/raw.json",
23
+ refresh_days=30,
27
24
  )
28
25
 
29
- top_packages_filename = "top_npm_packages.json"
26
+ def _extract_package_names(self, data: dict | list | None) -> list | None:
27
+ """
28
+ Extract package names from NPM data structure.
30
29
 
31
- resources_dir = TOP_PACKAGES_CACHE_LOCATION
32
- if resources_dir is None:
33
- resources_dir = os.path.abspath(
34
- os.path.join(os.path.dirname(__file__), "..", "resources")
35
- )
30
+ Network response format: [{"name": "package-name", ...}, ...]
31
+ Local file format: ["package-name", "package-name", ...]
36
32
 
37
- top_packages_path = os.path.join(resources_dir, top_packages_filename)
38
- top_packages_information = self._get_top_packages_local(top_packages_path)
39
-
40
- if self._file_is_expired(top_packages_path, days=30):
41
- new_information = self._get_top_packages_network(popular_packages_url)
42
- if new_information is not None:
43
- top_packages_information = new_information
44
-
45
- with open(top_packages_path, "w+") as f:
46
- json.dump(new_information, f, ensure_ascii=False, indent=4)
47
-
48
- if top_packages_information is None:
49
- return set()
50
- return set(top_packages_information)
51
-
52
- def _file_is_expired(self, path: str, days: int) -> bool:
53
- try:
54
- update_time = datetime.fromtimestamp(os.path.getmtime(path))
55
- return datetime.now() - update_time > timedelta(days=days)
56
- except FileNotFoundError:
57
- return True
58
-
59
- def _get_top_packages_local(self, path: str) -> list[dict] | None:
60
- try:
61
- with open(path, "r") as f:
62
- result = json.load(f)
63
- return result
64
- except FileNotFoundError:
65
- log.error(f"File not found: {path}")
33
+ This method handles both formats and limits to top 8000 packages.
34
+ """
35
+ if data is None:
66
36
  return None
67
37
 
68
- def _get_top_packages_network(self, url: str) -> list[dict] | None:
69
- try:
70
- response = requests.get(url)
71
- response.raise_for_status()
38
+ # If data is already a list of strings (local file format)
39
+ if isinstance(data, list) and len(data) > 0:
40
+ if isinstance(data[0], str):
41
+ return data
72
42
 
73
- response_data = response.json()
74
- result = list([i["name"] for i in response_data[0:8000]])
43
+ # If data is list of dicts (network response format)
44
+ if isinstance(data[0], dict) and "name" in data[0]:
45
+ return [item["name"] for item in data[0:8000]]
75
46
 
76
- return result
77
- except json.JSONDecodeError:
78
- log.error(f'Couldn`t convert to json: "{response.text}"')
79
- return None
80
- except requests.exceptions.RequestException as e:
81
- log.error(f"Network error: {e}")
82
- return None
47
+ return None
83
48
 
84
49
  def detect(
85
50
  self,
@@ -1,14 +1,9 @@
1
- import json
2
1
  import logging
3
- import os
4
- from datetime import datetime, timedelta
5
2
  from typing import Optional
6
3
 
7
- import requests
8
4
  import packaging.utils
9
5
 
10
6
  from guarddog.analyzer.metadata.typosquatting import TyposquatDetector
11
- from guarddog.utils.config import TOP_PACKAGES_CACHE_LOCATION
12
7
 
13
8
  log = logging.getLogger("guarddog")
14
9
 
@@ -25,87 +20,35 @@ class PypiTyposquatDetector(TyposquatDetector):
25
20
 
26
21
  def _get_top_packages(self) -> set:
27
22
  """
28
- Gets the package information of the top 5000 most downloaded PyPI packages
29
-
30
- Returns:
31
- set: set of package data in the format:
32
- {
33
- ...
34
- {
35
- download_count: ...
36
- project: <package-name>
37
- }
38
- ...
39
- }
23
+ Gets the package information of the top 5000 most downloaded PyPI packages.
24
+ Uses the base class implementation with PyPI-specific parameters.
40
25
  """
41
-
42
- popular_packages_url = (
43
- "https://hugovk.github.io/top-pypi-packages/top-pypi-packages.min.json"
26
+ packages = self._get_top_packages_with_refresh(
27
+ packages_filename="top_pypi_packages.json",
28
+ popular_packages_url="https://hugovk.github.io/top-pypi-packages/top-pypi-packages.min.json",
29
+ refresh_days=30,
44
30
  )
45
31
 
46
- top_packages_filename = "top_pypi_packages.json"
47
- resources_dir = TOP_PACKAGES_CACHE_LOCATION
48
- if resources_dir is None:
49
- resources_dir = os.path.abspath(
50
- os.path.join(os.path.dirname(__file__), "..", "resources")
51
- )
52
-
53
- top_packages_path = os.path.join(resources_dir, top_packages_filename)
54
- top_packages_information = self._get_top_packages_local(top_packages_path)
55
-
56
- if self._file_is_expired(top_packages_path, days=30):
57
- new_information = self._get_top_packages_network(popular_packages_url)
58
- if new_information is not None:
59
- top_packages_information = new_information
60
-
61
- with open(top_packages_path, "w+") as f:
62
- json.dump(new_information, f, ensure_ascii=False, indent=4)
63
-
64
- if top_packages_information is None:
65
- return set()
66
- return set(map(self.get_safe_name, top_packages_information))
32
+ # Apply canonicalization to PyPI package names
33
+ return set(map(self._canonicalize_name, packages))
67
34
 
68
- @staticmethod
69
- def get_safe_name(package):
70
- return packaging.utils.canonicalize_name(package["project"])
71
-
72
- def _file_is_expired(self, path: str, days: int) -> bool:
73
- try:
74
- update_time = datetime.fromtimestamp(os.path.getmtime(path))
75
- return datetime.now() - update_time > timedelta(days=days)
76
- except FileNotFoundError:
77
- return True
78
-
79
- def _get_top_packages_local(self, path: str) -> list[dict] | None:
80
- try:
81
- with open(path, "r") as f:
82
- result = json.load(f)
83
- return self.extract_information(result)
84
- except FileNotFoundError:
85
- log.error(f"File not found: {path}")
35
+ def _extract_package_names(self, data: dict | list | None) -> list | None:
36
+ """
37
+ Extract package names from PyPI data structure.
38
+ PyPI data has format: {"rows": [{"project": "name", "download_count": ...}, ...]}
39
+ """
40
+ if data is None:
86
41
  return None
87
42
 
88
- def _get_top_packages_network(self, url: str) -> list[dict] | None:
89
- try:
90
- response = requests.get(url)
91
- response.raise_for_status()
92
-
93
- response_data = response.json()
94
- result = response_data
43
+ if isinstance(data, dict) and "rows" in data:
44
+ return [row["project"] for row in data["rows"]]
95
45
 
96
- return self.extract_information(result)
97
- except json.JSONDecodeError:
98
- log.error(f'Couldn`t convert to json: "{response.text}"')
99
- return None
100
- except requests.exceptions.RequestException as e:
101
- log.error(f"Network error: {e}")
102
- return None
46
+ return None
103
47
 
104
48
  @staticmethod
105
- def extract_information(data: dict | None) -> list[dict] | None:
106
- if data is not None:
107
- return data.get("rows")
108
- return None
49
+ def _canonicalize_name(package_name: str) -> str:
50
+ """Canonicalize PyPI package names according to PEP 503."""
51
+ return packaging.utils.canonicalize_name(package_name)
109
52
 
110
53
  def detect(
111
54
  self,