guarddog 2.6.0__py3-none-any.whl → 2.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61):
  1. guarddog/analyzer/analyzer.py +51 -23
  2. guarddog/analyzer/metadata/__init__.py +2 -0
  3. guarddog/analyzer/metadata/bundled_binary.py +6 -6
  4. guarddog/analyzer/metadata/deceptive_author.py +3 -1
  5. guarddog/analyzer/metadata/detector.py +7 -2
  6. guarddog/analyzer/metadata/empty_information.py +8 -3
  7. guarddog/analyzer/metadata/go/typosquatting.py +17 -9
  8. guarddog/analyzer/metadata/npm/bundled_binary.py +7 -2
  9. guarddog/analyzer/metadata/npm/deceptive_author.py +1 -1
  10. guarddog/analyzer/metadata/npm/direct_url_dependency.py +2 -1
  11. guarddog/analyzer/metadata/npm/empty_information.py +10 -7
  12. guarddog/analyzer/metadata/npm/potentially_compromised_email_domain.py +4 -3
  13. guarddog/analyzer/metadata/npm/release_zero.py +13 -5
  14. guarddog/analyzer/metadata/npm/typosquatting.py +44 -13
  15. guarddog/analyzer/metadata/npm/unclaimed_maintainer_email_domain.py +3 -2
  16. guarddog/analyzer/metadata/npm/utils.py +4 -5
  17. guarddog/analyzer/metadata/potentially_compromised_email_domain.py +8 -4
  18. guarddog/analyzer/metadata/pypi/__init__.py +12 -6
  19. guarddog/analyzer/metadata/pypi/bundled_binary.py +7 -2
  20. guarddog/analyzer/metadata/pypi/deceptive_author.py +1 -1
  21. guarddog/analyzer/metadata/pypi/empty_information.py +16 -5
  22. guarddog/analyzer/metadata/pypi/potentially_compromised_email_domain.py +4 -3
  23. guarddog/analyzer/metadata/pypi/release_zero.py +16 -6
  24. guarddog/analyzer/metadata/pypi/repository_integrity_mismatch.py +53 -27
  25. guarddog/analyzer/metadata/pypi/single_python_file.py +9 -4
  26. guarddog/analyzer/metadata/pypi/typosquatting.py +73 -26
  27. guarddog/analyzer/metadata/pypi/unclaimed_maintainer_email_domain.py +6 -2
  28. guarddog/analyzer/metadata/pypi/utils.py +1 -4
  29. guarddog/analyzer/metadata/release_zero.py +1 -1
  30. guarddog/analyzer/metadata/repository_integrity_mismatch.py +10 -3
  31. guarddog/analyzer/metadata/resources/top_pypi_packages.json +29998 -29986
  32. guarddog/analyzer/metadata/typosquatting.py +12 -8
  33. guarddog/analyzer/metadata/unclaimed_maintainer_email_domain.py +7 -2
  34. guarddog/analyzer/metadata/utils.py +1 -1
  35. guarddog/analyzer/sourcecode/__init__.py +34 -7
  36. guarddog/analyzer/sourcecode/api-obfuscation.yml +42 -0
  37. guarddog/analyzer/sourcecode/code-execution.yml +1 -0
  38. guarddog/analyzer/sourcecode/dll-hijacking.yml +5 -0
  39. guarddog/analyzer/sourcecode/shady-links.yml +1 -1
  40. guarddog/analyzer/sourcecode/suspicious_passwd_access_linux.yar +12 -0
  41. guarddog/analyzer/sourcecode/unicode.yml +75 -0
  42. guarddog/ecosystems.py +3 -0
  43. guarddog/scanners/__init__.py +5 -0
  44. guarddog/scanners/extension_scanner.py +152 -0
  45. guarddog/scanners/github_action_scanner.py +6 -2
  46. guarddog/scanners/go_project_scanner.py +1 -1
  47. guarddog/scanners/npm_package_scanner.py +12 -4
  48. guarddog/scanners/pypi_package_scanner.py +9 -3
  49. guarddog/scanners/pypi_project_scanner.py +9 -12
  50. guarddog/scanners/scanner.py +1 -0
  51. guarddog/utils/archives.py +134 -9
  52. guarddog/utils/config.py +24 -2
  53. guarddog/utils/package_info.py +3 -1
  54. {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info}/METADATA +10 -9
  55. guarddog-2.7.1.dist-info/RECORD +100 -0
  56. {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info}/WHEEL +1 -1
  57. guarddog-2.6.0.dist-info/RECORD +0 -96
  58. {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info}/entry_points.txt +0 -0
  59. {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info/licenses}/LICENSE +0 -0
  60. {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info/licenses}/LICENSE-3rdparty.csv +0 -0
  61. {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info/licenses}/NOTICE +0 -0
@@ -16,9 +16,7 @@ from guarddog.ecosystems import ECOSYSTEM
16
16
  MAX_BYTES_DEFAULT = 10_000_000
17
17
  SEMGREP_TIMEOUT_DEFAULT = 10
18
18
 
19
- SOURCECODE_RULES_PATH = os.path.join(
20
- os.path.dirname(__file__), "sourcecode"
21
- )
19
+ SOURCECODE_RULES_PATH = os.path.join(os.path.dirname(__file__), "sourcecode")
22
20
  log = logging.getLogger("guarddog")
23
21
 
24
22
 
@@ -67,7 +65,14 @@ class Analyzer:
67
65
  ".semgrep_logs",
68
66
  ]
69
67
 
70
- def analyze(self, path, info=None, rules=None, name: Optional[str] = None, version: Optional[str] = None) -> dict:
68
+ def analyze(
69
+ self,
70
+ path,
71
+ info=None,
72
+ rules=None,
73
+ name: Optional[str] = None,
74
+ version: Optional[str] = None,
75
+ ) -> dict:
71
76
  """
72
77
  Analyzes a package in the given path
73
78
 
@@ -97,8 +102,14 @@ class Analyzer:
97
102
 
98
103
  return {"issues": issues, "errors": errors, "results": results, "path": path}
99
104
 
100
- def analyze_metadata(self, path: str, info, rules=None, name: Optional[str] = None,
101
- version: Optional[str] = None) -> dict:
105
+ def analyze_metadata(
106
+ self,
107
+ path: str,
108
+ info,
109
+ rules=None,
110
+ name: Optional[str] = None,
111
+ version: Optional[str] = None,
112
+ ) -> dict:
102
113
  """
103
114
  Analyzes the metadata of a given package
104
115
 
@@ -127,7 +138,9 @@ class Analyzer:
127
138
  for rule in all_rules:
128
139
  try:
129
140
  log.debug(f"Running rule {rule} against package '{name}'")
130
- rule_matches, message = self.metadata_detectors[rule].detect(info, path, name, version)
141
+ rule_matches, message = self.metadata_detectors[rule].detect(
142
+ info, path, name, version
143
+ )
131
144
  results[rule] = None
132
145
  if rule_matches:
133
146
  issues += 1
@@ -202,16 +215,21 @@ class Analyzer:
202
215
  continue
203
216
 
204
217
  scan_file_target_abspath = os.path.join(root, f)
205
- scan_file_target_relpath = os.path.relpath(scan_file_target_abspath, path)
218
+ scan_file_target_relpath = os.path.relpath(
219
+ scan_file_target_abspath, path
220
+ )
206
221
 
207
222
  matches = scan_rules.match(scan_file_target_abspath)
208
223
  for m in matches:
224
+
209
225
  for s in m.strings:
210
226
  for i in s.instances:
211
227
  finding = {
212
228
  "location": f"{scan_file_target_relpath}:{i.offset}",
213
229
  "code": self.trim_code_snippet(str(i.matched_data)),
214
- 'message': m.meta.get("description", f"{m.rule} rule matched")
230
+ "message": m.meta.get(
231
+ "description", f"{m.rule} rule matched"
232
+ ),
215
233
  }
216
234
 
217
235
  # since yara can match the multiple times in the same file
@@ -254,10 +272,14 @@ class Analyzer:
254
272
  errors = {}
255
273
  issues = 0
256
274
 
257
- rules_path = list(map(
258
- lambda rule_name: os.path.join(SOURCECODE_RULES_PATH, f"{rule_name}.yml"),
259
- all_rules
260
- ))
275
+ rules_path = list(
276
+ map(
277
+ lambda rule_name: os.path.join(
278
+ SOURCECODE_RULES_PATH, f"{rule_name}.yml"
279
+ ),
280
+ all_rules,
281
+ )
282
+ )
261
283
 
262
284
  if len(rules_path) == 0:
263
285
  log.debug("No semgrep code rules to run")
@@ -266,7 +288,9 @@ class Analyzer:
266
288
  try:
267
289
  log.debug(f"Running semgrep code rules against {path}")
268
290
  response = self._invoke_semgrep(target=path, rules=rules_path)
269
- rule_results = self._format_semgrep_response(response, targetpath=targetpath)
291
+ rule_results = self._format_semgrep_response(
292
+ response, targetpath=targetpath
293
+ )
270
294
  issues += sum(len(res) for res in rule_results.values())
271
295
 
272
296
  results = results | rule_results
@@ -278,9 +302,11 @@ class Analyzer:
278
302
  def _invoke_semgrep(self, target: str, rules: Iterable[str]):
279
303
  try:
280
304
  SEMGREP_MAX_TARGET_BYTES = int(
281
- os.getenv("GUARDDOG_SEMGREP_MAX_TARGET_BYTES", MAX_BYTES_DEFAULT))
305
+ os.getenv("GUARDDOG_SEMGREP_MAX_TARGET_BYTES", MAX_BYTES_DEFAULT)
306
+ )
282
307
  SEMGREP_TIMEOUT = int(
283
- os.getenv("GUARDDOG_SEMGREP_TIMEOUT", SEMGREP_TIMEOUT_DEFAULT))
308
+ os.getenv("GUARDDOG_SEMGREP_TIMEOUT", SEMGREP_TIMEOUT_DEFAULT)
309
+ )
284
310
  cmd = ["semgrep"]
285
311
  for rule in rules:
286
312
  cmd.extend(["--config", rule])
@@ -295,7 +321,9 @@ class Analyzer:
295
321
  cmd.append(f"--max-target-bytes={SEMGREP_MAX_TARGET_BYTES}")
296
322
  cmd.append(target)
297
323
  log.debug(f"Invoking semgrep with command line: {' '.join(cmd)}")
298
- result = subprocess.run(cmd, capture_output=True, check=True, encoding="utf-8")
324
+ result = subprocess.run(
325
+ cmd, capture_output=True, check=True, encoding="utf-8"
326
+ )
299
327
  return json.loads(str(result.stdout))
300
328
  except FileNotFoundError:
301
329
  raise Exception("unable to find semgrep binary")
@@ -358,9 +386,9 @@ output: {e.output}
358
386
  location = file_path + ":" + str(start_line)
359
387
 
360
388
  finding = {
361
- 'location': location,
362
- 'code': code,
363
- 'message': result["extra"]["message"]
389
+ "location": location,
390
+ "code": code,
391
+ "message": result["extra"]["message"],
364
392
  }
365
393
 
366
394
  rule_results = results[rule_name]
@@ -384,7 +412,7 @@ output: {e.output}
384
412
  """
385
413
  snippet = []
386
414
  try:
387
- with open(file_path, 'r') as file:
415
+ with open(file_path, "r") as file:
388
416
  for current_line_number, line in enumerate(file, start=1):
389
417
  if start_line <= current_line_number <= end_line:
390
418
  snippet.append(line)
@@ -395,12 +423,12 @@ output: {e.output}
395
423
  except Exception as e:
396
424
  log.error(f"Error reading file {file_path}: {str(e)}")
397
425
 
398
- return ''.join(snippet)
426
+ return "".join(snippet)
399
427
 
400
428
  # Makes sure the matching code to be displayed isn't too long
401
429
  def trim_code_snippet(self, code):
402
430
  THRESHOLD = 250
403
431
  if len(code) > THRESHOLD:
404
- return code[: THRESHOLD - 10] + '...' + code[len(code) - 10:]
432
+ return code[: THRESHOLD - 10] + "..." + code[len(code) - 10 :]
405
433
  else:
406
434
  return code
@@ -16,3 +16,5 @@ def get_metadata_detectors(ecosystem: ECOSYSTEM) -> dict[str, Detector]:
16
16
  return GO_METADATA_RULES
17
17
  case ECOSYSTEM.GITHUB_ACTION:
18
18
  return GITHUB_ACTION_METADATA_RULES
19
+ case ECOSYSTEM.EXTENSION:
20
+ return {} # No metadata detectors for extensions currently
@@ -15,10 +15,10 @@ class BundledBinary(Detector):
15
15
  # magic bytes are the first few bytes of a file that can be used to identify the file type
16
16
  # regardless of their extension
17
17
  magic_bytes = {
18
- "exe": b"\x4D\x5A",
19
- "elf": b"\x7F\x45\x4C\x46",
20
- "macho32": b"\xFE\xED\xFA\xCE",
21
- "macho64": b"\xFE\xED\xFA\xCF",
18
+ "exe": b"\x4d\x5a",
19
+ "elf": b"\x7f\x45\x4c\x46",
20
+ "macho32": b"\xfe\xed\xfa\xce",
21
+ "macho64": b"\xfe\xed\xfa\xcf",
22
22
  }
23
23
 
24
24
  def __init__(self):
@@ -40,7 +40,7 @@ class BundledBinary(Detector):
40
40
  def sha256(file: str) -> str:
41
41
  with open(file, "rb") as f:
42
42
  hasher = hashlib.sha256()
43
- while (chunk := f.read(4096)):
43
+ while chunk := f.read(4096):
44
44
  hasher.update(chunk)
45
45
  return hasher.hexdigest()
46
46
 
@@ -65,7 +65,7 @@ class BundledBinary(Detector):
65
65
  if not bin_files:
66
66
  return False, ""
67
67
 
68
- output_lines = '\n'.join(
68
+ output_lines = "\n".join(
69
69
  f"{digest}: {', '.join(files)}" for digest, files in bin_files.items()
70
70
  )
71
71
  return True, f"Binary file/s detected in package:\n{output_lines}"
@@ -41,7 +41,9 @@ class DeceptiveAuthorDetector(Detector):
41
41
  # read internal maintained list of placeholder email domains
42
42
  # this domains are usually used by authors who want to don't want to reveal their real email
43
43
  placeholder_email_domains_data = None
44
- with open(placeholder_email_domains_path, "r") as placeholder_email_domains_file:
44
+ with open(
45
+ placeholder_email_domains_path, "r"
46
+ ) as placeholder_email_domains_file:
45
47
  placeholder_email_domains_data = set(
46
48
  placeholder_email_domains_file.read().split("\n")
47
49
  )
@@ -11,8 +11,13 @@ class Detector:
11
11
 
12
12
  # returns (ruleMatches, message)
13
13
  @abstractmethod
14
- def detect(self, package_info, path: Optional[str] = None, name: Optional[str] = None,
15
- version: Optional[str] = None) -> tuple[bool, Optional[str]]:
14
+ def detect(
15
+ self,
16
+ package_info,
17
+ path: Optional[str] = None,
18
+ name: Optional[str] = None,
19
+ version: Optional[str] = None,
20
+ ) -> tuple[bool, Optional[str]]:
16
21
  pass # pragma: no cover
17
22
 
18
23
  def get_name(self) -> str:
@@ -15,12 +15,17 @@ class EmptyInfoDetector(Detector):
15
15
  def __init__(self):
16
16
  super().__init__(
17
17
  name="empty_information",
18
- description="Identify packages with an empty description field"
18
+ description="Identify packages with an empty description field",
19
19
  )
20
20
 
21
21
  @abstractmethod
22
- def detect(self, package_info, path: Optional[str] = None, name: Optional[str] = None,
23
- version: Optional[str] = None) -> tuple[bool, str]:
22
+ def detect(
23
+ self,
24
+ package_info,
25
+ path: Optional[str] = None,
26
+ name: Optional[str] = None,
27
+ version: Optional[str] = None,
28
+ ) -> tuple[bool, str]:
24
29
  """
25
30
  Uses a package's information from PyPI's JSON API to determine
26
31
  if the package has an empty description
@@ -1,10 +1,13 @@
1
1
  import json
2
+ import logging
2
3
  import os
3
4
  from typing import Optional
4
5
 
5
6
  from guarddog.analyzer.metadata.typosquatting import TyposquatDetector
6
7
  from guarddog.utils.config import TOP_PACKAGES_CACHE_LOCATION
7
8
 
9
+ log = logging.getLogger("guarddog")
10
+
8
11
 
9
12
  class GoTyposquatDetector(TyposquatDetector):
10
13
  """Detector for typosquatting attacks for go modules. Checks for distance one Levenshtein,
@@ -25,19 +28,24 @@ class GoTyposquatDetector(TyposquatDetector):
25
28
  )
26
29
 
27
30
  top_packages_path = os.path.join(resources_dir, top_packages_filename)
28
-
29
- top_packages_information = None
30
-
31
- if top_packages_filename in os.listdir(resources_dir):
32
- with open(top_packages_path, "r") as top_packages_file:
33
- top_packages_information = json.load(top_packages_file)
31
+ top_packages_information = self._get_top_packages_local(top_packages_path)
34
32
 
35
33
  if top_packages_information is None:
36
34
  raise Exception(
37
- f"Could not retrieve top Go packages from {top_packages_path}")
35
+ f"Could not retrieve top Go packages from {top_packages_path}"
36
+ )
38
37
 
39
38
  return set(top_packages_information)
40
39
 
40
+ def _get_top_packages_local(self, path: str) -> list[dict] | None:
41
+ try:
42
+ with open(path, "r") as f:
43
+ result = json.load(f)
44
+ return result
45
+ except FileNotFoundError:
46
+ log.error(f"File not found: {path}")
47
+ return None
48
+
41
49
  def detect(
42
50
  self,
43
51
  package_info,
@@ -104,8 +112,8 @@ class GoTyposquatDetector(TyposquatDetector):
104
112
  continue
105
113
 
106
114
  # Get form when replacing or removing go/golang term
107
- replaced_form = terms[:i] + [confused_term] + terms[i + 1:]
108
- removed_form = terms[:i] + terms[i + 1:]
115
+ replaced_form = terms[:i] + [confused_term] + terms[i + 1 :]
116
+ removed_form = terms[:i] + terms[i + 1 :]
109
117
 
110
118
  for form in (replaced_form, removed_form):
111
119
  confused_forms.append("-".join(form))
@@ -3,6 +3,11 @@ from typing import Optional
3
3
 
4
4
 
5
5
  class NPMBundledBinary(BundledBinary):
6
- def detect(self, package_info, path: Optional[str] = None, name: Optional[str] = None,
7
- version: Optional[str] = None) -> tuple[bool, str]:
6
+ def detect(
7
+ self,
8
+ package_info,
9
+ path: Optional[str] = None,
10
+ name: Optional[str] = None,
11
+ version: Optional[str] = None,
12
+ ) -> tuple[bool, str]:
8
13
  return super().detect(package_info, path, name, version)
@@ -1,4 +1,4 @@
1
- """ Deceptive Author Detector
1
+ """Deceptive Author Detector
2
2
 
3
3
  Detects when an author of is using a disposable email
4
4
  """
@@ -1,7 +1,8 @@
1
- """ Direct URL Dependency Detector
1
+ """Direct URL Dependency Detector
2
2
 
3
3
  Detects if a package depends on direct URL dependencies
4
4
  """
5
+
5
6
  from typing import Optional
6
7
  import re
7
8
 
@@ -1,7 +1,8 @@
1
- """ Empty Information Detector
1
+ """Empty Information Detector
2
2
 
3
3
  Detects if a package contains an empty description
4
4
  """
5
+
5
6
  import os.path
6
7
  from typing import Optional
7
8
 
@@ -12,13 +13,15 @@ MESSAGE = "This package has an empty description on PyPi"
12
13
 
13
14
  class NPMEmptyInfoDetector(EmptyInfoDetector):
14
15
 
15
- def detect(self, package_info, path: Optional[str] = None, name: Optional[str] = None,
16
- version: Optional[str] = None) -> tuple[bool, str]:
16
+ def detect(
17
+ self,
18
+ package_info,
19
+ path: Optional[str] = None,
20
+ name: Optional[str] = None,
21
+ version: Optional[str] = None,
22
+ ) -> tuple[bool, str]:
17
23
  if path is None:
18
24
  raise TypeError("path must be a string")
19
25
  package_path = os.path.join(path, "package")
20
- content = map(
21
- lambda x: x.lower(),
22
- os.listdir(package_path)
23
- )
26
+ content = map(lambda x: x.lower(), os.listdir(package_path))
24
27
  return "readme.md" not in content, EmptyInfoDetector.MESSAGE_TEMPLATE % "npm"
@@ -1,4 +1,4 @@
1
- """ Compromised Email Detector
1
+ """Compromised Email Detector
2
2
 
3
3
  Detects if a maintainer's email domain might have been compromised.
4
4
  """
@@ -8,8 +8,9 @@ from typing import Optional
8
8
 
9
9
  from dateutil import parser
10
10
 
11
- from guarddog.analyzer.metadata.potentially_compromised_email_domain import \
12
- PotentiallyCompromisedEmailDomainDetector
11
+ from guarddog.analyzer.metadata.potentially_compromised_email_domain import (
12
+ PotentiallyCompromisedEmailDomainDetector,
13
+ )
13
14
 
14
15
  from .utils import NPM_API_MAINTAINER_EMAIL_WARNING, get_email_addresses
15
16
 
@@ -1,7 +1,8 @@
1
- """ Empty Information Detector
1
+ """Empty Information Detector
2
2
 
3
3
  Detects when a package has its latest release version to 0.0.0
4
4
  """
5
+
5
6
  from typing import Optional
6
7
 
7
8
  from guarddog.analyzer.metadata.release_zero import ReleaseZeroDetector
@@ -9,7 +10,14 @@ from guarddog.analyzer.metadata.release_zero import ReleaseZeroDetector
9
10
 
10
11
  class NPMReleaseZeroDetector(ReleaseZeroDetector):
11
12
 
12
- def detect(self, package_info, path: Optional[str] = None, name: Optional[str] = None,
13
- version: Optional[str] = None) -> tuple[bool, str]:
14
- return package_info["dist-tags"]["latest"] in ["0.0.0", "0.0", "0"], \
15
- ReleaseZeroDetector.MESSAGE_TEMPLATE % package_info["dist-tags"]["latest"]
13
+ def detect(
14
+ self,
15
+ package_info,
16
+ path: Optional[str] = None,
17
+ name: Optional[str] = None,
18
+ version: Optional[str] = None,
19
+ ) -> tuple[bool, str]:
20
+ return (
21
+ package_info["dist-tags"]["latest"] in ["0.0.0", "0.0", "0"],
22
+ ReleaseZeroDetector.MESSAGE_TEMPLATE % package_info["dist-tags"]["latest"],
23
+ )
@@ -1,4 +1,5 @@
1
1
  import json
2
+ import logging
2
3
  import os
3
4
  from datetime import datetime, timedelta
4
5
  from typing import Optional
@@ -7,6 +8,8 @@ from guarddog.analyzer.metadata.typosquatting import TyposquatDetector
7
8
  from guarddog.utils.config import TOP_PACKAGES_CACHE_LOCATION
8
9
  import requests
9
10
 
11
+ log = logging.getLogger("guarddog")
12
+
10
13
 
11
14
  class NPMTyposquatDetector(TyposquatDetector):
12
15
  """Detector for typosquatting attacks. Detects if a package name is a typosquat of one of the top 5000 packages.
@@ -32,24 +35,52 @@ class NPMTyposquatDetector(TyposquatDetector):
32
35
  )
33
36
 
34
37
  top_packages_path = os.path.join(resources_dir, top_packages_filename)
38
+ top_packages_information = self._get_top_packages_local(top_packages_path)
35
39
 
36
- top_packages_information = None
37
-
38
- if top_packages_filename in os.listdir(resources_dir):
39
- update_time = datetime.fromtimestamp(os.path.getmtime(top_packages_path))
40
+ if self._file_is_expired(top_packages_path, days=30):
41
+ new_information = self._get_top_packages_network(popular_packages_url)
42
+ if new_information is not None:
43
+ top_packages_information = new_information
40
44
 
41
- if datetime.now() - update_time <= timedelta(days=30):
42
- with open(top_packages_path, "r") as top_packages_file:
43
- top_packages_information = json.load(top_packages_file)
45
+ with open(top_packages_path, "w+") as f:
46
+ json.dump(new_information, f, ensure_ascii=False, indent=4)
44
47
 
45
48
  if top_packages_information is None:
46
- response = requests.get(popular_packages_url).json()
47
- top_packages_information = list([i["name"] for i in response[0:8000]])
48
- with open(top_packages_path, "w+") as f:
49
- json.dump(top_packages_information, f, ensure_ascii=False, indent=4)
50
-
49
+ return set()
51
50
  return set(top_packages_information)
52
51
 
52
+ def _file_is_expired(self, path: str, days: int) -> bool:
53
+ try:
54
+ update_time = datetime.fromtimestamp(os.path.getmtime(path))
55
+ return datetime.now() - update_time > timedelta(days=days)
56
+ except FileNotFoundError:
57
+ return True
58
+
59
+ def _get_top_packages_local(self, path: str) -> list[dict] | None:
60
+ try:
61
+ with open(path, "r") as f:
62
+ result = json.load(f)
63
+ return result
64
+ except FileNotFoundError:
65
+ log.error(f"File not found: {path}")
66
+ return None
67
+
68
+ def _get_top_packages_network(self, url: str) -> list[dict] | None:
69
+ try:
70
+ response = requests.get(url)
71
+ response.raise_for_status()
72
+
73
+ response_data = response.json()
74
+ result = list([i["name"] for i in response_data[0:8000]])
75
+
76
+ return result
77
+ except json.JSONDecodeError:
78
+ log.error(f'Couldn`t convert to json: "{response.text}"')
79
+ return None
80
+ except requests.exceptions.RequestException as e:
81
+ log.error(f"Network error: {e}")
82
+ return None
83
+
53
84
  def detect(
54
85
  self,
55
86
  package_info,
@@ -79,7 +110,7 @@ class NPMTyposquatDetector(TyposquatDetector):
79
110
  return False, None
80
111
 
81
112
  def _get_confused_forms(self, package_name) -> list:
82
- """ Gets confused terms for npm packages.
113
+ """Gets confused terms for npm packages.
83
114
  Currently, there are no confused terms for npm packages.
84
115
  """
85
116
  return []
@@ -1,7 +1,8 @@
1
1
  from typing import Optional
2
2
 
3
- from guarddog.analyzer.metadata.unclaimed_maintainer_email_domain import \
4
- UnclaimedMaintainerEmailDomainDetector
3
+ from guarddog.analyzer.metadata.unclaimed_maintainer_email_domain import (
4
+ UnclaimedMaintainerEmailDomainDetector,
5
+ )
5
6
 
6
7
  from .utils import NPM_API_MAINTAINER_EMAIL_WARNING, get_email_addresses
7
8
 
@@ -6,8 +6,7 @@ NPM_API_MAINTAINER_EMAIL_WARNING = (
6
6
 
7
7
 
8
8
  def get_email_addresses(package_info: dict) -> set[str]:
9
- return {
10
- m["email"]
11
- for m in package_info.get("maintainers", [])
12
- if "email" in m
13
- } - {None, ""}
9
+ return {m["email"] for m in package_info.get("maintainers", []) if "email" in m} - {
10
+ None,
11
+ "",
12
+ }
@@ -60,10 +60,14 @@ class PotentiallyCompromisedEmailDomainDetector(Detector):
60
60
  has_issues = True
61
61
 
62
62
  messages.append(
63
- f"The domain name of the maintainer's email address ({email}) was"" re-registered after"
64
- " the latest release of this ""package. This can be an indicator that this is a"
65
- " custom domain that expired, and was leveraged by"" an attacker to compromise the"
66
- f" package owner's {self.ecosystem}"" account."
63
+ f"The domain name of the maintainer's email address ({email}) was"
64
+ " re-registered after"
65
+ " the latest release of this "
66
+ "package. This can be an indicator that this is a"
67
+ " custom domain that expired, and was leveraged by"
68
+ " an attacker to compromise the"
69
+ f" package owner's {self.ecosystem}"
70
+ " account."
67
71
  )
68
72
 
69
73
  return has_issues, "\n".join(messages)
@@ -1,11 +1,17 @@
1
1
  from guarddog.analyzer.metadata.pypi.empty_information import PypiEmptyInfoDetector
2
- from guarddog.analyzer.metadata.pypi.potentially_compromised_email_domain import \
3
- PypiPotentiallyCompromisedEmailDomainDetector
4
- from guarddog.analyzer.metadata.pypi.unclaimed_maintainer_email_domain import \
5
- PypiUnclaimedMaintainerEmailDomainDetector
2
+ from guarddog.analyzer.metadata.pypi.potentially_compromised_email_domain import (
3
+ PypiPotentiallyCompromisedEmailDomainDetector,
4
+ )
5
+ from guarddog.analyzer.metadata.pypi.unclaimed_maintainer_email_domain import (
6
+ PypiUnclaimedMaintainerEmailDomainDetector,
7
+ )
6
8
  from guarddog.analyzer.metadata.pypi.release_zero import PypiReleaseZeroDetector
7
- from guarddog.analyzer.metadata.pypi.repository_integrity_mismatch import PypiIntegrityMismatchDetector
8
- from guarddog.analyzer.metadata.pypi.single_python_file import PypiSinglePythonFileDetector
9
+ from guarddog.analyzer.metadata.pypi.repository_integrity_mismatch import (
10
+ PypiIntegrityMismatchDetector,
11
+ )
12
+ from guarddog.analyzer.metadata.pypi.single_python_file import (
13
+ PypiSinglePythonFileDetector,
14
+ )
9
15
  from guarddog.analyzer.metadata.pypi.typosquatting import PypiTyposquatDetector
10
16
  from guarddog.analyzer.metadata.pypi.bundled_binary import PypiBundledBinary
11
17
  from guarddog.analyzer.metadata.pypi.deceptive_author import PypiDeceptiveAuthor
@@ -3,6 +3,11 @@ from typing import Optional
3
3
 
4
4
 
5
5
  class PypiBundledBinary(BundledBinary):
6
- def detect(self, package_info, path: Optional[str] = None, name: Optional[str] = None,
7
- version: Optional[str] = None) -> tuple[bool, str]:
6
+ def detect(
7
+ self,
8
+ package_info,
9
+ path: Optional[str] = None,
10
+ name: Optional[str] = None,
11
+ version: Optional[str] = None,
12
+ ) -> tuple[bool, str]:
8
13
  return super().detect(package_info, path, name, version)
@@ -1,4 +1,4 @@
1
- """ Deceptive Author Detector
1
+ """Deceptive Author Detector
2
2
 
3
3
  Detects when an author of is using a disposable email
4
4
  """
@@ -1,7 +1,8 @@
1
- """ Empty Information Detector
1
+ """Empty Information Detector
2
2
 
3
3
  Detects if a package contains an empty description
4
4
  """
5
+
5
6
  import logging
6
7
  from typing import Optional
7
8
 
@@ -13,7 +14,17 @@ log = logging.getLogger("guarddog")
13
14
 
14
15
 
15
16
  class PypiEmptyInfoDetector(EmptyInfoDetector):
16
- def detect(self, package_info, path: Optional[str] = None, name: Optional[str] = None,
17
- version: Optional[str] = None) -> tuple[bool, str]:
18
- log.debug(f"Running PyPI empty description heuristic on package {name} version {version}")
19
- return len(package_info["info"]["description"].strip()) == 0, EmptyInfoDetector.MESSAGE_TEMPLATE % "PyPI"
17
+ def detect(
18
+ self,
19
+ package_info,
20
+ path: Optional[str] = None,
21
+ name: Optional[str] = None,
22
+ version: Optional[str] = None,
23
+ ) -> tuple[bool, str]:
24
+ log.debug(
25
+ f"Running PyPI empty description heuristic on package {name} version {version}"
26
+ )
27
+ return (
28
+ len(package_info["info"]["description"].strip()) == 0,
29
+ EmptyInfoDetector.MESSAGE_TEMPLATE % "PyPI",
30
+ )
@@ -1,4 +1,4 @@
1
- """ Compromised Email Detector
1
+ """Compromised Email Detector
2
2
 
3
3
  Detects if a maintainer's email domain might have been compromised.
4
4
  """
@@ -9,8 +9,9 @@ from typing import Optional
9
9
  from dateutil import parser
10
10
  from packaging import version
11
11
 
12
- from guarddog.analyzer.metadata.potentially_compromised_email_domain import \
13
- PotentiallyCompromisedEmailDomainDetector
12
+ from guarddog.analyzer.metadata.potentially_compromised_email_domain import (
13
+ PotentiallyCompromisedEmailDomainDetector,
14
+ )
14
15
 
15
16
  from .utils import get_email_addresses
16
17