guarddog 2.5.0__py3-none-any.whl → 2.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. guarddog/analyzer/analyzer.py +58 -20
  2. guarddog/analyzer/metadata/__init__.py +2 -0
  3. guarddog/analyzer/metadata/bundled_binary.py +6 -6
  4. guarddog/analyzer/metadata/deceptive_author.py +3 -1
  5. guarddog/analyzer/metadata/detector.py +7 -2
  6. guarddog/analyzer/metadata/empty_information.py +8 -3
  7. guarddog/analyzer/metadata/go/typosquatting.py +4 -3
  8. guarddog/analyzer/metadata/npm/bundled_binary.py +7 -2
  9. guarddog/analyzer/metadata/npm/deceptive_author.py +1 -1
  10. guarddog/analyzer/metadata/npm/direct_url_dependency.py +2 -1
  11. guarddog/analyzer/metadata/npm/empty_information.py +10 -7
  12. guarddog/analyzer/metadata/npm/potentially_compromised_email_domain.py +4 -3
  13. guarddog/analyzer/metadata/npm/release_zero.py +13 -5
  14. guarddog/analyzer/metadata/npm/typosquatting.py +1 -1
  15. guarddog/analyzer/metadata/npm/unclaimed_maintainer_email_domain.py +3 -2
  16. guarddog/analyzer/metadata/npm/utils.py +4 -5
  17. guarddog/analyzer/metadata/potentially_compromised_email_domain.py +8 -4
  18. guarddog/analyzer/metadata/pypi/__init__.py +12 -6
  19. guarddog/analyzer/metadata/pypi/bundled_binary.py +7 -2
  20. guarddog/analyzer/metadata/pypi/deceptive_author.py +1 -1
  21. guarddog/analyzer/metadata/pypi/empty_information.py +16 -5
  22. guarddog/analyzer/metadata/pypi/potentially_compromised_email_domain.py +4 -3
  23. guarddog/analyzer/metadata/pypi/release_zero.py +16 -6
  24. guarddog/analyzer/metadata/pypi/repository_integrity_mismatch.py +53 -27
  25. guarddog/analyzer/metadata/pypi/single_python_file.py +9 -4
  26. guarddog/analyzer/metadata/pypi/typosquatting.py +21 -8
  27. guarddog/analyzer/metadata/pypi/unclaimed_maintainer_email_domain.py +6 -2
  28. guarddog/analyzer/metadata/pypi/utils.py +1 -4
  29. guarddog/analyzer/metadata/release_zero.py +1 -1
  30. guarddog/analyzer/metadata/repository_integrity_mismatch.py +10 -3
  31. guarddog/analyzer/metadata/resources/top_pypi_packages.json +43984 -15984
  32. guarddog/analyzer/metadata/typosquatting.py +12 -8
  33. guarddog/analyzer/metadata/unclaimed_maintainer_email_domain.py +7 -2
  34. guarddog/analyzer/sourcecode/__init__.py +34 -7
  35. guarddog/analyzer/sourcecode/api-obfuscation.yml +42 -0
  36. guarddog/analyzer/sourcecode/code-execution.yml +1 -0
  37. guarddog/analyzer/sourcecode/dll-hijacking.yml +5 -0
  38. guarddog/analyzer/sourcecode/go-exec-base64.yml +40 -0
  39. guarddog/analyzer/sourcecode/go-exec-download.yml +85 -0
  40. guarddog/analyzer/sourcecode/go-exfiltrate-sensitive-data.yml +85 -0
  41. guarddog/analyzer/sourcecode/npm-obfuscation.yml +2 -1
  42. guarddog/analyzer/sourcecode/shady-links.yml +2 -0
  43. guarddog/analyzer/sourcecode/suspicious_passwd_access_linux.yar +12 -0
  44. guarddog/analyzer/sourcecode/unicode.yml +75 -0
  45. guarddog/cli.py +33 -107
  46. guarddog/ecosystems.py +3 -0
  47. guarddog/reporters/__init__.py +28 -0
  48. guarddog/reporters/human_readable.py +138 -0
  49. guarddog/reporters/json.py +28 -0
  50. guarddog/reporters/reporter_factory.py +50 -0
  51. guarddog/reporters/sarif.py +179 -173
  52. guarddog/scanners/__init__.py +5 -0
  53. guarddog/scanners/extension_scanner.py +152 -0
  54. guarddog/scanners/github_action_project_scanner.py +47 -8
  55. guarddog/scanners/github_action_scanner.py +6 -2
  56. guarddog/scanners/go_project_scanner.py +42 -5
  57. guarddog/scanners/npm_package_scanner.py +12 -4
  58. guarddog/scanners/npm_project_scanner.py +54 -10
  59. guarddog/scanners/pypi_package_scanner.py +9 -3
  60. guarddog/scanners/pypi_project_scanner.py +67 -29
  61. guarddog/scanners/scanner.py +247 -164
  62. guarddog/utils/archives.py +2 -1
  63. guarddog/utils/package_info.py +3 -1
  64. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info}/METADATA +11 -10
  65. guarddog-2.7.0.dist-info/RECORD +100 -0
  66. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info}/WHEEL +1 -1
  67. guarddog-2.5.0.dist-info/RECORD +0 -90
  68. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info}/entry_points.txt +0 -0
  69. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info/licenses}/LICENSE +0 -0
  70. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info/licenses}/LICENSE-3rdparty.csv +0 -0
  71. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info/licenses}/NOTICE +0 -0
@@ -13,9 +13,10 @@ from guarddog.analyzer.sourcecode import get_sourcecode_rules, SempgrepRule, Yar
13
13
  from guarddog.utils.config import YARA_EXT_EXCLUDE
14
14
  from guarddog.ecosystems import ECOSYSTEM
15
15
 
16
- SEMGREP_MAX_TARGET_BYTES = 10_000_000
17
- SOURCECODE_RULES_PATH = os.path.join(os.path.dirname(__file__), "sourcecode")
16
+ MAX_BYTES_DEFAULT = 10_000_000
17
+ SEMGREP_TIMEOUT_DEFAULT = 10
18
18
 
19
+ SOURCECODE_RULES_PATH = os.path.join(os.path.dirname(__file__), "sourcecode")
19
20
  log = logging.getLogger("guarddog")
20
21
 
21
22
 
@@ -64,7 +65,14 @@ class Analyzer:
64
65
  ".semgrep_logs",
65
66
  ]
66
67
 
67
- def analyze(self, path, info=None, rules=None, name: Optional[str] = None, version: Optional[str] = None) -> dict:
68
+ def analyze(
69
+ self,
70
+ path,
71
+ info=None,
72
+ rules=None,
73
+ name: Optional[str] = None,
74
+ version: Optional[str] = None,
75
+ ) -> dict:
68
76
  """
69
77
  Analyzes a package in the given path
70
78
 
@@ -94,8 +102,14 @@ class Analyzer:
94
102
 
95
103
  return {"issues": issues, "errors": errors, "results": results, "path": path}
96
104
 
97
- def analyze_metadata(self, path: str, info, rules=None, name: Optional[str] = None,
98
- version: Optional[str] = None) -> dict:
105
+ def analyze_metadata(
106
+ self,
107
+ path: str,
108
+ info,
109
+ rules=None,
110
+ name: Optional[str] = None,
111
+ version: Optional[str] = None,
112
+ ) -> dict:
99
113
  """
100
114
  Analyzes the metadata of a given package
101
115
 
@@ -124,7 +138,9 @@ class Analyzer:
124
138
  for rule in all_rules:
125
139
  try:
126
140
  log.debug(f"Running rule {rule} against package '{name}'")
127
- rule_matches, message = self.metadata_detectors[rule].detect(info, path, name, version)
141
+ rule_matches, message = self.metadata_detectors[rule].detect(
142
+ info, path, name, version
143
+ )
128
144
  results[rule] = None
129
145
  if rule_matches:
130
146
  issues += 1
@@ -199,16 +215,21 @@ class Analyzer:
199
215
  continue
200
216
 
201
217
  scan_file_target_abspath = os.path.join(root, f)
202
- scan_file_target_relpath = os.path.relpath(scan_file_target_abspath, path)
218
+ scan_file_target_relpath = os.path.relpath(
219
+ scan_file_target_abspath, path
220
+ )
203
221
 
204
222
  matches = scan_rules.match(scan_file_target_abspath)
205
223
  for m in matches:
224
+
206
225
  for s in m.strings:
207
226
  for i in s.instances:
208
227
  finding = {
209
228
  "location": f"{scan_file_target_relpath}:{i.offset}",
210
229
  "code": self.trim_code_snippet(str(i.matched_data)),
211
- 'message': m.meta.get("description", f"{m.rule} rule matched")
230
+ "message": m.meta.get(
231
+ "description", f"{m.rule} rule matched"
232
+ ),
212
233
  }
213
234
 
214
235
  # since yara can match the multiple times in the same file
@@ -251,10 +272,14 @@ class Analyzer:
251
272
  errors = {}
252
273
  issues = 0
253
274
 
254
- rules_path = list(map(
255
- lambda rule_name: os.path.join(SOURCECODE_RULES_PATH, f"{rule_name}.yml"),
256
- all_rules
257
- ))
275
+ rules_path = list(
276
+ map(
277
+ lambda rule_name: os.path.join(
278
+ SOURCECODE_RULES_PATH, f"{rule_name}.yml"
279
+ ),
280
+ all_rules,
281
+ )
282
+ )
258
283
 
259
284
  if len(rules_path) == 0:
260
285
  log.debug("No semgrep code rules to run")
@@ -263,7 +288,9 @@ class Analyzer:
263
288
  try:
264
289
  log.debug(f"Running semgrep code rules against {path}")
265
290
  response = self._invoke_semgrep(target=path, rules=rules_path)
266
- rule_results = self._format_semgrep_response(response, targetpath=targetpath)
291
+ rule_results = self._format_semgrep_response(
292
+ response, targetpath=targetpath
293
+ )
267
294
  issues += sum(len(res) for res in rule_results.values())
268
295
 
269
296
  results = results | rule_results
@@ -274,12 +301,19 @@ class Analyzer:
274
301
 
275
302
  def _invoke_semgrep(self, target: str, rules: Iterable[str]):
276
303
  try:
304
+ SEMGREP_MAX_TARGET_BYTES = int(
305
+ os.getenv("GUARDDOG_SEMGREP_MAX_TARGET_BYTES", MAX_BYTES_DEFAULT)
306
+ )
307
+ SEMGREP_TIMEOUT = int(
308
+ os.getenv("GUARDDOG_SEMGREP_TIMEOUT", SEMGREP_TIMEOUT_DEFAULT)
309
+ )
277
310
  cmd = ["semgrep"]
278
311
  for rule in rules:
279
312
  cmd.extend(["--config", rule])
280
313
 
281
314
  for excluded in self.exclude:
282
315
  cmd.append(f"--exclude='{excluded}'")
316
+ cmd.append(f"--timeout={SEMGREP_TIMEOUT}")
283
317
  cmd.append("--no-git-ignore")
284
318
  cmd.append("--json")
285
319
  cmd.append("--quiet")
@@ -287,7 +321,9 @@ class Analyzer:
287
321
  cmd.append(f"--max-target-bytes={SEMGREP_MAX_TARGET_BYTES}")
288
322
  cmd.append(target)
289
323
  log.debug(f"Invoking semgrep with command line: {' '.join(cmd)}")
290
- result = subprocess.run(cmd, capture_output=True, check=True, encoding="utf-8")
324
+ result = subprocess.run(
325
+ cmd, capture_output=True, check=True, encoding="utf-8"
326
+ )
291
327
  return json.loads(str(result.stdout))
292
328
  except FileNotFoundError:
293
329
  raise Exception("unable to find semgrep binary")
@@ -302,6 +338,8 @@ output: {e.output}
302
338
  raise Exception(error_message)
303
339
  except json.JSONDecodeError as e:
304
340
  raise Exception("unable to parse semgrep JSON output: " + str(e))
341
+ except ValueError as e:
342
+ raise Exception("Invalid environment variable value: " + str(e))
305
343
 
306
344
  def _format_semgrep_response(self, response, rule=None, targetpath=None):
307
345
  """
@@ -348,9 +386,9 @@ output: {e.output}
348
386
  location = file_path + ":" + str(start_line)
349
387
 
350
388
  finding = {
351
- 'location': location,
352
- 'code': code,
353
- 'message': result["extra"]["message"]
389
+ "location": location,
390
+ "code": code,
391
+ "message": result["extra"]["message"],
354
392
  }
355
393
 
356
394
  rule_results = results[rule_name]
@@ -374,7 +412,7 @@ output: {e.output}
374
412
  """
375
413
  snippet = []
376
414
  try:
377
- with open(file_path, 'r') as file:
415
+ with open(file_path, "r") as file:
378
416
  for current_line_number, line in enumerate(file, start=1):
379
417
  if start_line <= current_line_number <= end_line:
380
418
  snippet.append(line)
@@ -385,12 +423,12 @@ output: {e.output}
385
423
  except Exception as e:
386
424
  log.error(f"Error reading file {file_path}: {str(e)}")
387
425
 
388
- return ''.join(snippet)
426
+ return "".join(snippet)
389
427
 
390
428
  # Makes sure the matching code to be displayed isn't too long
391
429
  def trim_code_snippet(self, code):
392
430
  THRESHOLD = 250
393
431
  if len(code) > THRESHOLD:
394
- return code[: THRESHOLD - 10] + '...' + code[len(code) - 10:]
432
+ return code[: THRESHOLD - 10] + "..." + code[len(code) - 10 :]
395
433
  else:
396
434
  return code
@@ -16,3 +16,5 @@ def get_metadata_detectors(ecosystem: ECOSYSTEM) -> dict[str, Detector]:
16
16
  return GO_METADATA_RULES
17
17
  case ECOSYSTEM.GITHUB_ACTION:
18
18
  return GITHUB_ACTION_METADATA_RULES
19
+ case ECOSYSTEM.EXTENSION:
20
+ return {} # No metadata detectors for extensions currently
@@ -15,10 +15,10 @@ class BundledBinary(Detector):
15
15
  # magic bytes are the first few bytes of a file that can be used to identify the file type
16
16
  # regardless of their extension
17
17
  magic_bytes = {
18
- "exe": b"\x4D\x5A",
19
- "elf": b"\x7F\x45\x4C\x46",
20
- "macho32": b"\xFE\xED\xFA\xCE",
21
- "macho64": b"\xFE\xED\xFA\xCF",
18
+ "exe": b"\x4d\x5a",
19
+ "elf": b"\x7f\x45\x4c\x46",
20
+ "macho32": b"\xfe\xed\xfa\xce",
21
+ "macho64": b"\xfe\xed\xfa\xcf",
22
22
  }
23
23
 
24
24
  def __init__(self):
@@ -40,7 +40,7 @@ class BundledBinary(Detector):
40
40
  def sha256(file: str) -> str:
41
41
  with open(file, "rb") as f:
42
42
  hasher = hashlib.sha256()
43
- while (chunk := f.read(4096)):
43
+ while chunk := f.read(4096):
44
44
  hasher.update(chunk)
45
45
  return hasher.hexdigest()
46
46
 
@@ -65,7 +65,7 @@ class BundledBinary(Detector):
65
65
  if not bin_files:
66
66
  return False, ""
67
67
 
68
- output_lines = '\n'.join(
68
+ output_lines = "\n".join(
69
69
  f"{digest}: {', '.join(files)}" for digest, files in bin_files.items()
70
70
  )
71
71
  return True, f"Binary file/s detected in package:\n{output_lines}"
@@ -41,7 +41,9 @@ class DeceptiveAuthorDetector(Detector):
41
41
  # read internal maintained list of placeholder email domains
42
42
  # this domains are usually used by authors who want to don't want to reveal their real email
43
43
  placeholder_email_domains_data = None
44
- with open(placeholder_email_domains_path, "r") as placeholder_email_domains_file:
44
+ with open(
45
+ placeholder_email_domains_path, "r"
46
+ ) as placeholder_email_domains_file:
45
47
  placeholder_email_domains_data = set(
46
48
  placeholder_email_domains_file.read().split("\n")
47
49
  )
@@ -11,8 +11,13 @@ class Detector:
11
11
 
12
12
  # returns (ruleMatches, message)
13
13
  @abstractmethod
14
- def detect(self, package_info, path: Optional[str] = None, name: Optional[str] = None,
15
- version: Optional[str] = None) -> tuple[bool, Optional[str]]:
14
+ def detect(
15
+ self,
16
+ package_info,
17
+ path: Optional[str] = None,
18
+ name: Optional[str] = None,
19
+ version: Optional[str] = None,
20
+ ) -> tuple[bool, Optional[str]]:
16
21
  pass # pragma: no cover
17
22
 
18
23
  def get_name(self) -> str:
@@ -15,12 +15,17 @@ class EmptyInfoDetector(Detector):
15
15
  def __init__(self):
16
16
  super().__init__(
17
17
  name="empty_information",
18
- description="Identify packages with an empty description field"
18
+ description="Identify packages with an empty description field",
19
19
  )
20
20
 
21
21
  @abstractmethod
22
- def detect(self, package_info, path: Optional[str] = None, name: Optional[str] = None,
23
- version: Optional[str] = None) -> tuple[bool, str]:
22
+ def detect(
23
+ self,
24
+ package_info,
25
+ path: Optional[str] = None,
26
+ name: Optional[str] = None,
27
+ version: Optional[str] = None,
28
+ ) -> tuple[bool, str]:
24
29
  """
25
30
  Uses a package's information from PyPI's JSON API to determine
26
31
  if the package has an empty description
@@ -34,7 +34,8 @@ class GoTyposquatDetector(TyposquatDetector):
34
34
 
35
35
  if top_packages_information is None:
36
36
  raise Exception(
37
- f"Could not retrieve top Go packages from {top_packages_path}")
37
+ f"Could not retrieve top Go packages from {top_packages_path}"
38
+ )
38
39
 
39
40
  return set(top_packages_information)
40
41
 
@@ -104,8 +105,8 @@ class GoTyposquatDetector(TyposquatDetector):
104
105
  continue
105
106
 
106
107
  # Get form when replacing or removing go/golang term
107
- replaced_form = terms[:i] + [confused_term] + terms[i + 1:]
108
- removed_form = terms[:i] + terms[i + 1:]
108
+ replaced_form = terms[:i] + [confused_term] + terms[i + 1 :]
109
+ removed_form = terms[:i] + terms[i + 1 :]
109
110
 
110
111
  for form in (replaced_form, removed_form):
111
112
  confused_forms.append("-".join(form))
@@ -3,6 +3,11 @@ from typing import Optional
3
3
 
4
4
 
5
5
  class NPMBundledBinary(BundledBinary):
6
- def detect(self, package_info, path: Optional[str] = None, name: Optional[str] = None,
7
- version: Optional[str] = None) -> tuple[bool, str]:
6
+ def detect(
7
+ self,
8
+ package_info,
9
+ path: Optional[str] = None,
10
+ name: Optional[str] = None,
11
+ version: Optional[str] = None,
12
+ ) -> tuple[bool, str]:
8
13
  return super().detect(package_info, path, name, version)
@@ -1,4 +1,4 @@
1
- """ Deceptive Author Detector
1
+ """Deceptive Author Detector
2
2
 
3
3
  Detects when an author of is using a disposable email
4
4
  """
@@ -1,7 +1,8 @@
1
- """ Direct URL Dependency Detector
1
+ """Direct URL Dependency Detector
2
2
 
3
3
  Detects if a package depends on direct URL dependencies
4
4
  """
5
+
5
6
  from typing import Optional
6
7
  import re
7
8
 
@@ -1,7 +1,8 @@
1
- """ Empty Information Detector
1
+ """Empty Information Detector
2
2
 
3
3
  Detects if a package contains an empty description
4
4
  """
5
+
5
6
  import os.path
6
7
  from typing import Optional
7
8
 
@@ -12,13 +13,15 @@ MESSAGE = "This package has an empty description on PyPi"
12
13
 
13
14
  class NPMEmptyInfoDetector(EmptyInfoDetector):
14
15
 
15
- def detect(self, package_info, path: Optional[str] = None, name: Optional[str] = None,
16
- version: Optional[str] = None) -> tuple[bool, str]:
16
+ def detect(
17
+ self,
18
+ package_info,
19
+ path: Optional[str] = None,
20
+ name: Optional[str] = None,
21
+ version: Optional[str] = None,
22
+ ) -> tuple[bool, str]:
17
23
  if path is None:
18
24
  raise TypeError("path must be a string")
19
25
  package_path = os.path.join(path, "package")
20
- content = map(
21
- lambda x: x.lower(),
22
- os.listdir(package_path)
23
- )
26
+ content = map(lambda x: x.lower(), os.listdir(package_path))
24
27
  return "readme.md" not in content, EmptyInfoDetector.MESSAGE_TEMPLATE % "npm"
@@ -1,4 +1,4 @@
1
- """ Compromised Email Detector
1
+ """Compromised Email Detector
2
2
 
3
3
  Detects if a maintainer's email domain might have been compromised.
4
4
  """
@@ -8,8 +8,9 @@ from typing import Optional
8
8
 
9
9
  from dateutil import parser
10
10
 
11
- from guarddog.analyzer.metadata.potentially_compromised_email_domain import \
12
- PotentiallyCompromisedEmailDomainDetector
11
+ from guarddog.analyzer.metadata.potentially_compromised_email_domain import (
12
+ PotentiallyCompromisedEmailDomainDetector,
13
+ )
13
14
 
14
15
  from .utils import NPM_API_MAINTAINER_EMAIL_WARNING, get_email_addresses
15
16
 
@@ -1,7 +1,8 @@
1
- """ Empty Information Detector
1
+ """Empty Information Detector
2
2
 
3
3
  Detects when a package has its latest release version to 0.0.0
4
4
  """
5
+
5
6
  from typing import Optional
6
7
 
7
8
  from guarddog.analyzer.metadata.release_zero import ReleaseZeroDetector
@@ -9,7 +10,14 @@ from guarddog.analyzer.metadata.release_zero import ReleaseZeroDetector
9
10
 
10
11
  class NPMReleaseZeroDetector(ReleaseZeroDetector):
11
12
 
12
- def detect(self, package_info, path: Optional[str] = None, name: Optional[str] = None,
13
- version: Optional[str] = None) -> tuple[bool, str]:
14
- return package_info["dist-tags"]["latest"] in ["0.0.0", "0.0", "0"], \
15
- ReleaseZeroDetector.MESSAGE_TEMPLATE % package_info["dist-tags"]["latest"]
13
+ def detect(
14
+ self,
15
+ package_info,
16
+ path: Optional[str] = None,
17
+ name: Optional[str] = None,
18
+ version: Optional[str] = None,
19
+ ) -> tuple[bool, str]:
20
+ return (
21
+ package_info["dist-tags"]["latest"] in ["0.0.0", "0.0", "0"],
22
+ ReleaseZeroDetector.MESSAGE_TEMPLATE % package_info["dist-tags"]["latest"],
23
+ )
@@ -79,7 +79,7 @@ class NPMTyposquatDetector(TyposquatDetector):
79
79
  return False, None
80
80
 
81
81
  def _get_confused_forms(self, package_name) -> list:
82
- """ Gets confused terms for npm packages.
82
+ """Gets confused terms for npm packages.
83
83
  Currently, there are no confused terms for npm packages.
84
84
  """
85
85
  return []
@@ -1,7 +1,8 @@
1
1
  from typing import Optional
2
2
 
3
- from guarddog.analyzer.metadata.unclaimed_maintainer_email_domain import \
4
- UnclaimedMaintainerEmailDomainDetector
3
+ from guarddog.analyzer.metadata.unclaimed_maintainer_email_domain import (
4
+ UnclaimedMaintainerEmailDomainDetector,
5
+ )
5
6
 
6
7
  from .utils import NPM_API_MAINTAINER_EMAIL_WARNING, get_email_addresses
7
8
 
@@ -6,8 +6,7 @@ NPM_API_MAINTAINER_EMAIL_WARNING = (
6
6
 
7
7
 
8
8
  def get_email_addresses(package_info: dict) -> set[str]:
9
- return {
10
- m["email"]
11
- for m in package_info.get("maintainers", [])
12
- if "email" in m
13
- } - {None, ""}
9
+ return {m["email"] for m in package_info.get("maintainers", []) if "email" in m} - {
10
+ None,
11
+ "",
12
+ }
@@ -60,10 +60,14 @@ class PotentiallyCompromisedEmailDomainDetector(Detector):
60
60
  has_issues = True
61
61
 
62
62
  messages.append(
63
- f"The domain name of the maintainer's email address ({email}) was"" re-registered after"
64
- " the latest release of this ""package. This can be an indicator that this is a"
65
- " custom domain that expired, and was leveraged by"" an attacker to compromise the"
66
- f" package owner's {self.ecosystem}"" account."
63
+ f"The domain name of the maintainer's email address ({email}) was"
64
+ " re-registered after"
65
+ " the latest release of this "
66
+ "package. This can be an indicator that this is a"
67
+ " custom domain that expired, and was leveraged by"
68
+ " an attacker to compromise the"
69
+ f" package owner's {self.ecosystem}"
70
+ " account."
67
71
  )
68
72
 
69
73
  return has_issues, "\n".join(messages)
@@ -1,11 +1,17 @@
1
1
  from guarddog.analyzer.metadata.pypi.empty_information import PypiEmptyInfoDetector
2
- from guarddog.analyzer.metadata.pypi.potentially_compromised_email_domain import \
3
- PypiPotentiallyCompromisedEmailDomainDetector
4
- from guarddog.analyzer.metadata.pypi.unclaimed_maintainer_email_domain import \
5
- PypiUnclaimedMaintainerEmailDomainDetector
2
+ from guarddog.analyzer.metadata.pypi.potentially_compromised_email_domain import (
3
+ PypiPotentiallyCompromisedEmailDomainDetector,
4
+ )
5
+ from guarddog.analyzer.metadata.pypi.unclaimed_maintainer_email_domain import (
6
+ PypiUnclaimedMaintainerEmailDomainDetector,
7
+ )
6
8
  from guarddog.analyzer.metadata.pypi.release_zero import PypiReleaseZeroDetector
7
- from guarddog.analyzer.metadata.pypi.repository_integrity_mismatch import PypiIntegrityMismatchDetector
8
- from guarddog.analyzer.metadata.pypi.single_python_file import PypiSinglePythonFileDetector
9
+ from guarddog.analyzer.metadata.pypi.repository_integrity_mismatch import (
10
+ PypiIntegrityMismatchDetector,
11
+ )
12
+ from guarddog.analyzer.metadata.pypi.single_python_file import (
13
+ PypiSinglePythonFileDetector,
14
+ )
9
15
  from guarddog.analyzer.metadata.pypi.typosquatting import PypiTyposquatDetector
10
16
  from guarddog.analyzer.metadata.pypi.bundled_binary import PypiBundledBinary
11
17
  from guarddog.analyzer.metadata.pypi.deceptive_author import PypiDeceptiveAuthor
@@ -3,6 +3,11 @@ from typing import Optional
3
3
 
4
4
 
5
5
  class PypiBundledBinary(BundledBinary):
6
- def detect(self, package_info, path: Optional[str] = None, name: Optional[str] = None,
7
- version: Optional[str] = None) -> tuple[bool, str]:
6
+ def detect(
7
+ self,
8
+ package_info,
9
+ path: Optional[str] = None,
10
+ name: Optional[str] = None,
11
+ version: Optional[str] = None,
12
+ ) -> tuple[bool, str]:
8
13
  return super().detect(package_info, path, name, version)
@@ -1,4 +1,4 @@
1
- """ Deceptive Author Detector
1
+ """Deceptive Author Detector
2
2
 
3
3
  Detects when an author of is using a disposable email
4
4
  """
@@ -1,7 +1,8 @@
1
- """ Empty Information Detector
1
+ """Empty Information Detector
2
2
 
3
3
  Detects if a package contains an empty description
4
4
  """
5
+
5
6
  import logging
6
7
  from typing import Optional
7
8
 
@@ -13,7 +14,17 @@ log = logging.getLogger("guarddog")
13
14
 
14
15
 
15
16
  class PypiEmptyInfoDetector(EmptyInfoDetector):
16
- def detect(self, package_info, path: Optional[str] = None, name: Optional[str] = None,
17
- version: Optional[str] = None) -> tuple[bool, str]:
18
- log.debug(f"Running PyPI empty description heuristic on package {name} version {version}")
19
- return len(package_info["info"]["description"].strip()) == 0, EmptyInfoDetector.MESSAGE_TEMPLATE % "PyPI"
17
+ def detect(
18
+ self,
19
+ package_info,
20
+ path: Optional[str] = None,
21
+ name: Optional[str] = None,
22
+ version: Optional[str] = None,
23
+ ) -> tuple[bool, str]:
24
+ log.debug(
25
+ f"Running PyPI empty description heuristic on package {name} version {version}"
26
+ )
27
+ return (
28
+ len(package_info["info"]["description"].strip()) == 0,
29
+ EmptyInfoDetector.MESSAGE_TEMPLATE % "PyPI",
30
+ )
@@ -1,4 +1,4 @@
1
- """ Compromised Email Detector
1
+ """Compromised Email Detector
2
2
 
3
3
  Detects if a maintainer's email domain might have been compromised.
4
4
  """
@@ -9,8 +9,9 @@ from typing import Optional
9
9
  from dateutil import parser
10
10
  from packaging import version
11
11
 
12
- from guarddog.analyzer.metadata.potentially_compromised_email_domain import \
13
- PotentiallyCompromisedEmailDomainDetector
12
+ from guarddog.analyzer.metadata.potentially_compromised_email_domain import (
13
+ PotentiallyCompromisedEmailDomainDetector,
14
+ )
14
15
 
15
16
  from .utils import get_email_addresses
16
17
 
@@ -1,7 +1,8 @@
1
- """ Empty Information Detector
1
+ """Empty Information Detector
2
2
 
3
3
  Detects when a package has its latest release version to 0.0.0
4
4
  """
5
+
5
6
  import logging
6
7
  from typing import Optional
7
8
 
@@ -12,8 +13,17 @@ log = logging.getLogger("guarddog")
12
13
 
13
14
  class PypiReleaseZeroDetector(ReleaseZeroDetector):
14
15
 
15
- def detect(self, package_info, path: Optional[str] = None, name: Optional[str] = None,
16
- version: Optional[str] = None) -> tuple[bool, str]:
17
- log.debug(f"Running zero version heuristic on PyPI package {name} version {version}")
18
- return (package_info["info"]["version"] in ["0.0.0", "0.0"],
19
- ReleaseZeroDetector.MESSAGE_TEMPLATE % package_info["info"]["version"])
16
+ def detect(
17
+ self,
18
+ package_info,
19
+ path: Optional[str] = None,
20
+ name: Optional[str] = None,
21
+ version: Optional[str] = None,
22
+ ) -> tuple[bool, str]:
23
+ log.debug(
24
+ f"Running zero version heuristic on PyPI package {name} version {version}"
25
+ )
26
+ return (
27
+ package_info["info"]["version"] in ["0.0.0", "0.0"],
28
+ ReleaseZeroDetector.MESSAGE_TEMPLATE % package_info["info"]["version"],
29
+ )