guarddog 2.5.0__py3-none-any.whl → 2.7.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (71)
  1. guarddog/analyzer/analyzer.py +58 -20
  2. guarddog/analyzer/metadata/__init__.py +2 -0
  3. guarddog/analyzer/metadata/bundled_binary.py +6 -6
  4. guarddog/analyzer/metadata/deceptive_author.py +3 -1
  5. guarddog/analyzer/metadata/detector.py +7 -2
  6. guarddog/analyzer/metadata/empty_information.py +8 -3
  7. guarddog/analyzer/metadata/go/typosquatting.py +4 -3
  8. guarddog/analyzer/metadata/npm/bundled_binary.py +7 -2
  9. guarddog/analyzer/metadata/npm/deceptive_author.py +1 -1
  10. guarddog/analyzer/metadata/npm/direct_url_dependency.py +2 -1
  11. guarddog/analyzer/metadata/npm/empty_information.py +10 -7
  12. guarddog/analyzer/metadata/npm/potentially_compromised_email_domain.py +4 -3
  13. guarddog/analyzer/metadata/npm/release_zero.py +13 -5
  14. guarddog/analyzer/metadata/npm/typosquatting.py +1 -1
  15. guarddog/analyzer/metadata/npm/unclaimed_maintainer_email_domain.py +3 -2
  16. guarddog/analyzer/metadata/npm/utils.py +4 -5
  17. guarddog/analyzer/metadata/potentially_compromised_email_domain.py +8 -4
  18. guarddog/analyzer/metadata/pypi/__init__.py +12 -6
  19. guarddog/analyzer/metadata/pypi/bundled_binary.py +7 -2
  20. guarddog/analyzer/metadata/pypi/deceptive_author.py +1 -1
  21. guarddog/analyzer/metadata/pypi/empty_information.py +16 -5
  22. guarddog/analyzer/metadata/pypi/potentially_compromised_email_domain.py +4 -3
  23. guarddog/analyzer/metadata/pypi/release_zero.py +16 -6
  24. guarddog/analyzer/metadata/pypi/repository_integrity_mismatch.py +53 -27
  25. guarddog/analyzer/metadata/pypi/single_python_file.py +9 -4
  26. guarddog/analyzer/metadata/pypi/typosquatting.py +21 -8
  27. guarddog/analyzer/metadata/pypi/unclaimed_maintainer_email_domain.py +6 -2
  28. guarddog/analyzer/metadata/pypi/utils.py +1 -4
  29. guarddog/analyzer/metadata/release_zero.py +1 -1
  30. guarddog/analyzer/metadata/repository_integrity_mismatch.py +10 -3
  31. guarddog/analyzer/metadata/resources/top_pypi_packages.json +43984 -15984
  32. guarddog/analyzer/metadata/typosquatting.py +12 -8
  33. guarddog/analyzer/metadata/unclaimed_maintainer_email_domain.py +7 -2
  34. guarddog/analyzer/sourcecode/__init__.py +34 -7
  35. guarddog/analyzer/sourcecode/api-obfuscation.yml +42 -0
  36. guarddog/analyzer/sourcecode/code-execution.yml +1 -0
  37. guarddog/analyzer/sourcecode/dll-hijacking.yml +5 -0
  38. guarddog/analyzer/sourcecode/go-exec-base64.yml +40 -0
  39. guarddog/analyzer/sourcecode/go-exec-download.yml +85 -0
  40. guarddog/analyzer/sourcecode/go-exfiltrate-sensitive-data.yml +85 -0
  41. guarddog/analyzer/sourcecode/npm-obfuscation.yml +2 -1
  42. guarddog/analyzer/sourcecode/shady-links.yml +2 -0
  43. guarddog/analyzer/sourcecode/suspicious_passwd_access_linux.yar +12 -0
  44. guarddog/analyzer/sourcecode/unicode.yml +75 -0
  45. guarddog/cli.py +33 -107
  46. guarddog/ecosystems.py +3 -0
  47. guarddog/reporters/__init__.py +28 -0
  48. guarddog/reporters/human_readable.py +138 -0
  49. guarddog/reporters/json.py +28 -0
  50. guarddog/reporters/reporter_factory.py +50 -0
  51. guarddog/reporters/sarif.py +179 -173
  52. guarddog/scanners/__init__.py +5 -0
  53. guarddog/scanners/extension_scanner.py +152 -0
  54. guarddog/scanners/github_action_project_scanner.py +47 -8
  55. guarddog/scanners/github_action_scanner.py +6 -2
  56. guarddog/scanners/go_project_scanner.py +42 -5
  57. guarddog/scanners/npm_package_scanner.py +12 -4
  58. guarddog/scanners/npm_project_scanner.py +54 -10
  59. guarddog/scanners/pypi_package_scanner.py +9 -3
  60. guarddog/scanners/pypi_project_scanner.py +67 -29
  61. guarddog/scanners/scanner.py +247 -164
  62. guarddog/utils/archives.py +2 -1
  63. guarddog/utils/package_info.py +3 -1
  64. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info}/METADATA +11 -10
  65. guarddog-2.7.0.dist-info/RECORD +100 -0
  66. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info}/WHEEL +1 -1
  67. guarddog-2.5.0.dist-info/RECORD +0 -90
  68. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info}/entry_points.txt +0 -0
  69. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info/licenses}/LICENSE +0 -0
  70. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info/licenses}/LICENSE-3rdparty.csv +0 -0
  71. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info/licenses}/NOTICE +0 -0
guarddog/cli.py CHANGED
@@ -1,11 +1,10 @@
1
- """ Package Malware Scanner
1
+ """Package Malware Scanner
2
2
 
3
3
  CLI command that scans a package version for user-specified malware flags.
4
4
  Includes rules based on package registry metadata and source code analysis.
5
5
  """
6
6
 
7
7
  from functools import reduce
8
- import json as js
9
8
  import logging
10
9
  import os
11
10
  import sys
@@ -14,12 +13,12 @@ from typing import Optional
14
13
 
15
14
  import click
16
15
  from prettytable import PrettyTable
17
- from termcolor import colored
18
16
 
19
17
  from guarddog.analyzer.metadata import get_metadata_detectors
20
18
  from guarddog.analyzer.sourcecode import get_sourcecode_rules
21
19
  from guarddog.ecosystems import ECOSYSTEM
22
- from guarddog.reporters.sarif import report_verify_sarif
20
+ from guarddog.reporters.reporter_factory import ReporterFactory, ReporterType
21
+
23
22
  from guarddog.scanners import get_package_scanner, get_project_scanner
24
23
  from guarddog.utils.archives import safe_extract
25
24
 
@@ -127,7 +126,7 @@ def _get_all_rules(ecosystem: ECOSYSTEM) -> set[str]:
127
126
 
128
127
  def _get_rule_param(
129
128
  rules: tuple[str, ...], exclude_rules: tuple[str, ...], ecosystem: ECOSYSTEM
130
- ) -> Optional[set]:
129
+ ) -> Optional[set[str]]:
131
130
  """
132
131
  This function should return None if no rules are provided
133
132
  Else a set of rules to be used for scanning
@@ -162,28 +161,20 @@ def _verify(
162
161
  log.error(f"Command verify is not supported for ecosystem {ecosystem}")
163
162
  exit(1)
164
163
 
165
- def display_result(result: dict) -> None:
166
- identifier = (
167
- result["dependency"]
168
- if result["version"] is None
169
- else f"{result['dependency']} version {result['version']}"
170
- )
171
- if output_format is None:
172
- print_scan_results(result.get("result"), identifier)
173
-
174
- if len(result.get("errors", [])) > 0:
175
- print_errors(result.get("error"), identifier)
164
+ dependencies, results = scanner.scan_local(path=path, rules=rule_param)
176
165
 
177
- results = scanner.scan_local(path, rule_param, display_result)
178
- if output_format == "json":
179
- return_value = js.dumps(results)
166
+ rule_docs = list(rule_param or _get_all_rules(ecosystem=ecosystem))
180
167
 
181
- if output_format == "sarif":
182
- sarif_rules = _get_all_rules(ecosystem)
183
- return_value = report_verify_sarif(path, list(sarif_rules), results, ecosystem)
168
+ reporter = ReporterFactory.create_reporter(ReporterType.from_str(output_format))
169
+ stdout, stderr = reporter.render_verify(
170
+ dependency_files=dependencies,
171
+ rule_names=rule_docs,
172
+ scan_results=results,
173
+ ecosystem=ecosystem,
174
+ )
184
175
 
185
- if output_format is not None:
186
- print(return_value)
176
+ sys.stdout.write(stdout)
177
+ sys.stderr.write(stderr)
187
178
 
188
179
  if exit_non_zero_on_finding:
189
180
  exit_with_status_code([result["result"] for result in results])
@@ -231,10 +222,10 @@ def _scan(
231
222
  log.error(f"Error occurred while scanning target {identifier}: '{e}'\n")
232
223
  sys.exit(1)
233
224
 
234
- if output_format == "json":
235
- print(js.dumps(result))
236
- else:
237
- print_scan_results(result, result["package"])
225
+ reporter = ReporterFactory.create_reporter(ReporterType.from_str(output_format))
226
+ stdout, stderr = reporter.render_scan(result)
227
+ sys.stdout.write(stdout)
228
+ sys.stderr.write(stderr)
238
229
 
239
230
  if exit_non_zero_on_finding:
240
231
  exit_with_status_code([result])
@@ -262,6 +253,7 @@ class CliEcosystem(click.Group):
262
253
  Class that dynamically represents an ecosystem in click
263
254
  It dynamically selects the ruleset to the instantiated ecosystem
264
255
  """
256
+
265
257
  def __init__(self, ecosystem: ECOSYSTEM):
266
258
  super().__init__()
267
259
  self.name = ecosystem.name.lower()
@@ -288,7 +280,12 @@ class CliEcosystem(click.Group):
288
280
  @scan_options
289
281
  @rule_options
290
282
  def scan_ecosystem(
291
- target, version, rules, exclude_rules, output_format, exit_non_zero_on_finding
283
+ target,
284
+ version,
285
+ rules,
286
+ exclude_rules,
287
+ output_format,
288
+ exit_non_zero_on_finding,
292
289
  ):
293
290
  return _scan(
294
291
  target,
@@ -304,7 +301,9 @@ class CliEcosystem(click.Group):
304
301
  @common_options
305
302
  @verify_options
306
303
  @rule_options
307
- def verify_ecosystem(target, rules, exclude_rules, output_format, exit_non_zero_on_finding):
304
+ def verify_ecosystem(
305
+ target, rules, exclude_rules, output_format, exit_non_zero_on_finding
306
+ ):
308
307
  return _verify(
309
308
  target,
310
309
  rules,
@@ -314,7 +313,9 @@ class CliEcosystem(click.Group):
314
313
  self.ecosystem,
315
314
  )
316
315
 
317
- @click.command("list-rules", help=f"List available rules for {self.ecosystem.name}")
316
+ @click.command(
317
+ "list-rules", help=f"List available rules for {self.ecosystem.name}"
318
+ )
318
319
  def list_rules_ecosystem():
319
320
  return _list_rules(self.ecosystem)
320
321
 
@@ -333,7 +334,7 @@ for e in ECOSYSTEM:
333
334
  @verify_options
334
335
  @legacy_rules_options
335
336
  def verify(target, rules, exclude_rules, output_format, exit_non_zero_on_finding):
336
- return _verify(
337
+ return verify(
337
338
  target,
338
339
  rules,
339
340
  exclude_rules,
@@ -361,81 +362,6 @@ def scan(
361
362
  )
362
363
 
363
364
 
364
- # Pretty prints scan results for the console
365
- def print_scan_results(results, identifier):
366
- num_issues = results.get("issues")
367
- errors = results.get("errors", [])
368
-
369
- if num_issues == 0:
370
- print(
371
- "Found "
372
- + colored("0 potentially malicious indicators", "green", attrs=["bold"])
373
- + " scanning "
374
- + colored(identifier, None, attrs=["bold"])
375
- )
376
- print()
377
- else:
378
- print(
379
- "Found "
380
- + colored(
381
- str(num_issues) + " potentially malicious indicators",
382
- "red",
383
- attrs=["bold"],
384
- )
385
- + " in "
386
- + colored(identifier, None, attrs=["bold"])
387
- )
388
- print()
389
-
390
- findings = results.get("results", [])
391
- for finding in findings:
392
- description = findings[finding]
393
- if isinstance(description, str): # package metadata
394
- print(colored(finding, None, attrs=["bold"]) + ": " + description)
395
- print()
396
- elif isinstance(description, list): # semgrep rule result:
397
- source_code_findings = description
398
- print(
399
- colored(finding, None, attrs=["bold"])
400
- + ": found "
401
- + str(len(source_code_findings))
402
- + " source code matches"
403
- )
404
- for finding in source_code_findings:
405
- print(
406
- " * "
407
- + finding["message"]
408
- + " at "
409
- + finding["location"]
410
- + "\n "
411
- + format_code_line_for_output(finding["code"])
412
- )
413
- print()
414
-
415
- if len(errors) > 0:
416
- print_errors(errors, identifier)
417
- print("\n")
418
-
419
-
420
- def print_errors(errors, identifier):
421
- print(
422
- colored("Some rules failed to run while scanning " + identifier + ":", "yellow")
423
- )
424
- print()
425
- for rule in errors:
426
- print(f"* {rule}: {errors[rule]}")
427
- print()
428
-
429
-
430
- def format_code_line_for_output(code):
431
- return " " + colored(
432
- code.strip().replace("\n", "\n ").replace("\t", " "),
433
- None,
434
- "on_red",
435
- attrs=["bold"],
436
- )
437
-
438
-
439
365
  # Given the results, exit with the appropriate status code
440
366
  def exit_with_status_code(results):
441
367
  for result in results:
guarddog/ecosystems.py CHANGED
@@ -6,6 +6,7 @@ class ECOSYSTEM(Enum):
6
6
  NPM = "npm"
7
7
  GO = "go"
8
8
  GITHUB_ACTION = "github-action"
9
+ EXTENSION = "extension"
9
10
 
10
11
 
11
12
  def get_friendly_name(ecosystem: ECOSYSTEM) -> str:
@@ -18,5 +19,7 @@ def get_friendly_name(ecosystem: ECOSYSTEM) -> str:
18
19
  return "go"
19
20
  case ECOSYSTEM.GITHUB_ACTION:
20
21
  return "GitHub Action"
22
+ case ECOSYSTEM.EXTENSION:
23
+ return "Extension"
21
24
  case _:
22
25
  return ecosystem.value
guarddog/reporters/__init__.py ADDED
@@ -0,0 +1,28 @@
1
+ from guarddog.scanners.scanner import DependencyFile
2
+ from typing import List
3
+ from guarddog.ecosystems import ECOSYSTEM
4
+
5
+
6
+ class BaseReporter:
7
+ """
8
+ Base class for all reporters.
9
+ """
10
+
11
+ @staticmethod
12
+ def render_scan(scan_results: dict) -> tuple[str, str]:
13
+ """
14
+ Report the scans results.
15
+ """
16
+ raise NotImplementedError("Subclasses must implement this method.")
17
+
18
+ @staticmethod
19
+ def render_verify(
20
+ dependency_files: List[DependencyFile],
21
+ rule_names: list[str],
22
+ scan_results: list[dict],
23
+ ecosystem: ECOSYSTEM,
24
+ ) -> tuple[str, str]:
25
+ """
26
+ Report the scans results.
27
+ """
28
+ raise NotImplementedError("Subclasses must implement this method.")
guarddog/reporters/human_readable.py ADDED
@@ -0,0 +1,138 @@
1
+ from termcolor import colored
2
+ from guarddog.reporters import BaseReporter
3
+ from typing import List
4
+ from guarddog.scanners.scanner import DependencyFile
5
+ from guarddog.ecosystems import ECOSYSTEM
6
+
7
+
8
+ class HumanReadableReporter(BaseReporter):
9
+ """
10
+ HumanReadableReporter is a class that formats and prints scan results in a human-readable format.
11
+ """
12
+
13
+ @staticmethod
14
+ def print_errors(identifier: str, results: dict) -> str:
15
+ errors = results.get("errors", [])
16
+ if not errors:
17
+ return ""
18
+
19
+ lines = []
20
+ lines.append("")
21
+ lines.append(
22
+ colored(
23
+ "Some rules failed to run while scanning " + identifier + ":",
24
+ "yellow",
25
+ )
26
+ )
27
+ lines.append("")
28
+ for rule in errors:
29
+ lines.append(f"* {rule}: {errors[rule]}")
30
+
31
+ return "\n".join(lines)
32
+
33
+ @staticmethod
34
+ def print_scan_results(identifier: str, results: dict) -> str:
35
+
36
+ def _format_code_line_for_output(code) -> str:
37
+ return " " + colored(
38
+ code.strip().replace("\n", "\n ").replace("\t", " "),
39
+ None,
40
+ "on_red",
41
+ attrs=["bold"],
42
+ )
43
+
44
+ num_issues = results.get("issues")
45
+ lines = []
46
+
47
+ if num_issues == 0:
48
+ lines.append(
49
+ "Found "
50
+ + colored("0 potentially malicious indicators", "green", attrs=["bold"])
51
+ + " scanning "
52
+ + colored(identifier, None, attrs=["bold"])
53
+ )
54
+ lines.append("")
55
+ else:
56
+ lines.append(
57
+ "Found "
58
+ + colored(
59
+ str(num_issues) + " potentially malicious indicators",
60
+ "red",
61
+ attrs=["bold"],
62
+ )
63
+ + " in "
64
+ + colored(identifier, None, attrs=["bold"])
65
+ )
66
+ lines.append("")
67
+
68
+ findings = results.get("results", [])
69
+ for finding in findings:
70
+ description = findings[finding]
71
+ if isinstance(description, str): # package metadata
72
+ lines.append(
73
+ colored(finding, None, attrs=["bold"]) + ": " + description
74
+ )
75
+ lines.append("")
76
+ elif isinstance(description, list): # semgrep rule result:
77
+ source_code_findings = description
78
+ lines.append(
79
+ colored(finding, None, attrs=["bold"])
80
+ + ": found "
81
+ + str(len(source_code_findings))
82
+ + " source code matches"
83
+ )
84
+ for finding in source_code_findings:
85
+ lines.append(
86
+ " * "
87
+ + finding["message"]
88
+ + " at "
89
+ + finding["location"]
90
+ + "\n "
91
+ + _format_code_line_for_output(finding["code"])
92
+ )
93
+ lines.append("")
94
+
95
+ return "\n".join(lines)
96
+
97
+ @staticmethod
98
+ def render_scan(scan_results: dict) -> tuple[str, str]:
99
+ """
100
+ Report the scans results in a human-readable format.
101
+
102
+ Args:
103
+ scan_results (dict): The scan results to be reported.
104
+ """
105
+ return (
106
+ HumanReadableReporter.print_scan_results(
107
+ identifier=scan_results["package"], results=scan_results
108
+ ),
109
+ HumanReadableReporter.print_errors(
110
+ identifier=scan_results["package"], results=scan_results
111
+ ),
112
+ )
113
+
114
+ @staticmethod
115
+ def render_verify(
116
+ dependency_files: List[DependencyFile],
117
+ rule_names: list[str],
118
+ scan_results: list[dict],
119
+ ecosystem: ECOSYSTEM,
120
+ ) -> tuple[str, str]:
121
+ return (
122
+ "\n".join(
123
+ [
124
+ HumanReadableReporter.print_scan_results(
125
+ identifier=s["dependency"], results=s["result"]
126
+ )
127
+ for s in scan_results
128
+ ]
129
+ ),
130
+ "\n".join(
131
+ [
132
+ HumanReadableReporter.print_errors(
133
+ identifier=s["dependency"], results=s["result"]
134
+ )
135
+ for s in scan_results
136
+ ]
137
+ ),
138
+ )
guarddog/reporters/json.py ADDED
@@ -0,0 +1,28 @@
1
+ import json
2
+ from typing import List
3
+ from guarddog.scanners.scanner import DependencyFile
4
+ from guarddog.ecosystems import ECOSYSTEM
5
+
6
+ from guarddog.reporters import BaseReporter
7
+
8
+
9
+ class JsonReporter(BaseReporter):
10
+ @staticmethod
11
+ def render_verify(
12
+ dependency_files: List[DependencyFile],
13
+ rule_names: list[str],
14
+ scan_results: list[dict],
15
+ ecosystem: ECOSYSTEM,
16
+ ) -> tuple[str, str]:
17
+ return json.dumps(scan_results), ""
18
+
19
+ @staticmethod
20
+ def render_scan(scan_results: dict) -> tuple[str, str]:
21
+ """
22
+ Report the scans results in a json format.
23
+
24
+ Args:
25
+ scan_results (dict): The scan results to be reported.
26
+ """
27
+ # this reporter will output the errors in stdout
28
+ return json.dumps(scan_results), ""
guarddog/reporters/reporter_factory.py ADDED
@@ -0,0 +1,50 @@
1
+ from enum import Enum, auto
2
+ from typing import Optional
3
+
4
+ from guarddog.reporters import BaseReporter
5
+ from guarddog.reporters.human_readable import HumanReadableReporter
6
+ from guarddog.reporters.sarif import SarifReporter
7
+ from guarddog.reporters.json import JsonReporter
8
+
9
+
10
+ class ReporterType(Enum):
11
+ """
12
+ Enum representing the different types of reporters available.
13
+ """
14
+
15
+ HUMAN_READABLE = auto()
16
+ SARIF = auto()
17
+ JSON = auto()
18
+
19
+ @classmethod
20
+ def from_str(cls, type: Optional[str]) -> "ReporterType":
21
+ if not type:
22
+ return cls.HUMAN_READABLE
23
+ match (type).lower():
24
+ case "human_readable":
25
+ return cls.HUMAN_READABLE
26
+ case "sarif":
27
+ return cls.SARIF
28
+ case "json":
29
+ return cls.JSON
30
+ case _:
31
+ raise ValueError(f"Unsupported reporter type: {type}")
32
+
33
+
34
+ class ReporterFactory:
35
+ """
36
+ Factory class for creating reporter instances based on the reporter type.
37
+ """
38
+
39
+ @staticmethod
40
+ def create_reporter(reporter_type: ReporterType) -> type[BaseReporter]:
41
+ """
42
+ Create a reporter instance based on the reporter type.
43
+ """
44
+ match reporter_type:
45
+ case ReporterType.HUMAN_READABLE:
46
+ return HumanReadableReporter
47
+ case ReporterType.SARIF:
48
+ return SarifReporter
49
+ case ReporterType.JSON:
50
+ return JsonReporter