PyPI - guarddog - Versions diffs - 2.5.0__py3-none-any.whl → 2.7.0__py3-none-any.whl - Mend

guarddog 2.5.0__py3-none-any.whl → 2.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71) hide show

guarddog/analyzer/analyzer.py +58 -20
guarddog/analyzer/metadata/__init__.py +2 -0
guarddog/analyzer/metadata/bundled_binary.py +6 -6
guarddog/analyzer/metadata/deceptive_author.py +3 -1
guarddog/analyzer/metadata/detector.py +7 -2
guarddog/analyzer/metadata/empty_information.py +8 -3
guarddog/analyzer/metadata/go/typosquatting.py +4 -3
guarddog/analyzer/metadata/npm/bundled_binary.py +7 -2
guarddog/analyzer/metadata/npm/deceptive_author.py +1 -1
guarddog/analyzer/metadata/npm/direct_url_dependency.py +2 -1
guarddog/analyzer/metadata/npm/empty_information.py +10 -7
guarddog/analyzer/metadata/npm/potentially_compromised_email_domain.py +4 -3
guarddog/analyzer/metadata/npm/release_zero.py +13 -5
guarddog/analyzer/metadata/npm/typosquatting.py +1 -1
guarddog/analyzer/metadata/npm/unclaimed_maintainer_email_domain.py +3 -2
guarddog/analyzer/metadata/npm/utils.py +4 -5
guarddog/analyzer/metadata/potentially_compromised_email_domain.py +8 -4
guarddog/analyzer/metadata/pypi/__init__.py +12 -6
guarddog/analyzer/metadata/pypi/bundled_binary.py +7 -2
guarddog/analyzer/metadata/pypi/deceptive_author.py +1 -1
guarddog/analyzer/metadata/pypi/empty_information.py +16 -5
guarddog/analyzer/metadata/pypi/potentially_compromised_email_domain.py +4 -3
guarddog/analyzer/metadata/pypi/release_zero.py +16 -6
guarddog/analyzer/metadata/pypi/repository_integrity_mismatch.py +53 -27
guarddog/analyzer/metadata/pypi/single_python_file.py +9 -4
guarddog/analyzer/metadata/pypi/typosquatting.py +21 -8
guarddog/analyzer/metadata/pypi/unclaimed_maintainer_email_domain.py +6 -2
guarddog/analyzer/metadata/pypi/utils.py +1 -4
guarddog/analyzer/metadata/release_zero.py +1 -1
guarddog/analyzer/metadata/repository_integrity_mismatch.py +10 -3
guarddog/analyzer/metadata/resources/top_pypi_packages.json +43984 -15984
guarddog/analyzer/metadata/typosquatting.py +12 -8
guarddog/analyzer/metadata/unclaimed_maintainer_email_domain.py +7 -2
guarddog/analyzer/sourcecode/__init__.py +34 -7
guarddog/analyzer/sourcecode/api-obfuscation.yml +42 -0
guarddog/analyzer/sourcecode/code-execution.yml +1 -0
guarddog/analyzer/sourcecode/dll-hijacking.yml +5 -0
guarddog/analyzer/sourcecode/go-exec-base64.yml +40 -0
guarddog/analyzer/sourcecode/go-exec-download.yml +85 -0
guarddog/analyzer/sourcecode/go-exfiltrate-sensitive-data.yml +85 -0
guarddog/analyzer/sourcecode/npm-obfuscation.yml +2 -1
guarddog/analyzer/sourcecode/shady-links.yml +2 -0
guarddog/analyzer/sourcecode/suspicious_passwd_access_linux.yar +12 -0
guarddog/analyzer/sourcecode/unicode.yml +75 -0
guarddog/cli.py +33 -107
guarddog/ecosystems.py +3 -0
guarddog/reporters/__init__.py +28 -0
guarddog/reporters/human_readable.py +138 -0
guarddog/reporters/json.py +28 -0
guarddog/reporters/reporter_factory.py +50 -0
guarddog/reporters/sarif.py +179 -173
guarddog/scanners/__init__.py +5 -0
guarddog/scanners/extension_scanner.py +152 -0
guarddog/scanners/github_action_project_scanner.py +47 -8
guarddog/scanners/github_action_scanner.py +6 -2
guarddog/scanners/go_project_scanner.py +42 -5
guarddog/scanners/npm_package_scanner.py +12 -4
guarddog/scanners/npm_project_scanner.py +54 -10
guarddog/scanners/pypi_package_scanner.py +9 -3
guarddog/scanners/pypi_project_scanner.py +67 -29
guarddog/scanners/scanner.py +247 -164
guarddog/utils/archives.py +2 -1
guarddog/utils/package_info.py +3 -1
{guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info}/METADATA +11 -10
guarddog-2.7.0.dist-info/RECORD +100 -0
{guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info}/WHEEL +1 -1
guarddog-2.5.0.dist-info/RECORD +0 -90
{guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info}/entry_points.txt +0 -0
{guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info/licenses}/LICENSE +0 -0
{guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info/licenses}/LICENSE-3rdparty.csv +0 -0
{guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info/licenses}/NOTICE +0 -0

guarddog/cli.py CHANGED Viewed

@@ -1,11 +1,10 @@
-""" Package Malware Scanner
+"""Package Malware Scanner
 CLI command that scans a package version for user-specified malware flags.
 Includes rules based on package registry metadata and source code analysis.
 """
 from functools import reduce
-import json as js
 import logging
 import os
 import sys
@@ -14,12 +13,12 @@ from typing import Optional
 import click
 from prettytable import PrettyTable
-from termcolor import colored
 from guarddog.analyzer.metadata import get_metadata_detectors
 from guarddog.analyzer.sourcecode import get_sourcecode_rules
 from guarddog.ecosystems import ECOSYSTEM
-from guarddog.reporters.sarif import report_verify_sarif
+from guarddog.reporters.reporter_factory import ReporterFactory, ReporterType
 from guarddog.scanners import get_package_scanner, get_project_scanner
 from guarddog.utils.archives import safe_extract
@@ -127,7 +126,7 @@ def _get_all_rules(ecosystem: ECOSYSTEM) -> set[str]:
 def _get_rule_param(
     rules: tuple[str, ...], exclude_rules: tuple[str, ...], ecosystem: ECOSYSTEM
-) -> Optional[set]:
+) -> Optional[set[str]]:
     """
     This function should return None if no rules are provided
     Else a set of rules to be used for scanning
@@ -162,28 +161,20 @@ def _verify(
         log.error(f"Command verify is not supported for ecosystem {ecosystem}")
         exit(1)
-    def display_result(result: dict) -> None:
-        identifier = (
-            result["dependency"]
-            if result["version"] is None
-            else f"{result['dependency']} version {result['version']}"
-        )
-        if output_format is None:
-            print_scan_results(result.get("result"), identifier)
-        if len(result.get("errors", [])) > 0:
-            print_errors(result.get("error"), identifier)
+    dependencies, results = scanner.scan_local(path=path, rules=rule_param)
-    results = scanner.scan_local(path, rule_param, display_result)
-    if output_format == "json":
-        return_value = js.dumps(results)
+    rule_docs = list(rule_param or _get_all_rules(ecosystem=ecosystem))
-    if output_format == "sarif":
-        sarif_rules = _get_all_rules(ecosystem)
-        return_value = report_verify_sarif(path, list(sarif_rules), results, ecosystem)
+    reporter = ReporterFactory.create_reporter(ReporterType.from_str(output_format))
+    stdout, stderr = reporter.render_verify(
+        dependency_files=dependencies,
+        rule_names=rule_docs,
+        scan_results=results,
+        ecosystem=ecosystem,
+    )
-    if output_format is not None:
-        print(return_value)
+    sys.stdout.write(stdout)
+    sys.stderr.write(stderr)
     if exit_non_zero_on_finding:
         exit_with_status_code([result["result"] for result in results])
@@ -231,10 +222,10 @@ def _scan(
         log.error(f"Error occurred while scanning target {identifier}: '{e}'\n")
         sys.exit(1)
-    if output_format == "json":
-        print(js.dumps(result))
-    else:
-        print_scan_results(result, result["package"])
+    reporter = ReporterFactory.create_reporter(ReporterType.from_str(output_format))
+    stdout, stderr = reporter.render_scan(result)
+    sys.stdout.write(stdout)
+    sys.stderr.write(stderr)
     if exit_non_zero_on_finding:
         exit_with_status_code([result])
@@ -262,6 +253,7 @@ class CliEcosystem(click.Group):
     Class that dynamically represents an ecosystem in click
     It dynamically selects the ruleset to the instantiated ecosystem
     """
     def __init__(self, ecosystem: ECOSYSTEM):
         super().__init__()
         self.name = ecosystem.name.lower()
@@ -288,7 +280,12 @@ class CliEcosystem(click.Group):
         @scan_options
         @rule_options
         def scan_ecosystem(
-            target, version, rules, exclude_rules, output_format, exit_non_zero_on_finding
+            target,
+            version,
+            rules,
+            exclude_rules,
+            output_format,
+            exit_non_zero_on_finding,
         ):
             return _scan(
                 target,
@@ -304,7 +301,9 @@ class CliEcosystem(click.Group):
         @common_options
         @verify_options
         @rule_options
-        def verify_ecosystem(target, rules, exclude_rules, output_format, exit_non_zero_on_finding):
+        def verify_ecosystem(
+            target, rules, exclude_rules, output_format, exit_non_zero_on_finding
+        ):
             return _verify(
                 target,
                 rules,
@@ -314,7 +313,9 @@ class CliEcosystem(click.Group):
                 self.ecosystem,
             )
-        @click.command("list-rules", help=f"List available rules for {self.ecosystem.name}")
+        @click.command(
+            "list-rules", help=f"List available rules for {self.ecosystem.name}"
+        )
         def list_rules_ecosystem():
             return _list_rules(self.ecosystem)
@@ -333,7 +334,7 @@ for e in ECOSYSTEM:
 @verify_options
 @legacy_rules_options
 def verify(target, rules, exclude_rules, output_format, exit_non_zero_on_finding):
-    return _verify(
+    return _verify(
         target,
         rules,
         exclude_rules,
@@ -361,81 +362,6 @@ def scan(
     )
-# Pretty prints scan results for the console
-def print_scan_results(results, identifier):
-    num_issues = results.get("issues")
-    errors = results.get("errors", [])
-    if num_issues == 0:
-        print(
-            "Found "
-            + colored("0 potentially malicious indicators", "green", attrs=["bold"])
-            + " scanning "
-            + colored(identifier, None, attrs=["bold"])
-        )
-        print()
-    else:
-        print(
-            "Found "
-            + colored(
-                str(num_issues) + " potentially malicious indicators",
-                "red",
-                attrs=["bold"],
-            )
-            + " in "
-            + colored(identifier, None, attrs=["bold"])
-        )
-        print()
-        findings = results.get("results", [])
-        for finding in findings:
-            description = findings[finding]
-            if isinstance(description, str):  # package metadata
-                print(colored(finding, None, attrs=["bold"]) + ": " + description)
-                print()
-            elif isinstance(description, list):  # semgrep rule result:
-                source_code_findings = description
-                print(
-                    colored(finding, None, attrs=["bold"])
-                    + ": found "
-                    + str(len(source_code_findings))
-                    + " source code matches"
-                )
-                for finding in source_code_findings:
-                    print(
-                        "  * "
-                        + finding["message"]
-                        + " at "
-                        + finding["location"]
-                        + "\n    "
-                        + format_code_line_for_output(finding["code"])
-                    )
-                print()
-    if len(errors) > 0:
-        print_errors(errors, identifier)
-        print("\n")
-def print_errors(errors, identifier):
-    print(
-        colored("Some rules failed to run while scanning " + identifier + ":", "yellow")
-    )
-    print()
-    for rule in errors:
-        print(f"* {rule}: {errors[rule]}")
-    print()
-def format_code_line_for_output(code):
-    return "    " + colored(
-        code.strip().replace("\n", "\n    ").replace("\t", "  "),
-        None,
-        "on_red",
-        attrs=["bold"],
-    )
 # Given the results, exit with the appropriate status code
 def exit_with_status_code(results):
     for result in results:

guarddog/ecosystems.py CHANGED Viewed

@@ -6,6 +6,7 @@ class ECOSYSTEM(Enum):
     NPM = "npm"
     GO = "go"
     GITHUB_ACTION = "github-action"
+    EXTENSION = "extension"
 def get_friendly_name(ecosystem: ECOSYSTEM) -> str:
@@ -18,5 +19,7 @@ def get_friendly_name(ecosystem: ECOSYSTEM) -> str:
             return "go"
         case ECOSYSTEM.GITHUB_ACTION:
             return "GitHub Action"
+        case ECOSYSTEM.EXTENSION:
+            return "Extension"
         case _:
             return ecosystem.value

guarddog/reporters/__init__.py CHANGED Viewed

@@ -0,0 +1,28 @@
+from guarddog.scanners.scanner import DependencyFile
+from typing import List
+from guarddog.ecosystems import ECOSYSTEM
+class BaseReporter:
+    """
+    Base class for all reporters.
+    A reporter turns scan results into text. Both render methods return a
+    pair of strings that the CLI writes to stdout and stderr respectively.
+    """
+    @staticmethod
+    def render_scan(scan_results: dict) -> tuple[str, str]:
+        """
+        Render the results of scanning a single target.
+        Args:
+            scan_results (dict): The scan results to be reported.
+        Returns:
+            tuple[str, str]: Text for stdout and text for stderr.
+        Raises:
+            NotImplementedError: Always; subclasses must override this method.
+        """
+        raise NotImplementedError("Subclasses must implement this method.")
+    @staticmethod
+    def render_verify(
+        dependency_files: List[DependencyFile],
+        rule_names: list[str],
+        scan_results: list[dict],
+        ecosystem: ECOSYSTEM,
+    ) -> tuple[str, str]:
+        """
+        Render the results of verifying the dependencies of a project.
+        Args:
+            dependency_files (List[DependencyFile]): Dependency files that were scanned.
+            rule_names (list[str]): Names of the rules used for the scan.
+            scan_results (list[dict]): One result entry per scanned dependency.
+            ecosystem (ECOSYSTEM): Ecosystem the dependencies belong to.
+        Returns:
+            tuple[str, str]: Text for stdout and text for stderr.
+        Raises:
+            NotImplementedError: Always; subclasses must override this method.
+        """
+        raise NotImplementedError("Subclasses must implement this method.")

guarddog/reporters/human_readable.py ADDED Viewed

@@ -0,0 +1,138 @@
+from termcolor import colored
+from guarddog.reporters import BaseReporter
+from typing import List
+from guarddog.scanners.scanner import DependencyFile
+from guarddog.ecosystems import ECOSYSTEM
+class HumanReadableReporter(BaseReporter):
+    """
+    Reporter that formats scan results as colored, human-readable text.
+    Nothing is printed here: every method returns strings and leaves the
+    writing to the caller.
+    """
+    @staticmethod
+    def print_errors(identifier: str, results: dict) -> str:
+        """
+        Build the "rules failed to run" section for one scanned target.
+        Args:
+            identifier (str): Name of the scanned target, used in the heading.
+            results (dict): Scan result; its optional "errors" entry maps a
+                rule name to the error reported for that rule.
+        Returns:
+            str: The formatted section, or an empty string when there are no errors.
+        """
+        errors = results.get("errors", {})
+        if not errors:
+            return ""
+        lines = [
+            "",
+            colored(
+                "Some rules failed to run while scanning " + identifier + ":",
+                "yellow",
+            ),
+            "",
+        ]
+        lines.extend(f"* {rule}: {message}" for rule, message in errors.items())
+        return "\n".join(lines)
+    @staticmethod
+    def print_scan_results(identifier: str, results: dict) -> str:
+        """
+        Build the findings summary for one scanned target.
+        Args:
+            identifier (str): Name of the scanned target, used in the summary line.
+            results (dict): Scan result with an "issues" count and a "results"
+                mapping of rule name to either a description string (package
+                metadata finding) or a list of source code matches, each
+                carrying "message", "location" and "code" entries.
+        Returns:
+            str: The formatted summary.
+        """
+        def _format_code_line_for_output(code) -> str:
+            # Re-indent multi-line snippets and highlight them on a red background.
+            return "    " + colored(
+                code.strip().replace("\n", "\n    ").replace("\t", "  "),
+                None,
+                "on_red",
+                attrs=["bold"],
+            )
+        num_issues = results.get("issues")
+        lines = []
+        if num_issues == 0:
+            lines.append(
+                "Found "
+                + colored("0 potentially malicious indicators", "green", attrs=["bold"])
+                + " scanning "
+                + colored(identifier, None, attrs=["bold"])
+            )
+            lines.append("")
+        else:
+            lines.append(
+                "Found "
+                + colored(
+                    str(num_issues) + " potentially malicious indicators",
+                    "red",
+                    attrs=["bold"],
+                )
+                + " in "
+                + colored(identifier, None, attrs=["bold"])
+            )
+            lines.append("")
+            findings = results.get("results", {})
+            for rule_name, description in findings.items():
+                if isinstance(description, str):  # package metadata
+                    lines.append(
+                        colored(rule_name, None, attrs=["bold"]) + ": " + description
+                    )
+                    lines.append("")
+                elif isinstance(description, list):  # semgrep rule result
+                    lines.append(
+                        colored(rule_name, None, attrs=["bold"])
+                        + ": found "
+                        + str(len(description))
+                        + " source code matches"
+                    )
+                    # A distinct name keeps the outer rule name from being shadowed.
+                    for match in description:
+                        lines.append(
+                            "  * "
+                            + match["message"]
+                            + " at "
+                            + match["location"]
+                            + "\n    "
+                            + _format_code_line_for_output(match["code"])
+                        )
+                    lines.append("")
+        return "\n".join(lines)
+    @staticmethod
+    def render_scan(scan_results: dict) -> tuple[str, str]:
+        """
+        Report the scan results of a single target in a human-readable format.
+        Args:
+            scan_results (dict): The scan results to be reported; its "package"
+                entry is used as the target name.
+        Returns:
+            tuple[str, str]: The findings summary and the rule-error section.
+        """
+        return (
+            HumanReadableReporter.print_scan_results(
+                identifier=scan_results["package"], results=scan_results
+            ),
+            HumanReadableReporter.print_errors(
+                identifier=scan_results["package"], results=scan_results
+            ),
+        )
+    @staticmethod
+    def render_verify(
+        dependency_files: List[DependencyFile],
+        rule_names: list[str],
+        scan_results: list[dict],
+        ecosystem: ECOSYSTEM,
+    ) -> tuple[str, str]:
+        """
+        Report the scan results of every verified dependency in a human-readable format.
+        Only scan_results is used; the other arguments are accepted to match
+        the BaseReporter interface.
+        Args:
+            scan_results (list[dict]): One entry per dependency, with a
+                "dependency" name and a "result" scan result.
+        Returns:
+            tuple[str, str]: The joined findings summaries and the joined
+            rule-error sections.
+        """
+        summaries = [
+            HumanReadableReporter.print_scan_results(
+                identifier=s["dependency"], results=s["result"]
+            )
+            for s in scan_results
+        ]
+        error_sections = [
+            HumanReadableReporter.print_errors(
+                identifier=s["dependency"], results=s["result"]
+            )
+            for s in scan_results
+        ]
+        # Drop empty sections so dependencies without errors add no blank lines.
+        return "\n".join(summaries), "\n".join(filter(None, error_sections))

guarddog/reporters/json.py ADDED Viewed

@@ -0,0 +1,28 @@
+import json
+from typing import List
+from guarddog.scanners.scanner import DependencyFile
+from guarddog.ecosystems import ECOSYSTEM
+from guarddog.reporters import BaseReporter
+class JsonReporter(BaseReporter):
+    """
+    Reporter that serializes scan results as JSON.
+    """
+    @staticmethod
+    def render_scan(scan_results: dict) -> tuple[str, str]:
+        """
+        Serialize the scan results of a single target as JSON.
+        Args:
+            scan_results (dict): The scan results to be reported.
+        Returns:
+            tuple[str, str]: The JSON document and an empty second string.
+        """
+        # the whole result, errors included, is serialized into the first string
+        serialized = json.dumps(scan_results)
+        return serialized, ""
+    @staticmethod
+    def render_verify(
+        dependency_files: List[DependencyFile],
+        rule_names: list[str],
+        scan_results: list[dict],
+        ecosystem: ECOSYSTEM,
+    ) -> tuple[str, str]:
+        """
+        Serialize the scan results of every verified dependency as JSON.
+        Only scan_results is serialized; the other arguments are not used.
+        Returns:
+            tuple[str, str]: The JSON document and an empty second string.
+        """
+        serialized = json.dumps(scan_results)
+        return serialized, ""

guarddog/reporters/reporter_factory.py ADDED Viewed

@@ -0,0 +1,50 @@
+from enum import Enum, auto
+from typing import Optional
+from guarddog.reporters import BaseReporter
+from guarddog.reporters.human_readable import HumanReadableReporter
+from guarddog.reporters.sarif import SarifReporter
+from guarddog.reporters.json import JsonReporter
+class ReporterType(Enum):
+    """
+    Output formats a report can be rendered in.
+    """
+    HUMAN_READABLE = auto()
+    SARIF = auto()
+    JSON = auto()
+    @classmethod
+    def from_str(cls, type: Optional[str]) -> "ReporterType":
+        """
+        Resolve a reporter type from its name, ignoring case.
+        A missing or empty name selects the human-readable reporter.
+        Raises:
+            ValueError: If the name matches no known reporter type.
+        """
+        if not type:
+            return cls.HUMAN_READABLE
+        by_name = {
+            "human_readable": cls.HUMAN_READABLE,
+            "sarif": cls.SARIF,
+            "json": cls.JSON,
+        }
+        selected = by_name.get(type.lower())
+        if selected is None:
+            raise ValueError(f"Unsupported reporter type: {type}")
+        return selected
+class ReporterFactory:
+    """
+    Factory that maps a reporter type to the reporter class implementing it.
+    """
+    @staticmethod
+    def create_reporter(reporter_type: ReporterType) -> type[BaseReporter]:
+        """
+        Return the reporter class for the given reporter type.
+        The class itself is returned, not an instance.
+        Args:
+            reporter_type (ReporterType): The output format to report in.
+        Returns:
+            type[BaseReporter]: The reporter class for that format.
+        Raises:
+            ValueError: If no reporter is available for reporter_type.
+        """
+        match reporter_type:
+            case ReporterType.HUMAN_READABLE:
+                return HumanReadableReporter
+            case ReporterType.SARIF:
+                return SarifReporter
+            case ReporterType.JSON:
+                return JsonReporter
+            case _:
+                # Fail loudly instead of implicitly returning None.
+                raise ValueError(f"Unsupported reporter type: {reporter_type}")

guarddog 2.5.0__py3-none-any.whl → 2.7.0__py3-none-any.whl

guarddog 2.5.0__py3-none-any.whl → 2.7.0__py3-none-any.whl