PyPI - guarddog - Versions diffs - 2.0.1__py3-none-any.whl → 2.0.3__py3-none-any.whl - Mend

guarddog 2.0.1__py3-none-any.whl → 2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

guarddog/analyzer/analyzer.py +19 -9
guarddog/analyzer/metadata/npm/typosquatting.py +1 -1
guarddog/analyzer/metadata/resources/top_npm_packages.json +7531 -4531
guarddog/analyzer/metadata/resources/top_pypi_packages.json +15908 -15908
guarddog/analyzer/sourcecode/__init__.py +11 -2
guarddog/analyzer/sourcecode/npm-obfuscation.yml +8 -0
guarddog/analyzer/sourcecode/obfuscation.yml +17 -0
guarddog/analyzer/sourcecode/shady-links.yml +10 -6
guarddog/cli.py +1 -1
guarddog/reporters/sarif.py +1 -3
guarddog/utils/config.py +12 -1
{guarddog-2.0.1.dist-info → guarddog-2.0.3.dist-info}/METADATA +2 -2
{guarddog-2.0.1.dist-info → guarddog-2.0.3.dist-info}/RECORD +18 -18
{guarddog-2.0.1.dist-info → guarddog-2.0.3.dist-info}/LICENSE +0 -0
{guarddog-2.0.1.dist-info → guarddog-2.0.3.dist-info}/LICENSE-3rdparty.csv +0 -0
{guarddog-2.0.1.dist-info → guarddog-2.0.3.dist-info}/NOTICE +0 -0
{guarddog-2.0.1.dist-info → guarddog-2.0.3.dist-info}/WHEEL +0 -0
{guarddog-2.0.1.dist-info → guarddog-2.0.3.dist-info}/entry_points.txt +0 -0

guarddog/analyzer/analyzer.py CHANGED Viewed

@@ -3,15 +3,18 @@ import logging
 import os
 import subprocess
 import yara  # type: ignore
 from collections import defaultdict
 from pathlib import Path
 from typing import Iterable, Optional, Dict
 from guarddog.analyzer.metadata import get_metadata_detectors
 from guarddog.analyzer.sourcecode import get_sourcecode_rules, SempgrepRule, YaraRule
+from guarddog.utils.config import YARA_EXT_EXCLUDE
 from guarddog.ecosystems import ECOSYSTEM
 SEMGREP_MAX_TARGET_BYTES = 10_000_000
+SOURCECODE_RULES_PATH = os.path.join(os.path.dirname(__file__), "sourcecode")
 log = logging.getLogger("guarddog")
@@ -21,7 +24,6 @@ class Analyzer:
     Analyzes a local directory for threats found by source code or metadata rules
     Attributes:
-        sourcecode_rules_path (str): path to source code rules
         ecosystem (str): name of the current ecosystem
         metadata_ruleset (list): list of metadata rule names
         sourcecode_ruleset (list): list of source code rule names
@@ -33,7 +35,6 @@ class Analyzer:
     """
     def __init__(self, ecosystem=ECOSYSTEM.PYPI) -> None:
-        self.sourcecode_rules_path = os.path.join(os.path.dirname(__file__), "sourcecode")
         self.ecosystem = ecosystem
         # Rules and associated detectors
@@ -177,8 +178,10 @@ class Analyzer:
         errors: Dict[str, str] = {}
         issues = 0
+        rule_results = defaultdict(list)
         rules_path = {
-            rule_name: os.path.join(self.sourcecode_rules_path, f"{rule_name}.yar")
+            rule_name: os.path.join(SOURCECODE_RULES_PATH, f"{rule_name}.yar")
             for rule_name in all_rules
         }
@@ -191,21 +194,28 @@ class Analyzer:
             for root, _, files in os.walk(path):
                 for f in files:
-                    matches = scan_rules.match(os.path.join(root, f))
+                    # Skip files with excluded extensions
+                    if f.lower().endswith(tuple(YARA_EXT_EXCLUDE)):
+                        continue
+                    scan_file_target_abspath = os.path.join(root, f)
+                    scan_file_target_relpath = os.path.relpath(scan_file_target_abspath, path)
+                    matches = scan_rules.match(scan_file_target_abspath)
                     for m in matches:
                         for s in m.strings:
                             for i in s.instances:
-                                rule_results = {
-                                    "location": f"{f}:{i.offset}",
+                                finding = {
+                                    "location": f"{scan_file_target_relpath}:{i.offset}",
                                     "code": self.trim_code_snippet(str(i.matched_data)),
                                     'message': m.meta.get("description", f"{m.rule} rule matched")
                                 }
                                 issues += len(m.strings)
-                                results[m.rule].update(rule_results)
+                                rule_results[m.rule].append(finding)
         except Exception as e:
             errors["rules-all"] = f"failed to run rule: {str(e)}"
-        return {"results": results, "errors": errors, "issues": issues}
+        return {"results": results | rule_results, "errors": errors, "issues": issues}
     def analyze_semgrep(self, path, rules=None) -> dict:
         """
@@ -231,7 +241,7 @@ class Analyzer:
         issues = 0
         rules_path = list(map(
-            lambda rule_name: os.path.join(self.sourcecode_rules_path, f"{rule_name}.yml"),
+            lambda rule_name: os.path.join(SOURCECODE_RULES_PATH, f"{rule_name}.yml"),
             all_rules
         ))

guarddog/analyzer/metadata/npm/typosquatting.py CHANGED Viewed

@@ -44,7 +44,7 @@ class NPMTyposquatDetector(TyposquatDetector):
         if top_packages_information is None:
             response = requests.get(popular_packages_url).json()
-            top_packages_information = list([i["name"] for i in response[0:5000]])
+            top_packages_information = list([i["name"] for i in response[0:8000]])
             with open(top_packages_path, "w+") as f:
                 json.dump(top_packages_information, f, ensure_ascii=False, indent=4)

guarddog 2.0.1__py3-none-any.whl → 2.0.3__py3-none-any.whl

guarddog 2.0.1__py3-none-any.whl → 2.0.3__py3-none-any.whl