guarddog-2.0.2-py3-none-any.whl → guarddog-2.0.4-py3-none-any.whl
This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- guarddog/analyzer/analyzer.py +19 -9
- guarddog/analyzer/metadata/resources/top_npm_packages.json +7219 -7219
- guarddog/analyzer/metadata/resources/top_pypi_packages.json +15952 -15952
- guarddog/analyzer/sourcecode/__init__.py +11 -2
- guarddog/analyzer/sourcecode/npm-exec-base64.yml +14 -5
- guarddog/analyzer/sourcecode/npm-install-script.yml +11 -18
- guarddog/analyzer/sourcecode/npm-obfuscation.yml +6 -2
- guarddog/analyzer/sourcecode/obfuscation.yml +4 -2
- guarddog/cli.py +1 -1
- guarddog/reporters/sarif.py +1 -3
- guarddog/utils/config.py +12 -1
- {guarddog-2.0.2.dist-info → guarddog-2.0.4.dist-info}/METADATA +4 -3
- {guarddog-2.0.2.dist-info → guarddog-2.0.4.dist-info}/RECORD +18 -18
- {guarddog-2.0.2.dist-info → guarddog-2.0.4.dist-info}/LICENSE +0 -0
- {guarddog-2.0.2.dist-info → guarddog-2.0.4.dist-info}/LICENSE-3rdparty.csv +0 -0
- {guarddog-2.0.2.dist-info → guarddog-2.0.4.dist-info}/NOTICE +0 -0
- {guarddog-2.0.2.dist-info → guarddog-2.0.4.dist-info}/WHEEL +0 -0
- {guarddog-2.0.2.dist-info → guarddog-2.0.4.dist-info}/entry_points.txt +0 -0
guarddog/analyzer/analyzer.py
CHANGED
```diff
@@ -3,15 +3,18 @@ import logging
 import os
 import subprocess
 import yara # type: ignore
+
 from collections import defaultdict
 from pathlib import Path
 from typing import Iterable, Optional, Dict
 
 from guarddog.analyzer.metadata import get_metadata_detectors
 from guarddog.analyzer.sourcecode import get_sourcecode_rules, SempgrepRule, YaraRule
+from guarddog.utils.config import YARA_EXT_EXCLUDE
 from guarddog.ecosystems import ECOSYSTEM
 
 SEMGREP_MAX_TARGET_BYTES = 10_000_000
+SOURCECODE_RULES_PATH = os.path.join(os.path.dirname(__file__), "sourcecode")
 
 log = logging.getLogger("guarddog")
 
```
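The hunks that follow replace the per-instance `sourcecode_rules_path` attribute with the module-level `SOURCECODE_RULES_PATH` constant introduced above, and build rule file paths from it for both scanners. A minimal illustrative sketch of that pattern, using a couple of rule names from this package purely as examples (the real names come from `get_sourcecode_rules`):

```python
import os

# Mirrors the new module-level constant in analyzer.py: the "sourcecode"
# directory next to that file holds the YARA (.yar) and Semgrep (.yml) rules.
SOURCECODE_RULES_PATH = os.path.join(os.path.dirname(__file__), "sourcecode")

# Example rule names, for illustration only.
all_rules = ["npm-exec-base64", "obfuscation"]

# YARA: map each rule name to its .yar file (mirrors the analyze_yara hunk).
yara_rules_path = {
    rule_name: os.path.join(SOURCECODE_RULES_PATH, f"{rule_name}.yar")
    for rule_name in all_rules
}

# Semgrep: list of .yml rule files (equivalent to the list(map(...)) form
# used in the analyze_semgrep hunk).
semgrep_rules_path = [
    os.path.join(SOURCECODE_RULES_PATH, f"{rule_name}.yml")
    for rule_name in all_rules
]
```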
```diff
@@ -21,7 +24,6 @@ class Analyzer:
     Analyzes a local directory for threats found by source code or metadata rules
 
     Attributes:
-        sourcecode_rules_path (str): path to source code rules
         ecosystem (str): name of the current ecosystem
         metadata_ruleset (list): list of metadata rule names
         sourcecode_ruleset (list): list of source code rule names
@@ -33,7 +35,6 @@ class Analyzer:
     """
 
     def __init__(self, ecosystem=ECOSYSTEM.PYPI) -> None:
-        self.sourcecode_rules_path = os.path.join(os.path.dirname(__file__), "sourcecode")
         self.ecosystem = ecosystem
 
         # Rules and associated detectors
@@ -177,8 +178,10 @@ class Analyzer:
         errors: Dict[str, str] = {}
         issues = 0
 
+        rule_results = defaultdict(list)
+
         rules_path = {
-            rule_name: os.path.join(
+            rule_name: os.path.join(SOURCECODE_RULES_PATH, f"{rule_name}.yar")
             for rule_name in all_rules
         }
 
@@ -191,21 +194,28 @@ class Analyzer:
 
             for root, _, files in os.walk(path):
                 for f in files:
-
+                    # Skip files with excluded extensions
+                    if f.lower().endswith(tuple(YARA_EXT_EXCLUDE)):
+                        continue
+
+                    scan_file_target_abspath = os.path.join(root, f)
+                    scan_file_target_relpath = os.path.relpath(scan_file_target_abspath, path)
+
+                    matches = scan_rules.match(scan_file_target_abspath)
                     for m in matches:
                         for s in m.strings:
                             for i in s.instances:
-
-                                "location": f"{
+                                finding = {
+                                    "location": f"{scan_file_target_relpath}:{i.offset}",
                                     "code": self.trim_code_snippet(str(i.matched_data)),
                                     'message': m.meta.get("description", f"{m.rule} rule matched")
                                 }
                                 issues += len(m.strings)
-
+                                rule_results[m.rule].append(finding)
         except Exception as e:
             errors["rules-all"] = f"failed to run rule: {str(e)}"
 
-        return {"results": results, "errors": errors, "issues": issues}
+        return {"results": results | rule_results, "errors": errors, "issues": issues}
 
     def analyze_semgrep(self, path, rules=None) -> dict:
         """
@@ -231,7 +241,7 @@ class Analyzer:
         issues = 0
 
         rules_path = list(map(
-            lambda rule_name: os.path.join(
+            lambda rule_name: os.path.join(SOURCECODE_RULES_PATH, f"{rule_name}.yml"),
             all_rules
         ))
 
```
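Taken together, the `analyze_yara` changes skip files whose extensions appear in `YARA_EXT_EXCLUDE` (imported from the updated `guarddog/utils/config.py`), report match locations relative to the scanned path, group findings per YARA rule, and merge them into the existing results with the dict-union operator (`|`, available since Python 3.9). Below is a minimal sketch of that aggregation pattern, assuming a placeholder exclude list and an already-compiled `yara.Rules` object, since neither the real exclude list nor the rule compilation appears in this diff:

```python
import os
from collections import defaultdict

import yara  # type: ignore  # provided by the yara-python package

# Placeholder for guarddog.utils.config.YARA_EXT_EXCLUDE; the real value is
# defined in guarddog/utils/config.py and is not visible in this diff.
YARA_EXT_EXCLUDE = [".json", ".lock"]


def scan_directory(path: str, scan_rules: yara.Rules) -> dict:
    """Group YARA findings per rule, mirroring the reworked analyze_yara flow."""
    rule_results: dict = defaultdict(list)
    issues = 0

    for root, _, files in os.walk(path):
        for f in files:
            # Skip files with excluded extensions.
            if f.lower().endswith(tuple(YARA_EXT_EXCLUDE)):
                continue

            abspath = os.path.join(root, f)
            relpath = os.path.relpath(abspath, path)

            for m in scan_rules.match(abspath):
                for s in m.strings:
                    for i in s.instances:
                        rule_results[m.rule].append({
                            "location": f"{relpath}:{i.offset}",
                            "code": str(i.matched_data),
                            "message": m.meta.get("description", f"{m.rule} rule matched"),
                        })
                issues += len(m.strings)

    # Findings already collected elsewhere (empty here) are merged with the
    # per-rule YARA findings via the dict-union operator.
    results: dict = {}
    return {"results": results | dict(rule_results), "errors": {}, "issues": issues}
```

Grouping on `m.rule` means each YARA rule that matched becomes its own key in the returned `results`, alongside whatever the Semgrep and metadata detectors already contributed.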