guarddog 2.0.1__py3-none-any.whl → 2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,15 +3,18 @@ import logging
3
3
  import os
4
4
  import subprocess
5
5
  import yara # type: ignore
6
+
6
7
  from collections import defaultdict
7
8
  from pathlib import Path
8
9
  from typing import Iterable, Optional, Dict
9
10
 
10
11
  from guarddog.analyzer.metadata import get_metadata_detectors
11
12
  from guarddog.analyzer.sourcecode import get_sourcecode_rules, SempgrepRule, YaraRule
13
+ from guarddog.utils.config import YARA_EXT_EXCLUDE
12
14
  from guarddog.ecosystems import ECOSYSTEM
13
15
 
14
16
  SEMGREP_MAX_TARGET_BYTES = 10_000_000
17
+ SOURCECODE_RULES_PATH = os.path.join(os.path.dirname(__file__), "sourcecode")
15
18
 
16
19
  log = logging.getLogger("guarddog")
17
20
 
@@ -21,7 +24,6 @@ class Analyzer:
21
24
  Analyzes a local directory for threats found by source code or metadata rules
22
25
 
23
26
  Attributes:
24
- sourcecode_rules_path (str): path to source code rules
25
27
  ecosystem (str): name of the current ecosystem
26
28
  metadata_ruleset (list): list of metadata rule names
27
29
  sourcecode_ruleset (list): list of source code rule names
@@ -33,7 +35,6 @@ class Analyzer:
33
35
  """
34
36
 
35
37
  def __init__(self, ecosystem=ECOSYSTEM.PYPI) -> None:
36
- self.sourcecode_rules_path = os.path.join(os.path.dirname(__file__), "sourcecode")
37
38
  self.ecosystem = ecosystem
38
39
 
39
40
  # Rules and associated detectors
@@ -177,8 +178,10 @@ class Analyzer:
177
178
  errors: Dict[str, str] = {}
178
179
  issues = 0
179
180
 
181
+ rule_results = defaultdict(list)
182
+
180
183
  rules_path = {
181
- rule_name: os.path.join(self.sourcecode_rules_path, f"{rule_name}.yar")
184
+ rule_name: os.path.join(SOURCECODE_RULES_PATH, f"{rule_name}.yar")
182
185
  for rule_name in all_rules
183
186
  }
184
187
 
@@ -191,21 +194,28 @@ class Analyzer:
191
194
 
192
195
  for root, _, files in os.walk(path):
193
196
  for f in files:
194
- matches = scan_rules.match(os.path.join(root, f))
197
+ # Skip files with excluded extensions
198
+ if f.lower().endswith(tuple(YARA_EXT_EXCLUDE)):
199
+ continue
200
+
201
+ scan_file_target_abspath = os.path.join(root, f)
202
+ scan_file_target_relpath = os.path.relpath(scan_file_target_abspath, path)
203
+
204
+ matches = scan_rules.match(scan_file_target_abspath)
195
205
  for m in matches:
196
206
  for s in m.strings:
197
207
  for i in s.instances:
198
- rule_results = {
199
- "location": f"{f}:{i.offset}",
208
+ finding = {
209
+ "location": f"{scan_file_target_relpath}:{i.offset}",
200
210
  "code": self.trim_code_snippet(str(i.matched_data)),
201
211
  'message': m.meta.get("description", f"{m.rule} rule matched")
202
212
  }
203
213
  issues += len(m.strings)
204
- results[m.rule].update(rule_results)
214
+ rule_results[m.rule].append(finding)
205
215
  except Exception as e:
206
216
  errors["rules-all"] = f"failed to run rule: {str(e)}"
207
217
 
208
- return {"results": results, "errors": errors, "issues": issues}
218
+ return {"results": results | rule_results, "errors": errors, "issues": issues}
209
219
 
210
220
  def analyze_semgrep(self, path, rules=None) -> dict:
211
221
  """
@@ -231,7 +241,7 @@ class Analyzer:
231
241
  issues = 0
232
242
 
233
243
  rules_path = list(map(
234
- lambda rule_name: os.path.join(self.sourcecode_rules_path, f"{rule_name}.yml"),
244
+ lambda rule_name: os.path.join(SOURCECODE_RULES_PATH, f"{rule_name}.yml"),
235
245
  all_rules
236
246
  ))
237
247
 
@@ -44,7 +44,7 @@ class NPMTyposquatDetector(TyposquatDetector):
44
44
 
45
45
  if top_packages_information is None:
46
46
  response = requests.get(popular_packages_url).json()
47
- top_packages_information = list([i["name"] for i in response[0:5000]])
47
+ top_packages_information = list([i["name"] for i in response[0:8000]])
48
48
  with open(top_packages_path, "w+") as f:
49
49
  json.dump(top_packages_information, f, ensure_ascii=False, indent=4)
50
50