guarddog 2.0.2__py3-none-any.whl → 2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,15 +3,18 @@ import logging
3
3
  import os
4
4
  import subprocess
5
5
  import yara # type: ignore
6
+
6
7
  from collections import defaultdict
7
8
  from pathlib import Path
8
9
  from typing import Iterable, Optional, Dict
9
10
 
10
11
  from guarddog.analyzer.metadata import get_metadata_detectors
11
12
  from guarddog.analyzer.sourcecode import get_sourcecode_rules, SempgrepRule, YaraRule
13
+ from guarddog.utils.config import YARA_EXT_EXCLUDE
12
14
  from guarddog.ecosystems import ECOSYSTEM
13
15
 
14
16
  SEMGREP_MAX_TARGET_BYTES = 10_000_000
17
+ SOURCECODE_RULES_PATH = os.path.join(os.path.dirname(__file__), "sourcecode")
15
18
 
16
19
  log = logging.getLogger("guarddog")
17
20
 
@@ -21,7 +24,6 @@ class Analyzer:
21
24
  Analyzes a local directory for threats found by source code or metadata rules
22
25
 
23
26
  Attributes:
24
- sourcecode_rules_path (str): path to source code rules
25
27
  ecosystem (str): name of the current ecosystem
26
28
  metadata_ruleset (list): list of metadata rule names
27
29
  sourcecode_ruleset (list): list of source code rule names
@@ -33,7 +35,6 @@ class Analyzer:
33
35
  """
34
36
 
35
37
  def __init__(self, ecosystem=ECOSYSTEM.PYPI) -> None:
36
- self.sourcecode_rules_path = os.path.join(os.path.dirname(__file__), "sourcecode")
37
38
  self.ecosystem = ecosystem
38
39
 
39
40
  # Rules and associated detectors
@@ -177,8 +178,10 @@ class Analyzer:
177
178
  errors: Dict[str, str] = {}
178
179
  issues = 0
179
180
 
181
+ rule_results = defaultdict(list)
182
+
180
183
  rules_path = {
181
- rule_name: os.path.join(self.sourcecode_rules_path, f"{rule_name}.yar")
184
+ rule_name: os.path.join(SOURCECODE_RULES_PATH, f"{rule_name}.yar")
182
185
  for rule_name in all_rules
183
186
  }
184
187
 
@@ -191,21 +194,28 @@ class Analyzer:
191
194
 
192
195
  for root, _, files in os.walk(path):
193
196
  for f in files:
194
- matches = scan_rules.match(os.path.join(root, f))
197
+ # Skip files with excluded extensions
198
+ if f.lower().endswith(tuple(YARA_EXT_EXCLUDE)):
199
+ continue
200
+
201
+ scan_file_target_abspath = os.path.join(root, f)
202
+ scan_file_target_relpath = os.path.relpath(scan_file_target_abspath, path)
203
+
204
+ matches = scan_rules.match(scan_file_target_abspath)
195
205
  for m in matches:
196
206
  for s in m.strings:
197
207
  for i in s.instances:
198
- rule_results = {
199
- "location": f"{f}:{i.offset}",
208
+ finding = {
209
+ "location": f"{scan_file_target_relpath}:{i.offset}",
200
210
  "code": self.trim_code_snippet(str(i.matched_data)),
201
211
  'message': m.meta.get("description", f"{m.rule} rule matched")
202
212
  }
203
213
  issues += len(m.strings)
204
- results[m.rule].update(rule_results)
214
+ rule_results[m.rule].append(finding)
205
215
  except Exception as e:
206
216
  errors["rules-all"] = f"failed to run rule: {str(e)}"
207
217
 
208
- return {"results": results, "errors": errors, "issues": issues}
218
+ return {"results": results | rule_results, "errors": errors, "issues": issues}
209
219
 
210
220
  def analyze_semgrep(self, path, rules=None) -> dict:
211
221
  """
@@ -231,7 +241,7 @@ class Analyzer:
231
241
  issues = 0
232
242
 
233
243
  rules_path = list(map(
234
- lambda rule_name: os.path.join(self.sourcecode_rules_path, f"{rule_name}.yml"),
244
+ lambda rule_name: os.path.join(SOURCECODE_RULES_PATH, f"{rule_name}.yml"),
235
245
  all_rules
236
246
  ))
237
247
 
@@ -1,4 +1,5 @@
1
1
  import os
2
+ import re
2
3
  import pathlib
3
4
  from dataclasses import dataclass
4
5
  from typing import Optional, Iterable
@@ -20,6 +21,7 @@ class SourceCodeRule:
20
21
  """
21
22
  id: str
22
23
  file: str
24
+ description: str
23
25
 
24
26
 
25
27
  @dataclass
@@ -36,7 +38,6 @@ class SempgrepRule(SourceCodeRule):
36
38
  Semgrep rules are language specific
37
39
  Content of rule in yaml format is accessible through rule_content
38
40
  """
39
- description: str
40
41
  ecosystem: ECOSYSTEM
41
42
  rule_content: dict
42
43
 
@@ -105,4 +106,12 @@ yara_rule_file_names = list(
105
106
  # all yar files placed in the sourcecode directory are loaded as YARA rules
106
107
  # refer to README.md for more information
107
108
  for file_name in yara_rule_file_names:
108
- SOURCECODE_RULES.append(YaraRule(id=pathlib.Path(file_name).stem, file=file_name))
109
+ rule_id = pathlib.Path(file_name).stem
110
+ description_regex = fr'\s*rule\s+{rule_id}[^}}]+meta:[^}}]+description\s*=\s*\"(.+?)\"'
111
+
112
+ with open(os.path.join(current_dir, file_name), "r") as fd:
113
+ match = re.search(description_regex, fd.read())
114
+ rule_description = ""
115
+ if match:
116
+ rule_description = match.group(1)
117
+ SOURCECODE_RULES.append(YaraRule(id=rule_id, file=file_name, description=rule_description))
@@ -50,8 +50,12 @@ rules:
50
50
  - pattern-not-inside: //...
51
51
  - pattern-regex: ^\s*[\[\]\(\)\+\!]{10,}\s*$
52
52
 
53
- # hide code from sight
54
- - pattern-regex: ^(.*?);?[\h]{150,};?.{10,}$
53
+ # hide code from sight
54
+ - patterns:
55
+ - pattern: ...
56
+ - pattern-not-inside: /*...*/
57
+ - pattern-not-inside: //...
58
+ - pattern-regex: ^(.*?);?[\h]{150,};?.{10,}$
55
59
 
56
60
  languages:
57
61
  - javascript
@@ -17,9 +17,11 @@ rules:
17
17
  - pattern: getattr(builtins, ...)
18
18
 
19
19
  # hide code from sight
20
- - pattern-regex: ^(.*?);?[\h]{150,};?.{10,}$
20
+ - patterns:
21
+ - pattern: ...
22
+ - pattern-not-inside: '"..."'
23
+ - pattern-regex: ^(.*?);?[\h]{150,};?.{10,}$
21
24
 
22
- # using decode hardcoded content
23
25
  - patterns:
24
26
  - pattern: $HEX.decode(...)
25
27
  - metavariable-regex:
guarddog/cli.py CHANGED
@@ -246,7 +246,7 @@ def _list_rules(ecosystem: ECOSYSTEM):
246
246
  table.field_names = ["Rule type", "Rule name", "Description"]
247
247
 
248
248
  for sc_rule in get_sourcecode_rules(ecosystem):
249
- table.add_row(["Source code", sc_rule.id, getattr(sc_rule, "description", "")])
249
+ table.add_row(["Source code", sc_rule.id, sc_rule.description])
250
250
 
251
251
  metadata_rules = get_metadata_detectors(ecosystem)
252
252
  for ruleName in metadata_rules:
@@ -18,9 +18,7 @@ def build_rules_help_list() -> dict:
18
18
  detector_class = instance.__class__.__base__
19
19
  rules_documentation[name] = detector_class.__doc__
20
20
  for sourcecode_rule in get_sourcecode_rules(ecosystem):
21
- rules_documentation[sourcecode_rule.id] = getattr(
22
- sourcecode_rule, "description", ""
23
- )
21
+ rules_documentation[sourcecode_rule.id] = sourcecode_rule.description
24
22
  return rules_documentation
25
23
 
26
24
 
guarddog/utils/config.py CHANGED
@@ -22,9 +22,20 @@ VERIFY_EXHAUSTIVE_DEPENDENCIES: bool = (
22
22
  This parameter specifies the location of the top packages cache
23
23
  - Default: guarddog/analyzer/metadata/resources
24
24
  """
25
- TOP_PACKAGES_CACHE_LOCATION = os.environ.get(
25
+ TOP_PACKAGES_CACHE_LOCATION: str = os.environ.get(
26
26
  "GUARDDOG_TOP_PACKAGES_CACHE_LOCATION",
27
27
  os.path.abspath(
28
28
  os.path.join(os.path.dirname(__file__), "../analyzer/metadata/resources")
29
29
  ),
30
30
  )
31
+
32
+ """
33
+ This parameter specifies comma-separated file extensions that YARA rules will not run against
34
+ - Default: ini,md,rst,txt,lock,json,yaml,yml,toml,xml,html,rst,csv,sql,pdf,doc,docx,ppt,
35
+ pptx,xls,xlsx,odt,changelog,readme,makefile,dockerfile,pkg-info
36
+ """
37
+ YARA_EXT_EXCLUDE: list[str] = os.environ.get(
38
+ "GUARDDOG_YARA_EXT_EXCLUDE",
39
+ "ini,md,rst,txt,lock,json,yaml,yml,toml,xml,html,rst,csv,sql,pdf,doc,docx,ppt,"
40
+ "pptx,xls,xlsx,odt,changelog,readme,makefile,dockerfile,pkg-info",
41
+ ).split(",")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: guarddog
3
- Version: 2.0.2
3
+ Version: 2.0.3
4
4
  Summary: GuardDog is a CLI tool to Identify malicious PyPI packages
5
5
  Home-page: https://github.com/DataDog/guarddog
6
6
  License: Apache-2.0
@@ -24,7 +24,7 @@ Requires-Dist: pyyaml (>=6.0,<7.0)
24
24
  Requires-Dist: requests (>=2.29.0,<3.0.0)
25
25
  Requires-Dist: semantic-version (>=2.10.0,<3.0.0)
26
26
  Requires-Dist: semgrep (==1.67.0)
27
- Requires-Dist: setuptools (>=70.3.0,<71.0.0)
27
+ Requires-Dist: setuptools (>=70.3,<74.0)
28
28
  Requires-Dist: tarsafe (>=0.0.5,<0.0.6)
29
29
  Requires-Dist: termcolor (>=2.1.0,<3.0.0)
30
30
  Requires-Dist: urllib3 (==2.2.2)
@@ -1,7 +1,7 @@
1
1
  guarddog/__init__.py,sha256=reb53KZG9b1nFmsDxj2fropaOceOCyM9bVMUdmZ2wS8,227
2
2
  guarddog/__main__.py,sha256=GEdfW6I6g2c3H7bS0G43E4C-g7kXGUswzDCPFSwPgHY,246
3
3
  guarddog/analyzer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- guarddog/analyzer/analyzer.py,sha256=9GUlJGQxDnkoIs0Z04R9UpqDRUlsH-cLCEj8guNk4TU,12504
4
+ guarddog/analyzer/analyzer.py,sha256=9cbLp0mkLIJwNgWJ41LXtO4RdjZgwOrHT2AibLwcps4,12873
5
5
  guarddog/analyzer/metadata/__init__.py,sha256=LH4oLBr2vtkGdUXkb8C6JgN35h4NmpswxE7wOOmOdrQ,581
6
6
  guarddog/analyzer/metadata/bundled_binary.py,sha256=NKwAa1QKuOG79g9up5O2JpwfZzygJZwXrBZ6w_yemkU,1835
7
7
  guarddog/analyzer/metadata/deceptive_author.py,sha256=nuFyQqKpOuBKAJxpgbcjwXt3FVLzdmOg2mioUZ1D2TI,2789
@@ -39,7 +39,7 @@ guarddog/analyzer/metadata/resources/top_pypi_packages.json,sha256=R-0P8wdW0UFj0
39
39
  guarddog/analyzer/metadata/typosquatting.py,sha256=J3X-Acago_CLjgr_-BJ0FXd3JbCtG4s4fJzaT3a62RQ,5618
40
40
  guarddog/analyzer/metadata/unclaimed_maintainer_email_domain.py,sha256=qy8AZqbVxD1U3Q--h0FYV7lKPFNlhSzfZK0GwjvQxdQ,2343
41
41
  guarddog/analyzer/metadata/utils.py,sha256=bOrkELPza4ScUx1DfQxlqU-9DQeA5weISF42c0QCtls,1768
42
- guarddog/analyzer/sourcecode/__init__.py,sha256=eTT2b4QDn3CHuAvqmcUduZng0F7o7wgwkJYe9p4d-Og,3396
42
+ guarddog/analyzer/sourcecode/__init__.py,sha256=A2mbib72TDZkMIWb94Yq9lf_3xU5WOY_Gva0wHGqObE,3769
43
43
  guarddog/analyzer/sourcecode/bidirectional-characters.yml,sha256=WNQb3EzaT6GAbiMOV3lx592KNczbhIyUAAVXH9m2ygQ,1310
44
44
  guarddog/analyzer/sourcecode/clipboard-access.yml,sha256=B36E7xKtAVgwZ29UWtvZa1AJcyfrhvehbLo6tlJqffk,524
45
45
  guarddog/analyzer/sourcecode/cmd-overwrite.yml,sha256=l-tE3_G-LqCuCZnHab6v0PpCdMpoHPutBYcijeMZEA0,682
@@ -52,18 +52,18 @@ guarddog/analyzer/sourcecode/npm-dll-hijacking.yml,sha256=TPIXvWm8Ot9RVtDXWFmoNZ
52
52
  guarddog/analyzer/sourcecode/npm-exec-base64.yml,sha256=xNIwJAmGP19wvxH_w1ySgDsxrUU3GkrxRcFjjnB9fWM,576
53
53
  guarddog/analyzer/sourcecode/npm-exfiltrate-sensitive-data.yml,sha256=UP-GlZ5VykHWFebgIiHrkrQL9PdtjxR99_m2FZddmuw,3011
54
54
  guarddog/analyzer/sourcecode/npm-install-script.yml,sha256=0resBD7upjukUWsUEYv9sWLC1bCN8xD1pgCVDAxYa_I,1355
55
- guarddog/analyzer/sourcecode/npm-obfuscation.yml,sha256=ju-lMQA2gKEIBJBoAHo3o6GPMmgDLDO7CRZs6n0qNLk,1903
55
+ guarddog/analyzer/sourcecode/npm-obfuscation.yml,sha256=FAW9toHYU8adzKv5E68M29OQ4sLO89GwORsXpSr2-50,2026
56
56
  guarddog/analyzer/sourcecode/npm-serialize-environment.yml,sha256=gFpr58INp44ZwxYZlIHyzpOgbVMDLv1ZRPTGAczX5dw,835
57
57
  guarddog/analyzer/sourcecode/npm-silent-process-execution.yml,sha256=qnJHGesNPNpxGa8n2kQMpttLGck-6vZjI_SsweDyk7M,3513
58
58
  guarddog/analyzer/sourcecode/npm-steganography.yml,sha256=XH0udcriAQq_6WOHAG4TpIedw8GgKyWx9gsG_Q_Fki8,915
59
- guarddog/analyzer/sourcecode/obfuscation.yml,sha256=GaJwPSpP_d7MLiITvbrewoD0svYzTybqgH9X8vqvQUg,1041
59
+ guarddog/analyzer/sourcecode/obfuscation.yml,sha256=EwGwmQSftIvyDZ0BZZBT37kS7chtO99s36MfXeZ6hHw,1091
60
60
  guarddog/analyzer/sourcecode/shady-links.yml,sha256=vdY49_JXF12ACDlwKAZ-DSY3RuePerQXAClAFtXJXFY,2190
61
61
  guarddog/analyzer/sourcecode/silent-process-execution.yml,sha256=b6RjenMv7si7lXGak3uMmD7PMtQRuKPeJFggPW6UDNI,418
62
62
  guarddog/analyzer/sourcecode/steganography.yml,sha256=3ceO6SJhu4XpZEjfwelLdOxeZ4Ho1OgUjbcacwtOhR0,606
63
- guarddog/cli.py,sha256=P5pc_qkX_SHPHRoPnjjq7au2Vj7GYW906r7dh6ADzg4,13201
63
+ guarddog/cli.py,sha256=TPSKAb9b6AHZB3rqtqd4zZeoCYWMYhEdCOzwFvMJQYI,13185
64
64
  guarddog/ecosystems.py,sha256=kgM4v5E8PZBQksWgzuWwODS5R7P16klDi1SGWKLy1e0,380
65
65
  guarddog/reporters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
66
- guarddog/reporters/sarif.py,sha256=44Ixtfk9IhHHaU0MtRLsaBUnPl23kDRuCbtWJ84Z9LA,5866
66
+ guarddog/reporters/sarif.py,sha256=92HjvASZFyv5otB1qbsUqj6423tNgZbmSQS4qApffAw,5820
67
67
  guarddog/scanners/__init__.py,sha256=yEsDvThkIAhFkP59gSCFxYe5HTLmoSzfjAkDrtFT1LY,1628
68
68
  guarddog/scanners/go_package_scanner.py,sha256=OdCbwtjJow9AxEv34z7WBfgTamqKj5DxJh7dly_1NuY,2926
69
69
  guarddog/scanners/go_project_scanner.py,sha256=3D5dYSA7FVqc7IIM7uAHlCJZalshP_WhagWmOcYirog,2123
@@ -74,13 +74,13 @@ guarddog/scanners/pypi_project_scanner.py,sha256=NY-xO27r9xIGik7y-btoBKX54_VPSV_
74
74
  guarddog/scanners/scanner.py,sha256=7-OGs8GoRfyexEYOfVRSmV7P-7ZJDXtgj2Z1UrKGx30,10929
75
75
  guarddog/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
76
76
  guarddog/utils/archives.py,sha256=jOXAhxZx-mTtpDidGGKxQg052CvaQOAVklvOeUn9HTQ,2593
77
- guarddog/utils/config.py,sha256=KCiGsaeautOo-1p0CSbHY5HWT8cC17-_8tqKH7hPa1E,883
77
+ guarddog/utils/config.py,sha256=Msz7altsmNKry0vBPtL2BJ_VdBXsBFZX5ksLvXc2ix4,1403
78
78
  guarddog/utils/exceptions.py,sha256=23Kzl3exqYK6X-bcGUeb8wPmSglWNX3GIDPkJ6lQzo4,54
79
79
  guarddog/utils/package_info.py,sha256=TFjE1xsGNf60SuHlIeDV2pzMUbogl5TKJdSzswat6jI,953
80
- guarddog-2.0.2.dist-info/LICENSE,sha256=w1aNZxHyoyOPJ4fSdiyrr06tCJZbTjCsH9K1uqeDVyU,11377
81
- guarddog-2.0.2.dist-info/LICENSE-3rdparty.csv,sha256=cS61ONZL_xlXaTMvQXyBEi3J3es-40Gg6G-6idoa5Qk,314
82
- guarddog-2.0.2.dist-info/METADATA,sha256=3aEJ6d9_rvoJt9fVnrPzIyVgGILNXPDLCsJQEBq9scE,1417
83
- guarddog-2.0.2.dist-info/NOTICE,sha256=nlyNt2IjG8IBoQkb7n6jszwAvmREpKAx0POzFO1s2JM,140
84
- guarddog-2.0.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
85
- guarddog-2.0.2.dist-info/entry_points.txt,sha256=vX2fvhnNdkbEL4pDzrH2NqjWVxeOaEYi0sJYmNgS2-s,45
86
- guarddog-2.0.2.dist-info/RECORD,,
80
+ guarddog-2.0.3.dist-info/LICENSE,sha256=w1aNZxHyoyOPJ4fSdiyrr06tCJZbTjCsH9K1uqeDVyU,11377
81
+ guarddog-2.0.3.dist-info/LICENSE-3rdparty.csv,sha256=cS61ONZL_xlXaTMvQXyBEi3J3es-40Gg6G-6idoa5Qk,314
82
+ guarddog-2.0.3.dist-info/METADATA,sha256=5knEEdt2PwaRult-To_Rf_KQyi8kGKcQogGj-9xKKeM,1413
83
+ guarddog-2.0.3.dist-info/NOTICE,sha256=nlyNt2IjG8IBoQkb7n6jszwAvmREpKAx0POzFO1s2JM,140
84
+ guarddog-2.0.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
85
+ guarddog-2.0.3.dist-info/entry_points.txt,sha256=vX2fvhnNdkbEL4pDzrH2NqjWVxeOaEYi0sJYmNgS2-s,45
86
+ guarddog-2.0.3.dist-info/RECORD,,