guarddog 2.5.0__py3-none-any.whl → 2.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. guarddog/analyzer/analyzer.py +58 -20
  2. guarddog/analyzer/metadata/__init__.py +2 -0
  3. guarddog/analyzer/metadata/bundled_binary.py +6 -6
  4. guarddog/analyzer/metadata/deceptive_author.py +3 -1
  5. guarddog/analyzer/metadata/detector.py +7 -2
  6. guarddog/analyzer/metadata/empty_information.py +8 -3
  7. guarddog/analyzer/metadata/go/typosquatting.py +4 -3
  8. guarddog/analyzer/metadata/npm/bundled_binary.py +7 -2
  9. guarddog/analyzer/metadata/npm/deceptive_author.py +1 -1
  10. guarddog/analyzer/metadata/npm/direct_url_dependency.py +2 -1
  11. guarddog/analyzer/metadata/npm/empty_information.py +10 -7
  12. guarddog/analyzer/metadata/npm/potentially_compromised_email_domain.py +4 -3
  13. guarddog/analyzer/metadata/npm/release_zero.py +13 -5
  14. guarddog/analyzer/metadata/npm/typosquatting.py +1 -1
  15. guarddog/analyzer/metadata/npm/unclaimed_maintainer_email_domain.py +3 -2
  16. guarddog/analyzer/metadata/npm/utils.py +4 -5
  17. guarddog/analyzer/metadata/potentially_compromised_email_domain.py +8 -4
  18. guarddog/analyzer/metadata/pypi/__init__.py +12 -6
  19. guarddog/analyzer/metadata/pypi/bundled_binary.py +7 -2
  20. guarddog/analyzer/metadata/pypi/deceptive_author.py +1 -1
  21. guarddog/analyzer/metadata/pypi/empty_information.py +16 -5
  22. guarddog/analyzer/metadata/pypi/potentially_compromised_email_domain.py +4 -3
  23. guarddog/analyzer/metadata/pypi/release_zero.py +16 -6
  24. guarddog/analyzer/metadata/pypi/repository_integrity_mismatch.py +53 -27
  25. guarddog/analyzer/metadata/pypi/single_python_file.py +9 -4
  26. guarddog/analyzer/metadata/pypi/typosquatting.py +21 -8
  27. guarddog/analyzer/metadata/pypi/unclaimed_maintainer_email_domain.py +6 -2
  28. guarddog/analyzer/metadata/pypi/utils.py +1 -4
  29. guarddog/analyzer/metadata/release_zero.py +1 -1
  30. guarddog/analyzer/metadata/repository_integrity_mismatch.py +10 -3
  31. guarddog/analyzer/metadata/resources/top_pypi_packages.json +43984 -15984
  32. guarddog/analyzer/metadata/typosquatting.py +12 -8
  33. guarddog/analyzer/metadata/unclaimed_maintainer_email_domain.py +7 -2
  34. guarddog/analyzer/sourcecode/__init__.py +34 -7
  35. guarddog/analyzer/sourcecode/api-obfuscation.yml +42 -0
  36. guarddog/analyzer/sourcecode/code-execution.yml +1 -0
  37. guarddog/analyzer/sourcecode/dll-hijacking.yml +5 -0
  38. guarddog/analyzer/sourcecode/go-exec-base64.yml +40 -0
  39. guarddog/analyzer/sourcecode/go-exec-download.yml +85 -0
  40. guarddog/analyzer/sourcecode/go-exfiltrate-sensitive-data.yml +85 -0
  41. guarddog/analyzer/sourcecode/npm-obfuscation.yml +2 -1
  42. guarddog/analyzer/sourcecode/shady-links.yml +2 -0
  43. guarddog/analyzer/sourcecode/suspicious_passwd_access_linux.yar +12 -0
  44. guarddog/analyzer/sourcecode/unicode.yml +75 -0
  45. guarddog/cli.py +33 -107
  46. guarddog/ecosystems.py +3 -0
  47. guarddog/reporters/__init__.py +28 -0
  48. guarddog/reporters/human_readable.py +138 -0
  49. guarddog/reporters/json.py +28 -0
  50. guarddog/reporters/reporter_factory.py +50 -0
  51. guarddog/reporters/sarif.py +179 -173
  52. guarddog/scanners/__init__.py +5 -0
  53. guarddog/scanners/extension_scanner.py +152 -0
  54. guarddog/scanners/github_action_project_scanner.py +47 -8
  55. guarddog/scanners/github_action_scanner.py +6 -2
  56. guarddog/scanners/go_project_scanner.py +42 -5
  57. guarddog/scanners/npm_package_scanner.py +12 -4
  58. guarddog/scanners/npm_project_scanner.py +54 -10
  59. guarddog/scanners/pypi_package_scanner.py +9 -3
  60. guarddog/scanners/pypi_project_scanner.py +67 -29
  61. guarddog/scanners/scanner.py +247 -164
  62. guarddog/utils/archives.py +2 -1
  63. guarddog/utils/package_info.py +3 -1
  64. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info}/METADATA +11 -10
  65. guarddog-2.7.0.dist-info/RECORD +100 -0
  66. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info}/WHEEL +1 -1
  67. guarddog-2.5.0.dist-info/RECORD +0 -90
  68. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info}/entry_points.txt +0 -0
  69. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info/licenses}/LICENSE +0 -0
  70. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info/licenses}/LICENSE-3rdparty.csv +0 -0
  71. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info/licenses}/NOTICE +0 -0
@@ -1,9 +1,12 @@
1
1
  import logging
2
+ import os
3
+ import re
2
4
  from dataclasses import dataclass
3
5
  from typing import List
4
6
 
5
7
  from guarddog.scanners.go_package_scanner import GoModuleScanner
6
8
  from guarddog.scanners.scanner import ProjectScanner
9
+ from guarddog.scanners.scanner import Dependency, DependencyVersion
7
10
 
8
11
  log = logging.getLogger("guarddog")
9
12
 
@@ -26,13 +29,39 @@ class GoDependenciesScanner(ProjectScanner):
26
29
  def __init__(self) -> None:
27
30
  super().__init__(GoModuleScanner())
28
31
 
29
- def parse_requirements(self, raw_requirements: str) -> dict[str, set[str]]:
32
+ def parse_requirements(self, raw_requirements: str) -> List[Dependency]:
30
33
  main_mod = self.parse_go_mod_file(raw_requirements)
31
34
 
32
- return {
33
- requirement.module: set([requirement.version])
34
- for requirement in main_mod.requirements
35
- }
35
+ dependencies: List[Dependency] = []
36
+ for dependency in main_mod.requirements:
37
+ version = dependency.version
38
+ name = dependency.module
39
+ idx = next(
40
+ iter(
41
+ [
42
+ ix
43
+ for ix, line in enumerate(raw_requirements.splitlines())
44
+ if name in line
45
+ ]
46
+ ),
47
+ 0,
48
+ )
49
+
50
+ dep_versions = [DependencyVersion(version=version, location=idx + 1)]
51
+
52
+ dep = next(
53
+ filter(
54
+ lambda d: d.name == name,
55
+ dependencies,
56
+ ),
57
+ None,
58
+ )
59
+ if not dep:
60
+ dep = Dependency(name=name, versions=set())
61
+ dependencies.append(dep)
62
+
63
+ dep.versions.update(dep_versions)
64
+ return dependencies
36
65
 
37
66
  # Read https://go.dev/ref/mod#go-mod-file to learn more about the go.mod syntax
38
67
  def parse_go_mod_file(self, go_mod_content: str) -> GoModule:
@@ -66,3 +95,11 @@ class GoDependenciesScanner(ProjectScanner):
66
95
  # TODO: support exclude, replace and retract statements
67
96
 
68
97
  return GoModule(module, go, toolchain, requirements)
98
+
99
+ def find_requirements(self, directory: str) -> list[str]:
100
+ requirement_files = []
101
+ for root, dirs, files in os.walk(directory):
102
+ for name in files:
103
+ if re.match(r"^go\.mod$", name, flags=re.IGNORECASE):
104
+ requirement_files.append(os.path.join(root, name))
105
+ return requirement_files
@@ -17,9 +17,13 @@ class NPMPackageScanner(PackageScanner):
17
17
  def __init__(self) -> None:
18
18
  super().__init__(Analyzer(ECOSYSTEM.NPM))
19
19
 
20
- def download_and_get_package_info(self, directory: str, package_name: str, version=None) -> typing.Tuple[dict, str]:
20
+ def download_and_get_package_info(
21
+ self, directory: str, package_name: str, version=None
22
+ ) -> typing.Tuple[dict, str]:
21
23
  git_target = None
22
- if urlparse(package_name).hostname is not None and package_name.endswith('.git'):
24
+ if urlparse(package_name).hostname is not None and package_name.endswith(
25
+ ".git"
26
+ ):
23
27
  git_target = package_name
24
28
 
25
29
  if not package_name.startswith("@") and package_name.count("/") == 1:
@@ -33,7 +37,9 @@ class NPMPackageScanner(PackageScanner):
33
37
  response = requests.get(url)
34
38
 
35
39
  if response.status_code != 200:
36
- raise Exception("Received status code: " + str(response.status_code) + " from npm")
40
+ raise Exception(
41
+ "Received status code: " + str(response.status_code) + " from npm"
42
+ )
37
43
  data = response.json()
38
44
  if "name" not in data:
39
45
  raise Exception(f"Error retrieving package: {package_name}")
@@ -45,7 +51,9 @@ class NPMPackageScanner(PackageScanner):
45
51
 
46
52
  tarball_url = details["dist"]["tarball"]
47
53
  file_extension = pathlib.Path(tarball_url).suffix
48
- zippath = os.path.join(directory, package_name.replace("/", "-") + file_extension)
54
+ zippath = os.path.join(
55
+ directory, package_name.replace("/", "-") + file_extension
56
+ )
49
57
  unzippedpath = zippath.removesuffix(file_extension)
50
58
  self.download_compressed(tarball_url, zippath, unzippedpath)
51
59
 
@@ -1,11 +1,15 @@
1
1
  import json
2
2
  import logging
3
+ import os
4
+ import re
5
+ from typing import List
6
+
3
7
  import requests
4
8
  from semantic_version import NpmSpec, Version # type:ignore
5
9
 
6
- from guarddog.utils.config import VERIFY_EXHAUSTIVE_DEPENDENCIES
7
10
  from guarddog.scanners.npm_package_scanner import NPMPackageScanner
8
- from guarddog.scanners.scanner import ProjectScanner
11
+ from guarddog.scanners.scanner import Dependency, DependencyVersion, ProjectScanner
12
+ from guarddog.utils.config import VERIFY_EXHAUSTIVE_DEPENDENCIES
9
13
 
10
14
  log = logging.getLogger("guarddog")
11
15
 
@@ -21,7 +25,7 @@ class NPMRequirementsScanner(ProjectScanner):
21
25
  def __init__(self) -> None:
22
26
  super().__init__(NPMPackageScanner())
23
27
 
24
- def parse_requirements(self, raw_requirements: str) -> dict:
28
+ def parse_requirements(self, raw_requirements: str) -> List[Dependency]:
25
29
  """
26
30
  Parses requirements.txt specification and finds all valid
27
31
  versions of each dependency
@@ -40,8 +44,8 @@ class NPMRequirementsScanner(ProjectScanner):
40
44
  }
41
45
  """
42
46
  package = json.loads(raw_requirements)
43
- dependencies = package["dependencies"] if "dependencies" in package else {}
44
- dev_dependencies = (
47
+ dependencies_attr = package["dependencies"] if "dependencies" in package else {}
48
+ dev_dependencies_attr = (
45
49
  package["devDependencies"] if "devDependencies" in package else {}
46
50
  )
47
51
 
@@ -82,23 +86,63 @@ class NPMRequirementsScanner(ProjectScanner):
82
86
  return versions
83
87
 
84
88
  merged = {} # type: dict[str, set[str]]
85
- for package, selector in list(dependencies.items()) + list(
86
- dev_dependencies.items()
89
+ for package, selector in list(dependencies_attr.items()) + list(
90
+ dev_dependencies_attr.items()
87
91
  ):
88
92
  if package not in merged:
89
93
  merged[package] = set()
90
94
  merged[package].add(selector)
91
95
 
92
- results = {}
96
+ dependencies: List[Dependency] = []
93
97
  for package, all_selectors in merged.items():
94
98
  versions = set() # type: set[str]
95
99
  for selector in all_selectors:
96
100
  versions = versions.union(
97
101
  get_matched_versions(find_all_versions(package), selector)
98
102
  )
103
+
99
104
  if len(versions) == 0:
100
105
  log.error(f"Package/Version {package} not on NPM\n")
101
106
  continue
102
107
 
103
- results[package] = versions
104
- return results
108
+ idx = next(
109
+ iter(
110
+ [
111
+ ix
112
+ for ix, line in enumerate(raw_requirements.splitlines())
113
+ if package in line
114
+ ]
115
+ ),
116
+ 0,
117
+ )
118
+
119
+ dep_versions = list(
120
+ map(
121
+ lambda d: DependencyVersion(version=d, location=idx + 1),
122
+ versions,
123
+ )
124
+ )
125
+
126
+ # find the dep with the same name or create a new one
127
+ dep = next(
128
+ filter(
129
+ lambda d: d.name == package,
130
+ dependencies,
131
+ ),
132
+ None,
133
+ )
134
+ if not dep:
135
+ dep = Dependency(name=package, versions=set())
136
+ dependencies.append(dep)
137
+
138
+ dep.versions.update(dep_versions)
139
+
140
+ return dependencies
141
+
142
+ def find_requirements(self, directory: str) -> list[str]:
143
+ requirement_files = []
144
+ for root, dirs, files in os.walk(directory):
145
+ for name in files:
146
+ if re.match(r"^package\.json$", name, flags=re.IGNORECASE):
147
+ requirement_files.append(os.path.join(root, name))
148
+ return requirement_files
@@ -12,7 +12,9 @@ class PypiPackageScanner(PackageScanner):
12
12
  def __init__(self) -> None:
13
13
  super().__init__(Analyzer(ECOSYSTEM.PYPI))
14
14
 
15
- def download_and_get_package_info(self, directory: str, package_name: str, version=None) -> typing.Tuple[dict, str]:
15
+ def download_and_get_package_info(
16
+ self, directory: str, package_name: str, version=None
17
+ ) -> typing.Tuple[dict, str]:
16
18
  extract_dir = self.download_package(package_name, directory, version)
17
19
  return get_package_info(package_name), extract_dir
18
20
 
@@ -40,7 +42,9 @@ class PypiPackageScanner(PackageScanner):
40
42
  version = data["info"]["version"]
41
43
 
42
44
  if version not in releases:
43
- raise Exception(f"Version {version} for package {package_name} doesn't exist.")
45
+ raise Exception(
46
+ f"Version {version} for package {package_name} doesn't exist."
47
+ )
44
48
 
45
49
  files = releases[version]
46
50
  url, file_extension = None, None
@@ -52,7 +56,9 @@ class PypiPackageScanner(PackageScanner):
52
56
  break
53
57
 
54
58
  if not (url and file_extension):
55
- raise Exception(f"Compressed file for {package_name} does not exist on PyPI.")
59
+ raise Exception(
60
+ f"Compressed file for {package_name} does not exist on PyPI."
61
+ )
56
62
 
57
63
  # Path to compressed package
58
64
  zippath = os.path.join(directory, package_name + file_extension)
@@ -1,11 +1,14 @@
1
1
  import logging
2
+ import os
2
3
  import re
3
- import pkg_resources
4
+ from typing import List
5
+
6
+ from packaging.requirements import Requirement
4
7
  import requests
5
8
  from packaging.specifiers import Specifier, Version
6
9
 
7
10
  from guarddog.scanners.pypi_package_scanner import PypiPackageScanner
8
- from guarddog.scanners.scanner import ProjectScanner
11
+ from guarddog.scanners.scanner import Dependency, DependencyVersion, ProjectScanner
9
12
  from guarddog.utils.config import VERIFY_EXHAUSTIVE_DEPENDENCIES
10
13
 
11
14
  log = logging.getLogger("guarddog")
@@ -37,17 +40,20 @@ class PypiRequirementsScanner(ProjectScanner):
37
40
 
38
41
  for line in requirements:
39
42
  is_requirement = re.match(r"\w", line)
40
- if is_requirement:
41
- if "\\" in line:
42
- line = line.replace("\\", "")
43
43
 
44
- stripped_line = line.strip()
45
- if len(stripped_line) > 0:
46
- sanitized_lines.append(stripped_line)
44
+ if not is_requirement:
45
+ sanitized_lines.append("") # empty line to keep the line number
46
+ continue
47
+
48
+ if "\\" in line:
49
+ line = line.replace("\\", "")
50
+
51
+ stripped_line = line.strip()
52
+ sanitized_lines.append(stripped_line)
47
53
 
48
54
  return sanitized_lines
49
55
 
50
- def parse_requirements(self, raw_requirements: str) -> dict[str, set[str]]:
56
+ def parse_requirements(self, raw_requirements: str) -> List[Dependency]:
51
57
  """
52
58
  Parses requirements.txt specification and finds all valid
53
59
  versions of each dependency
@@ -57,17 +63,10 @@ class PypiRequirementsScanner(ProjectScanner):
57
63
 
58
64
  Returns:
59
65
  dict: mapping of dependencies to valid versions
60
-
61
- ex.
62
- {
63
- ....
64
- <dependency-name>: [0.0.1, 0.0.2, ...],
65
- ...
66
- }
67
66
  """
68
67
  requirements = raw_requirements.splitlines()
69
68
  sanitized_requirements = self._sanitize_requirements(requirements)
70
- dependencies = {}
69
+ dependencies: List[Dependency] = []
71
70
 
72
71
  def get_matched_versions(versions: set[str], semver_range: str) -> set[str]:
73
72
  """
@@ -77,8 +76,11 @@ class PypiRequirementsScanner(ProjectScanner):
77
76
 
78
77
  # Filters to specified versions
79
78
  try:
80
- spec = Specifier(semver_range)
81
- result = [Version(m) for m in spec.filter(versions)]
79
+ matching_versions = versions
80
+ if semver_range:
81
+ spec = Specifier(semver_range)
82
+ matching_versions = set(spec.filter(versions))
83
+ result = [Version(m) for m in matching_versions]
82
84
  except ValueError:
83
85
  # use it raw
84
86
  return set([semver_range])
@@ -109,12 +111,11 @@ class PypiRequirementsScanner(ProjectScanner):
109
111
  """
110
112
  This helper function yields one valid requirement line at a time
111
113
  """
112
- parsed = pkg_resources.parse_requirements(req)
113
- while True:
114
+ for req_line in req:
115
+ if not req_line.strip():
116
+ continue
114
117
  try:
115
- yield next(parsed)
116
- except StopIteration:
117
- break
118
+ yield Requirement(req_line)
118
119
  except Exception as e:
119
120
  log.error(
120
121
  f"Error when parsing requirements, received error {str(e)}. This entry will be "
@@ -128,7 +129,7 @@ class PypiRequirementsScanner(ProjectScanner):
128
129
  continue
129
130
 
130
131
  versions = get_matched_versions(
131
- find_all_versions(requirement.project_name),
132
+ find_all_versions(requirement.name),
132
133
  (
133
134
  requirement.url
134
135
  if requirement.url
@@ -137,13 +138,50 @@ class PypiRequirementsScanner(ProjectScanner):
137
138
  )
138
139
 
139
140
  if len(versions) == 0:
140
- log.error(
141
- f"Package/Version {requirement.project_name} not on PyPI\n"
142
- )
141
+ log.error(f"Package/Version {requirement.name} not on PyPI\n")
143
142
  continue
144
143
 
145
- dependencies[requirement.project_name] = versions
144
+ idx = next(
145
+ iter(
146
+ [
147
+ ix
148
+ for ix, line in enumerate(requirements)
149
+ if str(requirement) in line
150
+ ]
151
+ ),
152
+ 0,
153
+ )
154
+
155
+ dep_versions = list(
156
+ map(
157
+ lambda d: DependencyVersion(version=d, location=idx + 1),
158
+ versions,
159
+ )
160
+ )
161
+
162
+ # find the dep with the same name or create a new one
163
+ dep = next(
164
+ filter(
165
+ lambda d: d.name == requirement.name,
166
+ dependencies,
167
+ ),
168
+ None,
169
+ )
170
+ if not dep:
171
+ dep = Dependency(name=requirement.name, versions=set())
172
+ dependencies.append(dep)
173
+
174
+ dep.versions.update(dep_versions)
175
+
146
176
  except Exception as e:
147
177
  log.error(f"Received error {str(e)}")
148
178
 
149
179
  return dependencies
180
+
181
+ def find_requirements(self, directory: str) -> list[str]:
182
+ requirement_files = []
183
+ for root, dirs, files in os.walk(directory):
184
+ for name in files:
185
+ if re.match(r"^requirements(-dev)?\.txt$", name, flags=re.IGNORECASE):
186
+ requirement_files.append(os.path.join(root, name))
187
+ return requirement_files