guarddog 2.4.0__py3-none-any.whl → 2.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guarddog/analyzer/analyzer.py +12 -2
- guarddog/analyzer/metadata/pypi/typosquatting.py +1 -1
- guarddog/analyzer/metadata/resources/top_pypi_packages.json +43984 -15984
- guarddog/analyzer/sourcecode/go-exec-base64.yml +40 -0
- guarddog/analyzer/sourcecode/go-exec-download.yml +85 -0
- guarddog/analyzer/sourcecode/go-exfiltrate-sensitive-data.yml +85 -0
- guarddog/analyzer/sourcecode/npm-obfuscation.yml +2 -1
- guarddog/analyzer/sourcecode/shady-links.yml +3 -1
- guarddog/cli.py +33 -107
- guarddog/reporters/__init__.py +28 -0
- guarddog/reporters/human_readable.py +138 -0
- guarddog/reporters/json.py +28 -0
- guarddog/reporters/reporter_factory.py +50 -0
- guarddog/reporters/sarif.py +179 -173
- guarddog/scanners/__init__.py +3 -0
- guarddog/scanners/github_action_project_scanner.py +140 -0
- guarddog/scanners/go_project_scanner.py +42 -5
- guarddog/scanners/npm_project_scanner.py +54 -10
- guarddog/scanners/pypi_project_scanner.py +60 -19
- guarddog/scanners/scanner.py +247 -165
- {guarddog-2.4.0.dist-info → guarddog-2.6.0.dist-info}/METADATA +3 -3
- {guarddog-2.4.0.dist-info → guarddog-2.6.0.dist-info}/RECORD +27 -20
- {guarddog-2.4.0.dist-info → guarddog-2.6.0.dist-info}/WHEEL +1 -1
- {guarddog-2.4.0.dist-info → guarddog-2.6.0.dist-info}/LICENSE +0 -0
- {guarddog-2.4.0.dist-info → guarddog-2.6.0.dist-info}/LICENSE-3rdparty.csv +0 -0
- {guarddog-2.4.0.dist-info → guarddog-2.6.0.dist-info}/NOTICE +0 -0
- {guarddog-2.4.0.dist-info → guarddog-2.6.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
+
import os
|
|
4
|
+
import re
|
|
5
|
+
from typing import List
|
|
6
|
+
|
|
3
7
|
import requests
|
|
4
8
|
from semantic_version import NpmSpec, Version # type:ignore
|
|
5
9
|
|
|
6
|
-
from guarddog.utils.config import VERIFY_EXHAUSTIVE_DEPENDENCIES
|
|
7
10
|
from guarddog.scanners.npm_package_scanner import NPMPackageScanner
|
|
8
|
-
from guarddog.scanners.scanner import ProjectScanner
|
|
11
|
+
from guarddog.scanners.scanner import Dependency, DependencyVersion, ProjectScanner
|
|
12
|
+
from guarddog.utils.config import VERIFY_EXHAUSTIVE_DEPENDENCIES
|
|
9
13
|
|
|
10
14
|
log = logging.getLogger("guarddog")
|
|
11
15
|
|
|
@@ -21,7 +25,7 @@ class NPMRequirementsScanner(ProjectScanner):
|
|
|
21
25
|
def __init__(self) -> None:
|
|
22
26
|
super().__init__(NPMPackageScanner())
|
|
23
27
|
|
|
24
|
-
def parse_requirements(self, raw_requirements: str) ->
|
|
28
|
+
def parse_requirements(self, raw_requirements: str) -> List[Dependency]:
|
|
25
29
|
"""
|
|
26
30
|
Parses requirements.txt specification and finds all valid
|
|
27
31
|
versions of each dependency
|
|
@@ -40,8 +44,8 @@ class NPMRequirementsScanner(ProjectScanner):
|
|
|
40
44
|
}
|
|
41
45
|
"""
|
|
42
46
|
package = json.loads(raw_requirements)
|
|
43
|
-
|
|
44
|
-
|
|
47
|
+
dependencies_attr = package["dependencies"] if "dependencies" in package else {}
|
|
48
|
+
dev_dependencies_attr = (
|
|
45
49
|
package["devDependencies"] if "devDependencies" in package else {}
|
|
46
50
|
)
|
|
47
51
|
|
|
@@ -82,23 +86,63 @@ class NPMRequirementsScanner(ProjectScanner):
|
|
|
82
86
|
return versions
|
|
83
87
|
|
|
84
88
|
merged = {} # type: dict[str, set[str]]
|
|
85
|
-
for package, selector in list(
|
|
86
|
-
|
|
89
|
+
for package, selector in list(dependencies_attr.items()) + list(
|
|
90
|
+
dev_dependencies_attr.items()
|
|
87
91
|
):
|
|
88
92
|
if package not in merged:
|
|
89
93
|
merged[package] = set()
|
|
90
94
|
merged[package].add(selector)
|
|
91
95
|
|
|
92
|
-
|
|
96
|
+
dependencies: List[Dependency] = []
|
|
93
97
|
for package, all_selectors in merged.items():
|
|
94
98
|
versions = set() # type: set[str]
|
|
95
99
|
for selector in all_selectors:
|
|
96
100
|
versions = versions.union(
|
|
97
101
|
get_matched_versions(find_all_versions(package), selector)
|
|
98
102
|
)
|
|
103
|
+
|
|
99
104
|
if len(versions) == 0:
|
|
100
105
|
log.error(f"Package/Version {package} not on NPM\n")
|
|
101
106
|
continue
|
|
102
107
|
|
|
103
|
-
|
|
104
|
-
|
|
108
|
+
idx = next(
|
|
109
|
+
iter(
|
|
110
|
+
[
|
|
111
|
+
ix
|
|
112
|
+
for ix, line in enumerate(raw_requirements.splitlines())
|
|
113
|
+
if package in line
|
|
114
|
+
]
|
|
115
|
+
),
|
|
116
|
+
0,
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
dep_versions = list(
|
|
120
|
+
map(
|
|
121
|
+
lambda d: DependencyVersion(version=d, location=idx + 1),
|
|
122
|
+
versions,
|
|
123
|
+
)
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
# find the dep with the same name or create a new one
|
|
127
|
+
dep = next(
|
|
128
|
+
filter(
|
|
129
|
+
lambda d: d.name == package,
|
|
130
|
+
dependencies,
|
|
131
|
+
),
|
|
132
|
+
None,
|
|
133
|
+
)
|
|
134
|
+
if not dep:
|
|
135
|
+
dep = Dependency(name=package, versions=set())
|
|
136
|
+
dependencies.append(dep)
|
|
137
|
+
|
|
138
|
+
dep.versions.update(dep_versions)
|
|
139
|
+
|
|
140
|
+
return dependencies
|
|
141
|
+
|
|
142
|
+
def find_requirements(self, directory: str) -> list[str]:
|
|
143
|
+
requirement_files = []
|
|
144
|
+
for root, dirs, files in os.walk(directory):
|
|
145
|
+
for name in files:
|
|
146
|
+
if re.match(r"^package\.json$", name, flags=re.IGNORECASE):
|
|
147
|
+
requirement_files.append(os.path.join(root, name))
|
|
148
|
+
return requirement_files
|
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import os
|
|
2
3
|
import re
|
|
4
|
+
from typing import List
|
|
5
|
+
|
|
3
6
|
import pkg_resources
|
|
4
7
|
import requests
|
|
5
8
|
from packaging.specifiers import Specifier, Version
|
|
6
9
|
|
|
7
10
|
from guarddog.scanners.pypi_package_scanner import PypiPackageScanner
|
|
8
|
-
from guarddog.scanners.scanner import ProjectScanner
|
|
11
|
+
from guarddog.scanners.scanner import Dependency, DependencyVersion, ProjectScanner
|
|
9
12
|
from guarddog.utils.config import VERIFY_EXHAUSTIVE_DEPENDENCIES
|
|
10
13
|
|
|
11
14
|
log = logging.getLogger("guarddog")
|
|
@@ -37,17 +40,20 @@ class PypiRequirementsScanner(ProjectScanner):
|
|
|
37
40
|
|
|
38
41
|
for line in requirements:
|
|
39
42
|
is_requirement = re.match(r"\w", line)
|
|
40
|
-
if is_requirement:
|
|
41
|
-
if "\\" in line:
|
|
42
|
-
line = line.replace("\\", "")
|
|
43
43
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
44
|
+
if not is_requirement:
|
|
45
|
+
sanitized_lines.append("") # empty line to keep the line number
|
|
46
|
+
continue
|
|
47
|
+
|
|
48
|
+
if "\\" in line:
|
|
49
|
+
line = line.replace("\\", "")
|
|
50
|
+
|
|
51
|
+
stripped_line = line.strip()
|
|
52
|
+
sanitized_lines.append(stripped_line)
|
|
47
53
|
|
|
48
54
|
return sanitized_lines
|
|
49
55
|
|
|
50
|
-
def parse_requirements(self, raw_requirements: str) ->
|
|
56
|
+
def parse_requirements(self, raw_requirements: str) -> List[Dependency]:
|
|
51
57
|
"""
|
|
52
58
|
Parses requirements.txt specification and finds all valid
|
|
53
59
|
versions of each dependency
|
|
@@ -57,17 +63,10 @@ class PypiRequirementsScanner(ProjectScanner):
|
|
|
57
63
|
|
|
58
64
|
Returns:
|
|
59
65
|
dict: mapping of dependencies to valid versions
|
|
60
|
-
|
|
61
|
-
ex.
|
|
62
|
-
{
|
|
63
|
-
....
|
|
64
|
-
<dependency-name>: [0.0.1, 0.0.2, ...],
|
|
65
|
-
...
|
|
66
|
-
}
|
|
67
66
|
"""
|
|
68
67
|
requirements = raw_requirements.splitlines()
|
|
69
68
|
sanitized_requirements = self._sanitize_requirements(requirements)
|
|
70
|
-
dependencies =
|
|
69
|
+
dependencies: List[Dependency] = []
|
|
71
70
|
|
|
72
71
|
def get_matched_versions(versions: set[str], semver_range: str) -> set[str]:
|
|
73
72
|
"""
|
|
@@ -77,8 +76,11 @@ class PypiRequirementsScanner(ProjectScanner):
|
|
|
77
76
|
|
|
78
77
|
# Filters to specified versions
|
|
79
78
|
try:
|
|
80
|
-
|
|
81
|
-
|
|
79
|
+
matching_versions = versions
|
|
80
|
+
if semver_range:
|
|
81
|
+
spec = Specifier(semver_range)
|
|
82
|
+
matching_versions = set(spec.filter(versions))
|
|
83
|
+
result = [Version(m) for m in matching_versions]
|
|
82
84
|
except ValueError:
|
|
83
85
|
# use it raw
|
|
84
86
|
return set([semver_range])
|
|
@@ -142,8 +144,47 @@ class PypiRequirementsScanner(ProjectScanner):
|
|
|
142
144
|
)
|
|
143
145
|
continue
|
|
144
146
|
|
|
145
|
-
|
|
147
|
+
idx = next(
|
|
148
|
+
iter(
|
|
149
|
+
[
|
|
150
|
+
ix
|
|
151
|
+
for ix, line in enumerate(requirements)
|
|
152
|
+
if str(requirement) in line
|
|
153
|
+
]
|
|
154
|
+
),
|
|
155
|
+
0,
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
dep_versions = list(
|
|
159
|
+
map(
|
|
160
|
+
lambda d: DependencyVersion(version=d, location=idx + 1),
|
|
161
|
+
versions,
|
|
162
|
+
)
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
# find the dep with the same name or create a new one
|
|
166
|
+
dep = next(
|
|
167
|
+
filter(
|
|
168
|
+
lambda d: d.name == requirement.project_name,
|
|
169
|
+
dependencies,
|
|
170
|
+
),
|
|
171
|
+
None,
|
|
172
|
+
)
|
|
173
|
+
if not dep:
|
|
174
|
+
dep = Dependency(name=requirement.project_name, versions=set())
|
|
175
|
+
dependencies.append(dep)
|
|
176
|
+
|
|
177
|
+
dep.versions.update(dep_versions)
|
|
178
|
+
|
|
146
179
|
except Exception as e:
|
|
147
180
|
log.error(f"Received error {str(e)}")
|
|
148
181
|
|
|
149
182
|
return dependencies
|
|
183
|
+
|
|
184
|
+
def find_requirements(self, directory: str) -> list[str]:
|
|
185
|
+
requirement_files = []
|
|
186
|
+
for root, dirs, files in os.walk(directory):
|
|
187
|
+
for name in files:
|
|
188
|
+
if re.match(r"^requirements(-dev)?\.txt$", name, flags=re.IGNORECASE):
|
|
189
|
+
requirement_files.append(os.path.join(root, name))
|
|
190
|
+
return requirement_files
|
guarddog/scanners/scanner.py
CHANGED
|
@@ -2,11 +2,12 @@ import concurrent.futures
|
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
|
-
import sys
|
|
6
5
|
import tempfile
|
|
7
6
|
import typing
|
|
8
7
|
from abc import abstractmethod
|
|
9
8
|
from concurrent.futures import ThreadPoolExecutor
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import List, Optional, Set, Tuple
|
|
10
11
|
|
|
11
12
|
import requests
|
|
12
13
|
|
|
@@ -21,183 +22,65 @@ def noop(arg: typing.Any) -> None:
|
|
|
21
22
|
pass
|
|
22
23
|
|
|
23
24
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def _authenticate_by_access_token(self) -> tuple[str, str]:
|
|
30
|
-
"""
|
|
31
|
-
Gives GitHub authentication through access token
|
|
32
|
-
|
|
33
|
-
Returns:
|
|
34
|
-
tuple[str, str]: username, personal access token
|
|
35
|
-
"""
|
|
36
|
-
|
|
37
|
-
user = os.getenv("GIT_USERNAME")
|
|
38
|
-
personal_access_token = os.getenv("GH_TOKEN")
|
|
39
|
-
if not user or not personal_access_token:
|
|
40
|
-
log.error(
|
|
41
|
-
"""WARNING: Please set GIT_USERNAME (Github handle) and GH_TOKEN
|
|
42
|
-
(generate a personal access token in Github settings > developer)
|
|
43
|
-
as environment variables before proceeding."""
|
|
44
|
-
)
|
|
45
|
-
exit(1)
|
|
46
|
-
return (user, personal_access_token)
|
|
47
|
-
|
|
48
|
-
def scan_requirements(
|
|
49
|
-
self,
|
|
50
|
-
requirements: str,
|
|
51
|
-
rules=None,
|
|
52
|
-
callback: typing.Callable[[dict], None] = noop,
|
|
53
|
-
) -> dict:
|
|
54
|
-
"""
|
|
55
|
-
Reads the requirements.txt file and scans each possible
|
|
56
|
-
dependency and version
|
|
57
|
-
|
|
58
|
-
Args:
|
|
59
|
-
requirements (str): contents of requirements.txt file
|
|
60
|
-
rules: list of rules to apply
|
|
61
|
-
callback: callback to call for each result
|
|
62
|
-
|
|
63
|
-
Returns:
|
|
64
|
-
dict: mapping of dependencies to scan results
|
|
65
|
-
|
|
66
|
-
ex.
|
|
67
|
-
{
|
|
68
|
-
....
|
|
69
|
-
<dependency-name>: {
|
|
70
|
-
issues: ...,
|
|
71
|
-
results: {
|
|
72
|
-
...
|
|
73
|
-
}
|
|
74
|
-
},
|
|
75
|
-
...
|
|
76
|
-
}
|
|
77
|
-
"""
|
|
78
|
-
|
|
79
|
-
def scan_single_dependency(dependency, version):
|
|
80
|
-
log.debug(f"Scanning {dependency} version {version}")
|
|
81
|
-
result = self.package_scanner.scan_remote(dependency, version, rules)
|
|
82
|
-
return {"dependency": dependency, "version": version, "result": result}
|
|
83
|
-
|
|
84
|
-
dependencies = self.parse_requirements(requirements)
|
|
85
|
-
num_workers = PARALLELISM
|
|
86
|
-
|
|
87
|
-
log.info(
|
|
88
|
-
f"Scanning using at most {num_workers} parallel worker threads\n"
|
|
89
|
-
)
|
|
90
|
-
with ThreadPoolExecutor(max_workers=num_workers) as pool:
|
|
91
|
-
try:
|
|
92
|
-
futures: typing.List[concurrent.futures.Future] = []
|
|
93
|
-
for dependency, versions in dependencies.items():
|
|
94
|
-
assert versions is None or len(versions) > 0
|
|
95
|
-
if versions is None:
|
|
96
|
-
# this will cause scan_remote to use the latest version
|
|
97
|
-
futures.append(
|
|
98
|
-
pool.submit(scan_single_dependency, dependency, None)
|
|
99
|
-
)
|
|
100
|
-
else:
|
|
101
|
-
futures.extend(
|
|
102
|
-
map(
|
|
103
|
-
lambda version: pool.submit(
|
|
104
|
-
scan_single_dependency, dependency, version
|
|
105
|
-
),
|
|
106
|
-
versions,
|
|
107
|
-
)
|
|
108
|
-
)
|
|
109
|
-
|
|
110
|
-
results = []
|
|
111
|
-
for future in concurrent.futures.as_completed(futures):
|
|
112
|
-
result = future.result()
|
|
113
|
-
if callback is not None:
|
|
114
|
-
callback(result)
|
|
115
|
-
results.append(result)
|
|
116
|
-
except KeyboardInterrupt:
|
|
117
|
-
log.warning("Received keyboard interrupt, cancelling scan\n")
|
|
118
|
-
pool.shutdown(wait=False, cancel_futures=True)
|
|
119
|
-
|
|
120
|
-
return results # type: ignore
|
|
25
|
+
@dataclass
|
|
26
|
+
class DependencyVersion:
|
|
27
|
+
"""
|
|
28
|
+
This class represents the identified dependency versions in a project,
|
|
29
|
+
usually defined in a specification file (requirements.txt, package.json, etc.)
|
|
121
30
|
|
|
122
|
-
|
|
123
|
-
""
|
|
124
|
-
|
|
31
|
+
Attributes:
|
|
32
|
+
version (str): The version of the dependency. e.g., "1.0.0"
|
|
33
|
+
location (int): This indicates the line number in the specification file where the dependency is defined.
|
|
34
|
+
"""
|
|
125
35
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
branch (str): branch containing requirements.txt
|
|
129
|
-
requirements_name (str, optional): name of requirements file.
|
|
130
|
-
Defaults to "requirements.txt".
|
|
36
|
+
version: str # the version number of the dependency
|
|
37
|
+
location: int
|
|
131
38
|
|
|
132
|
-
|
|
133
|
-
|
|
39
|
+
def __eq__(self, other):
|
|
40
|
+
if isinstance(other, str):
|
|
41
|
+
return self.version == other
|
|
42
|
+
if isinstance(other, DependencyVersion):
|
|
43
|
+
return self.version == other.version
|
|
44
|
+
return NotImplemented
|
|
134
45
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
....
|
|
138
|
-
<dependency-name>: {
|
|
139
|
-
issues: ...,
|
|
140
|
-
results: {
|
|
141
|
-
...
|
|
142
|
-
}
|
|
143
|
-
},
|
|
144
|
-
...
|
|
145
|
-
}
|
|
146
|
-
"""
|
|
46
|
+
def __hash__(self):
|
|
47
|
+
return hash(self.version)
|
|
147
48
|
|
|
148
|
-
|
|
149
|
-
|
|
49
|
+
def __repr__(self):
|
|
50
|
+
return f"DependencyVersion({self.version!r})"
|
|
150
51
|
|
|
151
|
-
req_url = f"{githubusercontent_url}/{branch}/{requirements_name}"
|
|
152
|
-
resp = requests.get(url=req_url, auth=token)
|
|
153
52
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
f"{req_url} does not exist. Check your link or branch name."
|
|
159
|
-
)
|
|
160
|
-
sys.exit(255)
|
|
53
|
+
@dataclass
|
|
54
|
+
class Dependency:
|
|
55
|
+
"""
|
|
56
|
+
This class represents a dependency in a project, usually defined in a specification file
|
|
161
57
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
58
|
+
Attributes:
|
|
59
|
+
name (str): The name of the dependency. e.g., "requests"
|
|
60
|
+
versions (Set[DependencyVersion]): A set of identified versions of the dependency.
|
|
61
|
+
"""
|
|
62
|
+
name: str
|
|
63
|
+
versions: Set[DependencyVersion]
|
|
167
64
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
65
|
+
def __eq__(self, other):
|
|
66
|
+
if isinstance(other, str):
|
|
67
|
+
return self.name == other
|
|
68
|
+
if isinstance(other, Dependency):
|
|
69
|
+
return self.name == other.name
|
|
70
|
+
return NotImplemented
|
|
172
71
|
|
|
173
|
-
|
|
174
|
-
|
|
72
|
+
def __repr__(self):
|
|
73
|
+
return f"Dependency({self.name!r})"
|
|
175
74
|
|
|
176
|
-
ex.
|
|
177
|
-
{
|
|
178
|
-
....
|
|
179
|
-
<dependency-name>: {
|
|
180
|
-
issues: ...,
|
|
181
|
-
results: {
|
|
182
|
-
...
|
|
183
|
-
}
|
|
184
|
-
},
|
|
185
|
-
...
|
|
186
|
-
}
|
|
187
|
-
"""
|
|
188
75
|
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
sys.exit(255)
|
|
76
|
+
@dataclass
|
|
77
|
+
class DependencyFile:
|
|
78
|
+
"""
|
|
79
|
+
This class represents a specification file for a project (requirements.txt, package.json, etc.)
|
|
80
|
+
"""
|
|
195
81
|
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
self, raw_requirements: str
|
|
199
|
-
) -> dict[str, set[str]]: # returns { package: version }
|
|
200
|
-
pass
|
|
82
|
+
file_path: str
|
|
83
|
+
dependencies: List[Dependency]
|
|
201
84
|
|
|
202
85
|
|
|
203
86
|
class PackageScanner:
|
|
@@ -324,3 +207,202 @@ class PackageScanner:
|
|
|
324
207
|
finally:
|
|
325
208
|
log.debug(f"Removing temporary archive file {archive_path}")
|
|
326
209
|
os.remove(archive_path)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
class ProjectScanner:
|
|
213
|
+
def __init__(self, package_scanner: PackageScanner):
|
|
214
|
+
super().__init__()
|
|
215
|
+
self.package_scanner = package_scanner
|
|
216
|
+
|
|
217
|
+
def _authenticate_by_access_token(self) -> tuple[str, str]:
|
|
218
|
+
"""
|
|
219
|
+
Gives GitHub authentication through access token
|
|
220
|
+
|
|
221
|
+
Returns:
|
|
222
|
+
tuple[str, str]: username, personal access token
|
|
223
|
+
"""
|
|
224
|
+
|
|
225
|
+
user = os.getenv("GIT_USERNAME")
|
|
226
|
+
personal_access_token = os.getenv("GH_TOKEN")
|
|
227
|
+
if not user or not personal_access_token:
|
|
228
|
+
log.error(
|
|
229
|
+
"""WARNING: Please set GIT_USERNAME (Github handle) and GH_TOKEN
|
|
230
|
+
(generate a personal access token in Github settings > developer)
|
|
231
|
+
as environment variables before proceeding."""
|
|
232
|
+
)
|
|
233
|
+
exit(1)
|
|
234
|
+
return (user, personal_access_token)
|
|
235
|
+
|
|
236
|
+
def scan_dependencies(
|
|
237
|
+
self,
|
|
238
|
+
dependencies: List[Dependency],
|
|
239
|
+
rules=None,
|
|
240
|
+
callback: typing.Callable[[dict], None] = noop,
|
|
241
|
+
) -> list[dict]:
|
|
242
|
+
"""
|
|
243
|
+
scans each possible dependency and version supplied
|
|
244
|
+
|
|
245
|
+
Args:
|
|
246
|
+
dependencies a list of dependencies to scan
|
|
247
|
+
rules: list of rules to apply
|
|
248
|
+
callback: callback to call for each result
|
|
249
|
+
|
|
250
|
+
Returns:
|
|
251
|
+
dict: mapping of dependencies to scan results
|
|
252
|
+
|
|
253
|
+
ex.
|
|
254
|
+
{
|
|
255
|
+
....
|
|
256
|
+
<dependency-name>: {
|
|
257
|
+
issues: ...,
|
|
258
|
+
results: {
|
|
259
|
+
...
|
|
260
|
+
}
|
|
261
|
+
},
|
|
262
|
+
...
|
|
263
|
+
}
|
|
264
|
+
"""
|
|
265
|
+
|
|
266
|
+
def scan_single_dependency(dependency: str, version: Optional[str]) -> dict:
|
|
267
|
+
log.debug(f"Scanning {dependency} version {version}")
|
|
268
|
+
result = self.package_scanner.scan_remote(dependency, version, rules)
|
|
269
|
+
return {"dependency": dependency, "version": version, "result": result}
|
|
270
|
+
|
|
271
|
+
num_workers = PARALLELISM
|
|
272
|
+
|
|
273
|
+
log.info(f"Scanning using at most {num_workers} parallel worker threads\n")
|
|
274
|
+
with ThreadPoolExecutor(max_workers=num_workers) as pool:
|
|
275
|
+
try:
|
|
276
|
+
futures: typing.List[concurrent.futures.Future] = []
|
|
277
|
+
for dependency in dependencies:
|
|
278
|
+
versions = dependency.versions
|
|
279
|
+
if not versions:
|
|
280
|
+
# this will cause scan_remote to use the latest version
|
|
281
|
+
futures.append(
|
|
282
|
+
pool.submit(scan_single_dependency, dependency.name, None)
|
|
283
|
+
)
|
|
284
|
+
else:
|
|
285
|
+
futures.extend(
|
|
286
|
+
map(
|
|
287
|
+
lambda version: pool.submit(
|
|
288
|
+
scan_single_dependency,
|
|
289
|
+
dependency.name,
|
|
290
|
+
version.version,
|
|
291
|
+
),
|
|
292
|
+
versions,
|
|
293
|
+
)
|
|
294
|
+
)
|
|
295
|
+
|
|
296
|
+
results = []
|
|
297
|
+
for future in concurrent.futures.as_completed(futures):
|
|
298
|
+
result = future.result()
|
|
299
|
+
if callback is not None:
|
|
300
|
+
callback(result)
|
|
301
|
+
results.append(result)
|
|
302
|
+
except KeyboardInterrupt:
|
|
303
|
+
log.warning("Received keyboard interrupt, cancelling scan\n")
|
|
304
|
+
pool.shutdown(wait=False, cancel_futures=True)
|
|
305
|
+
|
|
306
|
+
return results
|
|
307
|
+
|
|
308
|
+
def scan_remote(
|
|
309
|
+
self, url: str, branch: str, requirements_name: str
|
|
310
|
+
) -> tuple[List[Dependency], list[dict]]:
|
|
311
|
+
"""
|
|
312
|
+
Scans remote requirements.txt file
|
|
313
|
+
|
|
314
|
+
Args:
|
|
315
|
+
url (str): url of the GitHub repo
|
|
316
|
+
branch (str): branch containing requirements.txt
|
|
317
|
+
requirements_name (str, optional): name of requirements file.
|
|
318
|
+
Defaults to "requirements.txt".
|
|
319
|
+
|
|
320
|
+
Returns:
|
|
321
|
+
deps: list of dependencies to scan
|
|
322
|
+
results: mapping of dependencies to scan results
|
|
323
|
+
ex.
|
|
324
|
+
{
|
|
325
|
+
....
|
|
326
|
+
<dependency-name>: {
|
|
327
|
+
issues: ...,
|
|
328
|
+
results: {
|
|
329
|
+
...
|
|
330
|
+
}
|
|
331
|
+
},
|
|
332
|
+
...
|
|
333
|
+
}
|
|
334
|
+
"""
|
|
335
|
+
|
|
336
|
+
token = self._authenticate_by_access_token()
|
|
337
|
+
githubusercontent_url = url.replace("github", "raw.githubusercontent")
|
|
338
|
+
req_url = f"{githubusercontent_url}/{branch}/{requirements_name}"
|
|
339
|
+
resp = requests.get(url=req_url, auth=token)
|
|
340
|
+
resp.raise_for_status()
|
|
341
|
+
dependencies = self.parse_requirements(resp.content.decode())
|
|
342
|
+
return dependencies, self.scan_dependencies(dependencies)
|
|
343
|
+
|
|
344
|
+
def scan_local(
|
|
345
|
+
self, path, rules=None, callback: typing.Callable[[dict], None] = noop
|
|
346
|
+
) -> Tuple[List[DependencyFile], list[dict]]:
|
|
347
|
+
"""
|
|
348
|
+
Scans a local requirements files (requirements.txt, package.json, etc.)
|
|
349
|
+
|
|
350
|
+
Args:
|
|
351
|
+
path (str): path to requirements file or directory to search
|
|
352
|
+
rules: list of rules to apply
|
|
353
|
+
callback: callback to call for each result
|
|
354
|
+
|
|
355
|
+
Returns:
|
|
356
|
+
deps: list of dependencies to scan
|
|
357
|
+
results: mapping of dependencies to scan results
|
|
358
|
+
ex.
|
|
359
|
+
{
|
|
360
|
+
....
|
|
361
|
+
<dependency-name>: {
|
|
362
|
+
issues: ...,
|
|
363
|
+
results: {
|
|
364
|
+
...
|
|
365
|
+
}
|
|
366
|
+
},
|
|
367
|
+
...
|
|
368
|
+
}
|
|
369
|
+
|
|
370
|
+
"""
|
|
371
|
+
|
|
372
|
+
requirement_paths = []
|
|
373
|
+
|
|
374
|
+
try:
|
|
375
|
+
if os.path.isfile(path):
|
|
376
|
+
requirement_paths.append(path)
|
|
377
|
+
elif os.path.isdir(path):
|
|
378
|
+
requirement_paths.extend(self.find_requirements(path))
|
|
379
|
+
else:
|
|
380
|
+
raise ValueError(f"unable to find file or directory {path}")
|
|
381
|
+
|
|
382
|
+
dep_files: List[DependencyFile] = []
|
|
383
|
+
|
|
384
|
+
for req in requirement_paths:
|
|
385
|
+
with open(req, "r") as f:
|
|
386
|
+
dep_files.append(
|
|
387
|
+
DependencyFile(
|
|
388
|
+
file_path=req,
|
|
389
|
+
dependencies=self.parse_requirements(f.read()),
|
|
390
|
+
)
|
|
391
|
+
)
|
|
392
|
+
deps_to_scan = [d for d_file in dep_files for d in d_file.dependencies]
|
|
393
|
+
results = self.scan_dependencies(deps_to_scan, rules, callback)
|
|
394
|
+
return dep_files, results
|
|
395
|
+
except Exception as e:
|
|
396
|
+
log.error(f"Error while scanning. Received {e}")
|
|
397
|
+
raise e
|
|
398
|
+
|
|
399
|
+
@abstractmethod
|
|
400
|
+
def parse_requirements(self, raw_requirements: str) -> List[Dependency]:
|
|
401
|
+
pass
|
|
402
|
+
|
|
403
|
+
@abstractmethod
|
|
404
|
+
def find_requirements(
|
|
405
|
+
self,
|
|
406
|
+
directory: str,
|
|
407
|
+
) -> list[str]: # returns paths of files
|
|
408
|
+
pass
|