guarddog 2.5.0__py3-none-any.whl → 2.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guarddog/analyzer/analyzer.py +58 -20
- guarddog/analyzer/metadata/__init__.py +2 -0
- guarddog/analyzer/metadata/bundled_binary.py +6 -6
- guarddog/analyzer/metadata/deceptive_author.py +3 -1
- guarddog/analyzer/metadata/detector.py +7 -2
- guarddog/analyzer/metadata/empty_information.py +8 -3
- guarddog/analyzer/metadata/go/typosquatting.py +4 -3
- guarddog/analyzer/metadata/npm/bundled_binary.py +7 -2
- guarddog/analyzer/metadata/npm/deceptive_author.py +1 -1
- guarddog/analyzer/metadata/npm/direct_url_dependency.py +2 -1
- guarddog/analyzer/metadata/npm/empty_information.py +10 -7
- guarddog/analyzer/metadata/npm/potentially_compromised_email_domain.py +4 -3
- guarddog/analyzer/metadata/npm/release_zero.py +13 -5
- guarddog/analyzer/metadata/npm/typosquatting.py +1 -1
- guarddog/analyzer/metadata/npm/unclaimed_maintainer_email_domain.py +3 -2
- guarddog/analyzer/metadata/npm/utils.py +4 -5
- guarddog/analyzer/metadata/potentially_compromised_email_domain.py +8 -4
- guarddog/analyzer/metadata/pypi/__init__.py +12 -6
- guarddog/analyzer/metadata/pypi/bundled_binary.py +7 -2
- guarddog/analyzer/metadata/pypi/deceptive_author.py +1 -1
- guarddog/analyzer/metadata/pypi/empty_information.py +16 -5
- guarddog/analyzer/metadata/pypi/potentially_compromised_email_domain.py +4 -3
- guarddog/analyzer/metadata/pypi/release_zero.py +16 -6
- guarddog/analyzer/metadata/pypi/repository_integrity_mismatch.py +53 -27
- guarddog/analyzer/metadata/pypi/single_python_file.py +9 -4
- guarddog/analyzer/metadata/pypi/typosquatting.py +21 -8
- guarddog/analyzer/metadata/pypi/unclaimed_maintainer_email_domain.py +6 -2
- guarddog/analyzer/metadata/pypi/utils.py +1 -4
- guarddog/analyzer/metadata/release_zero.py +1 -1
- guarddog/analyzer/metadata/repository_integrity_mismatch.py +10 -3
- guarddog/analyzer/metadata/resources/top_pypi_packages.json +43984 -15984
- guarddog/analyzer/metadata/typosquatting.py +12 -8
- guarddog/analyzer/metadata/unclaimed_maintainer_email_domain.py +7 -2
- guarddog/analyzer/sourcecode/__init__.py +34 -7
- guarddog/analyzer/sourcecode/api-obfuscation.yml +42 -0
- guarddog/analyzer/sourcecode/code-execution.yml +1 -0
- guarddog/analyzer/sourcecode/dll-hijacking.yml +5 -0
- guarddog/analyzer/sourcecode/go-exec-base64.yml +40 -0
- guarddog/analyzer/sourcecode/go-exec-download.yml +85 -0
- guarddog/analyzer/sourcecode/go-exfiltrate-sensitive-data.yml +85 -0
- guarddog/analyzer/sourcecode/npm-obfuscation.yml +2 -1
- guarddog/analyzer/sourcecode/shady-links.yml +2 -0
- guarddog/analyzer/sourcecode/suspicious_passwd_access_linux.yar +12 -0
- guarddog/analyzer/sourcecode/unicode.yml +75 -0
- guarddog/cli.py +33 -107
- guarddog/ecosystems.py +3 -0
- guarddog/reporters/__init__.py +28 -0
- guarddog/reporters/human_readable.py +138 -0
- guarddog/reporters/json.py +28 -0
- guarddog/reporters/reporter_factory.py +50 -0
- guarddog/reporters/sarif.py +179 -173
- guarddog/scanners/__init__.py +5 -0
- guarddog/scanners/extension_scanner.py +152 -0
- guarddog/scanners/github_action_project_scanner.py +47 -8
- guarddog/scanners/github_action_scanner.py +6 -2
- guarddog/scanners/go_project_scanner.py +42 -5
- guarddog/scanners/npm_package_scanner.py +12 -4
- guarddog/scanners/npm_project_scanner.py +54 -10
- guarddog/scanners/pypi_package_scanner.py +9 -3
- guarddog/scanners/pypi_project_scanner.py +67 -29
- guarddog/scanners/scanner.py +247 -164
- guarddog/utils/archives.py +2 -1
- guarddog/utils/package_info.py +3 -1
- {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info}/METADATA +11 -10
- guarddog-2.7.0.dist-info/RECORD +100 -0
- {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info}/WHEEL +1 -1
- guarddog-2.5.0.dist-info/RECORD +0 -90
- {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info}/entry_points.txt +0 -0
- {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info/licenses}/LICENSE +0 -0
- {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info/licenses}/LICENSE-3rdparty.csv +0 -0
- {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info/licenses}/NOTICE +0 -0
guarddog/reporters/sarif.py
CHANGED

@@ -4,181 +4,187 @@ import json
 from guarddog.analyzer.sourcecode import get_sourcecode_rules
 from guarddog.analyzer.metadata import get_metadata_detectors
 from guarddog.ecosystems import ECOSYSTEM
+from guarddog.reporters import BaseReporter
+from guarddog.scanners.scanner import DependencyFile
+from typing import List
+from guarddog.reporters.human_readable import HumanReadableReporter
+
+
+class SarifReporter(BaseReporter):
+    """
+    Sarif is a class that formats and prints scan results in the SARIF format.
+    """
+
+    @staticmethod
+    def render_verify(
+        dependency_files: List[DependencyFile],
+        rule_names: list[str],
+        scan_results: list[dict],
+        ecosystem: ECOSYSTEM,
+    ) -> tuple[str, str]:
+        """
+        Report the scans results in the SARIF format.
+
+        Args:
+            scan_results (dict): The scan results to be reported.
+        """
+
+        def build_rules_help_list() -> dict:
+            """
+            Builds a dict with the names of all available rules and their documentation
+            @return: dict[name_of_rule, rule_description]
+            """
+            rules_documentation = {}
+            for ecosystem in ECOSYSTEM:
+                rules = get_metadata_detectors(ecosystem)
+                for name, instance in rules.items():
+                    detector_class = instance.__class__.__base__
+                    rules_documentation[name] = detector_class.__doc__
+                for sourcecode_rule in get_sourcecode_rules(ecosystem):
+                    rules_documentation[sourcecode_rule.id] = (
+                        sourcecode_rule.description
+                    )
+            return rules_documentation
+
+        def get_sarif_log(runs):
+            """
+            https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#sariflog-object
+            """
+            return {
+                "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
+                "version": "2.1.0",
+                "runs": runs,
+            }
 
+        def get_run(results, driver):
+            """
+            https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#run-object
+            """
+            return {"tool": {"driver": driver}, "results": results}
+
+        def get_driver(rules, ecosystem: str):
+            """
+            https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#toolcomponent-object
+            """
+            return {
+                "name": f"GuardDog-{ecosystem}",
+                "informationUri": "https://github.com/DataDog/guarddog",
+                "rules": rules,
+            }
 
-def …
-…
-    """
-    https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#sariflog-object
-    """
-    return {
-        "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
-        "version": "2.1.0",
-        "runs": runs
-    }
-
-
-def get_run(results, driver):
-    """
-    https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#run-object
-    """
-    return {
-        "tool": {
-            "driver": driver
-        },
-        "results": results
-    }
-
-
-def get_driver(rules, ecosystem: str):
-    """
-    https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#toolcomponent-object
-    """
-    return {
-        "name": f"GuardDog-{ecosystem}",
-        "rules": rules
-    }
-
-
-def get_rule(rule_name: str, rules_documentation) -> dict:
-    """
-    https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#reportingdescriptor-object
-    """
-    message = rules_documentation[rule_name] if rules_documentation[rule_name] is not None else ""
-    return {
-        "id": rule_name,
-        "defaultConfiguration": {
-            "level": "warning"
-        },
-        "shortDescription": {
-            "text": f"GuardDog rule: {rule_name}"
-        },
-        "fullDescription": {
-            "text": message
-        },
-        "help": {
-            "text": message,
-            "markdown": message
-        },
-        "properties": {
-            "precision": "medium"
-        }
-    }
-
-
-def get_result(rule_name, locations, text, partial_fingerprints):
-    """
-    https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#result-object
-    """
-    return {
-        "ruleId": rule_name,
-        "message": {
-            "text": text
-        },
-        "locations": locations,
-        "partialFingerprints": partial_fingerprints
-    }
-
-
-def get_location(physical_location):
-    """
-    https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#location-object
-    """
-    return {
-        "physicalLocation": physical_location
-    }
+        def get_rule(rule_name: str, rules_documentation) -> dict:
+            """
+            https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#reportingdescriptor-object
+            """
+            message = (
+                rules_documentation[rule_name]
+                if rules_documentation[rule_name] is not None
+                else ""
+            )
+            return {
+                "id": rule_name,
+                "defaultConfiguration": {"level": "warning"},
+                "shortDescription": {"text": f"GuardDog rule: {rule_name}"},
+                "fullDescription": {"text": message},
+                "help": {"text": message, "markdown": message},
+                "properties": {"precision": "medium"},
+            }
 
+        def get_result(rule_name, locations, text, partial_fingerprints):
+            """
+            https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#result-object
+            """
+            return {
+                "ruleId": rule_name,
+                "message": {"text": text},
+                "locations": locations,
+                "partialFingerprints": partial_fingerprints,
+            }
 
-def …
-…
-def get_region(
-…
-) -> str:
-    rules_documentation = build_rules_help_list()
-    rules = list(map(
-        lambda s: get_rule(s, rules_documentation),
-        rule_names
-    ))
-    driver = get_driver(rules, ecosystem.value)
-    results = []
-
-    with open(package_path, "r") as file:
-        package_raw = file.read()
-
-    for entry in scan_results:
-        if entry["result"]["issues"] == 0:
-            continue
-
-        region = get_region(package_raw, entry["dependency"])
-        uri = package_path[2:] if package_path.startswith('./') else package_path
-        physical_location = get_physical_location(uri, region)
-        location = get_location(physical_location)
-        scan_result_details = entry["result"]["results"]
-        package = entry["dependency"]
-        version = entry["version"]
-        for rule_name in scan_result_details.keys():
-            if scan_result_details[rule_name] is None or len(scan_result_details[rule_name]) == 0:
+        def get_location(physical_location):
+            """
+            https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#location-object
+            """
+            return {"physicalLocation": physical_location}
+
+        def get_physical_location(uri, region):
+            """
+            https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#physicallocation-object
+            """
+            return {"artifactLocation": {"uri": uri}, "region": region}
+
+        def get_region(
+            dependency_files: List[DependencyFile], package: str
+        ) -> tuple[DependencyFile, dict]:
+            for dependency_file in dependency_files:
+                for d in dependency_file.dependencies:
+                    if d.name == package:
+                        return dependency_file, {
+                            "startLine": list(d.versions)[0].location,
+                            "endLine": list(d.versions)[0].location,
+                            "startColumn": 1,
+                            "endColumn": len(package),
+                        }
+            raise ValueError(
+                f"Could not find the package {package} in the dependency files"
+            )
+
+        rules_documentation = build_rules_help_list()
+        rules = list(map(lambda s: get_rule(s, rules_documentation), rule_names))
+        driver = get_driver(rules, ecosystem.value)
+        results = []
+
+        for entry in scan_results:
+            if entry["result"]["issues"] == 0:
                 continue
-…
-            )
-…
+
+            dep_file, region = get_region(
+                dependency_files=dependency_files, package=entry["dependency"]
+            )
+            package_path = dep_file.file_path
+            uri = package_path[2:] if package_path.startswith("./") else package_path
+            physical_location = get_physical_location(uri, region)
+            location = get_location(physical_location)
+            scan_result_details = entry["result"]["results"]
+            package = entry["dependency"]
+            version = entry["version"]
+            for rule_name in scan_result_details.keys():
+                if (
+                    scan_result_details[rule_name] is None
+                    or len(scan_result_details[rule_name]) == 0
+                ):
+                    continue
+
+                text = (
+                    f"On package: {package} version: {version}\n"
+                    + "\n".join(
+                        map(
+                            lambda x: f"{x['message']} in file {x['location']}",
+                            scan_result_details[rule_name],
+                        )
+                    )
+                    if isinstance(scan_result_details[rule_name], list)
+                    else scan_result_details[rule_name]
+                )
+                key = f"{rule_name}-{text}"
+                partial_fingerprints = {
+                    f"guarddog/v1/{rule_name}": hashlib.sha256(
+                        key.encode("utf-8")
+                    ).hexdigest()
+                }
+                result = get_result(rule_name, [location], text, partial_fingerprints)
+                results.append(result)
+
+        runs = get_run(results, driver)
+        log = get_sarif_log([runs])
+
+        errors = "\n".join(
+            [
+                HumanReadableReporter.print_errors(
+                    identifier=r["dependency"], results=r["result"]
+                )
+                for r in scan_results
+            ]
+        )
+
+        return (json.dumps(log, indent=2), errors)
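The partialFingerprints scheme above derives a stable identifier for each SARIF result from the rule name and the rendered message text. A minimal standalone sketch of that computation (not part of the diff; the rule name and message below are hypothetical examples):

import hashlib
import json

rule_name = "typosquatting"
text = "On package: reqeusts version: 1.0.0\n..."  # hypothetical rendered message

# Same derivation as in the diff: sha256 over "<rule>-<message>"
key = f"{rule_name}-{text}"
partial_fingerprints = {
    f"guarddog/v1/{rule_name}": hashlib.sha256(key.encode("utf-8")).hexdigest()
}
print(json.dumps(partial_fingerprints, indent=2))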
guarddog/scanners/__init__.py
CHANGED

@@ -8,6 +8,7 @@ from .pypi_project_scanner import PypiRequirementsScanner
 from .go_package_scanner import GoModuleScanner
 from .go_project_scanner import GoDependenciesScanner
 from .github_action_scanner import GithubActionScanner
+from .extension_scanner import ExtensionScanner
 from .scanner import PackageScanner, ProjectScanner
 from ..ecosystems import ECOSYSTEM
 
@@ -33,6 +34,8 @@ def get_package_scanner(ecosystem: ECOSYSTEM) -> Optional[PackageScanner]:
             return GoModuleScanner()
         case ECOSYSTEM.GITHUB_ACTION:
             return GithubActionScanner()
+        case ECOSYSTEM.EXTENSION:
+            return ExtensionScanner()
     return None
 
 
@@ -57,4 +60,6 @@ def get_project_scanner(ecosystem: ECOSYSTEM) -> Optional[ProjectScanner]:
             return GoDependenciesScanner()
         case ECOSYSTEM.GITHUB_ACTION:
             return GitHubActionDependencyScanner()
+        case ECOSYSTEM.EXTENSION:
+            return None  # we're not including dependency scanning for this PR
     return None
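The new match arms are reached through the existing scanner factories. A minimal sketch of the dispatch (not part of the diff; assumes guarddog 2.7.0 is installed and importable):

from guarddog.ecosystems import ECOSYSTEM
from guarddog.scanners import get_package_scanner, get_project_scanner

# Package scans for extensions are handled by ExtensionScanner; project
# (dependency) scanning deliberately returns None, per the comment in the diff.
package_scanner = get_package_scanner(ECOSYSTEM.EXTENSION)
project_scanner = get_project_scanner(ECOSYSTEM.EXTENSION)
print(type(package_scanner).__name__, project_scanner)  # ExtensionScanner None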
guarddog/scanners/extension_scanner.py
ADDED

@@ -0,0 +1,152 @@
+import logging
+import os
+import typing
+
+import requests
+
+from guarddog.analyzer.analyzer import Analyzer
+from guarddog.ecosystems import ECOSYSTEM
+from guarddog.scanners.scanner import PackageScanner, noop
+
+log = logging.getLogger("guarddog")
+
+MARKETPLACE_URL = (
+    "https://marketplace.visualstudio.com/_apis/public/gallery/extensionquery"
+)
+MARKETPLACE_HEADERS = {
+    "Content-Type": "application/json",
+    "Accept": "application/json;api-version=3.0-preview.1",
+}
+MARKETPLACE_DOWNLOAD_LINK_ASSET_TYPE = "Microsoft.VisualStudio.Services.VSIXPackage"
+VSIX_FILE_EXTENSION = ".vsix"
+
+# VSCode Marketplace API filter types
+# FilterType 7 = publisherName.extensionName (search by exact extension identifier)
+MARKETPLACE_FILTER_TYPE_EXTENSION_NAME = 7
+
+# VSCode Marketplace API flags (bitwise combination)
+# 446 = IncludeVersions | IncludeFiles | IncludeMetadata
+MARKETPLACE_FLAGS_FULL_METADATA = 446
+
+
+class ExtensionScanner(PackageScanner):
+    def __init__(self) -> None:
+        super().__init__(Analyzer(ECOSYSTEM.EXTENSION))
+
+    def download_and_get_package_info(
+        self, directory: str, package_name: str, version=None
+    ) -> typing.Tuple[dict, str]:
+        """
+        Downloads a VSCode extension from the marketplace and extracts it
+
+        Args:
+            directory: Directory to download to
+            package_name: Extension identifier (publisher.extension format)
+            version: Specific version or default to latest
+
+        Returns:
+            Tuple of (marketplace API response, extracted_path)
+        """
+        marketplace_data, vsix_url = self._get_marketplace_info_and_url(
+            package_name, version
+        )
+
+        vsix_path = os.path.join(
+            directory, package_name.replace("/", "-") + VSIX_FILE_EXTENSION
+        )
+        extracted_path = vsix_path.removesuffix(VSIX_FILE_EXTENSION)
+
+        log.debug(f"Downloading VSCode extension from {vsix_url}")
+
+        self.download_compressed(vsix_url, vsix_path, extracted_path)
+
+        return marketplace_data, extracted_path
+
+    def _get_marketplace_info_and_url(
+        self, package_name: str, version: typing.Optional[str] = None
+    ) -> typing.Tuple[dict, str]:
+        """Get marketplace metadata and VSIX download URL"""
+        payload = {
+            "filters": [
+                {
+                    "criteria": [
+                        {
+                            "filterType": MARKETPLACE_FILTER_TYPE_EXTENSION_NAME,
+                            "value": package_name,
+                        }
+                    ]
+                }
+            ],
+            "flags": MARKETPLACE_FLAGS_FULL_METADATA,
+        }
+
+        response = requests.post(
+            MARKETPLACE_URL, headers=MARKETPLACE_HEADERS, json=payload
+        )
+
+        response.raise_for_status()
+
+        data = response.json()
+
+        if not data.get("results") or not data["results"][0].get("extensions"):
+            raise ValueError(f"Extension {package_name} not found in marketplace")
+
+        extension_info = data["results"][0]["extensions"][0]
+        versions = extension_info.get("versions", [])
+
+        if not versions:
+            raise ValueError(
+                f"No versions available for this extension: {package_name}"
+            )
+
+        target_version = None
+        if version is None:
+            # if not version is provided, default to latest
+            target_version = versions[0]
+        else:
+            for v in versions:
+                if v.get("version") == version:
+                    target_version = v
+                    break
+            if target_version is None:
+                raise ValueError(
+                    f"Version {version} not found for extension: {package_name}"
+                )
+
+        # Extract download URL
+        files = target_version.get("files", [])
+        vsix_url = None
+        for file_info in files:
+            if file_info.get("assetType") == MARKETPLACE_DOWNLOAD_LINK_ASSET_TYPE:
+                vsix_url = file_info.get("source")
+                break
+
+        if not vsix_url:
+            raise ValueError(
+                f"No VSIX download link available for this extension: {package_name}"
+            )
+
+        return data, vsix_url
+
+    def scan_local(
+        self, path: str, rules=None, callback: typing.Callable[[dict], None] = noop
+    ) -> dict:
+        """
+        Scan a local VSCode extension directory
+
+        Args:
+            path: Path to extension directory containing package.json
+            rules: Set of rules to use
+            callback: Callback to apply to analyzer output
+
+        Returns:
+            Scan results
+        """
+        if rules is not None:
+            rules = set(rules)
+
+        # Use only sourcecode analysis for local scans, consistent with other ecosystems
+        results = self.analyzer.analyze_sourcecode(path, rules=rules)
+        callback(results)
+
+        return results
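The marketplace lookup above reduces to a single POST against the extensionquery endpoint, using the filter type and flag values defined at the top of the new file. A standalone sketch of that request (not part of the diff; "ms-python.python" is a hypothetical extension identifier):

import requests

payload = {
    "filters": [
        # filterType 7 = exact publisher.extension identifier
        {"criteria": [{"filterType": 7, "value": "ms-python.python"}]}
    ],
    "flags": 446,  # IncludeVersions | IncludeFiles | IncludeMetadata
}
response = requests.post(
    "https://marketplace.visualstudio.com/_apis/public/gallery/extensionquery",
    headers={
        "Content-Type": "application/json",
        "Accept": "application/json;api-version=3.0-preview.1",
    },
    json=payload,
)
response.raise_for_status()
extension = response.json()["results"][0]["extensions"][0]
# versions[0] is the latest, which is what the scanner defaults to
print(extension["versions"][0]["version"])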
guarddog/scanners/github_action_project_scanner.py
CHANGED

@@ -1,4 +1,5 @@
 import logging
+import os
 from typing import List, Dict, TypedDict
 from typing_extensions import NotRequired
 
@@ -7,6 +8,7 @@ import re
 
 from guarddog.scanners.github_action_scanner import GithubActionScanner
 from guarddog.scanners.scanner import ProjectScanner
+from guarddog.scanners.scanner import Dependency, DependencyVersion
 
 log = logging.getLogger("guarddog")
 
@@ -66,17 +68,40 @@ class GitHubActionDependencyScanner(ProjectScanner):
     def __init__(self) -> None:
         super().__init__(GithubActionScanner())
 
-    def parse_requirements(self, raw_requirements: str) -> …
+    def parse_requirements(self, raw_requirements: str) -> List[Dependency]:
         actions = self.parse_workflow_3rd_party_actions(raw_requirements)
+        dependencies: List[Dependency] = []
 
-        requirements: dict[str, set[str]] = {}
         for action in actions:
-…
+            name = action["name"]
+            version = action["ref"]
+            idx = next(
+                iter(
+                    [
+                        ix
+                        for ix, line in enumerate(raw_requirements.splitlines())
+                        if name in line
+                    ]
+                ),
+                0,
+            )
+            # find the dep with the same name or create a new one
+            dep_versions = [DependencyVersion(version=version, location=idx + 1)]
+
+            dep = next(
+                filter(
+                    lambda d: d.name == name,
+                    dependencies,
+                ),
+                None,
+            )
+            if not dep:
+                dep = Dependency(name=name, versions=set())
+                dependencies.append(dep)
+
+            dep.versions.update(dep_versions)
+
+        return dependencies
 
     def parse_workflow_3rd_party_actions(
         self, workflow_file: str
@@ -99,3 +124,17 @@ class GitHubActionDependencyScanner(ProjectScanner):
         if action:
             actions.append(action)
         return actions
+
+    def find_requirements(self, directory: str) -> list[str]:
+        requirement_files = []
+
+        if not os.path.isdir(os.path.join(directory, ".git")):
+            raise Exception(
+                "unable to find github workflows, not called from git directory"
+            )
+        workflow_folder = os.path.join(directory, ".github/workflows")
+        if os.path.isdir(workflow_folder):
+            for name in os.listdir(workflow_folder):
+                if re.match(r"^(.+)\.y(a)?ml$", name, flags=re.IGNORECASE):
+                    requirement_files.append(os.path.join(workflow_folder, name))
+        return requirement_files
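The new parse_requirements locates each action by scanning the raw workflow text for the first line containing the action's name, and records that 1-based line number in DependencyVersion.location. A minimal sketch of the heuristic (not part of the diff; the workflow snippet is a hypothetical example):

workflow = """\
jobs:
  build:
    steps:
      - uses: actions/checkout@v4
"""

name = "actions/checkout"
# index of the first matching line, defaulting to 0 when the name is absent
idx = next(
    iter([ix for ix, line in enumerate(workflow.splitlines()) if name in line]),
    0,
)
print(idx + 1)  # -> 4, the line number stored as the dependency's location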
guarddog/scanners/github_action_scanner.py
CHANGED

@@ -15,7 +15,9 @@ class GithubActionScanner(PackageScanner):
     def __init__(self) -> None:
         super().__init__(Analyzer(ECOSYSTEM.GITHUB_ACTION))
 
-    def download_and_get_package_info(self, directory: str, package_name: str, version=None) -> typing.Tuple[dict, str]:
+    def download_and_get_package_info(
+        self, directory: str, package_name: str, version=None
+    ) -> typing.Tuple[dict, str]:
         repo = self._get_repo(package_name)
         tarball_url = self._get_git_tarball_url(repo, version)
 
@@ -25,7 +27,9 @@ class GithubActionScanner(PackageScanner):
         if file_extension == "":
             file_extension = ".zip"
 
-        zippath = os.path.join(directory, package_name.replace("/", "-") + file_extension)
+        zippath = os.path.join(
+            directory, package_name.replace("/", "-") + file_extension
+        )
         unzippedpath = zippath.removesuffix(file_extension)
         self.download_compressed(tarball_url, zippath, unzippedpath)
 