guarddog 2.5.0__py3-none-any.whl → 2.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. guarddog/analyzer/analyzer.py +58 -20
  2. guarddog/analyzer/metadata/__init__.py +2 -0
  3. guarddog/analyzer/metadata/bundled_binary.py +6 -6
  4. guarddog/analyzer/metadata/deceptive_author.py +3 -1
  5. guarddog/analyzer/metadata/detector.py +7 -2
  6. guarddog/analyzer/metadata/empty_information.py +8 -3
  7. guarddog/analyzer/metadata/go/typosquatting.py +4 -3
  8. guarddog/analyzer/metadata/npm/bundled_binary.py +7 -2
  9. guarddog/analyzer/metadata/npm/deceptive_author.py +1 -1
  10. guarddog/analyzer/metadata/npm/direct_url_dependency.py +2 -1
  11. guarddog/analyzer/metadata/npm/empty_information.py +10 -7
  12. guarddog/analyzer/metadata/npm/potentially_compromised_email_domain.py +4 -3
  13. guarddog/analyzer/metadata/npm/release_zero.py +13 -5
  14. guarddog/analyzer/metadata/npm/typosquatting.py +1 -1
  15. guarddog/analyzer/metadata/npm/unclaimed_maintainer_email_domain.py +3 -2
  16. guarddog/analyzer/metadata/npm/utils.py +4 -5
  17. guarddog/analyzer/metadata/potentially_compromised_email_domain.py +8 -4
  18. guarddog/analyzer/metadata/pypi/__init__.py +12 -6
  19. guarddog/analyzer/metadata/pypi/bundled_binary.py +7 -2
  20. guarddog/analyzer/metadata/pypi/deceptive_author.py +1 -1
  21. guarddog/analyzer/metadata/pypi/empty_information.py +16 -5
  22. guarddog/analyzer/metadata/pypi/potentially_compromised_email_domain.py +4 -3
  23. guarddog/analyzer/metadata/pypi/release_zero.py +16 -6
  24. guarddog/analyzer/metadata/pypi/repository_integrity_mismatch.py +53 -27
  25. guarddog/analyzer/metadata/pypi/single_python_file.py +9 -4
  26. guarddog/analyzer/metadata/pypi/typosquatting.py +21 -8
  27. guarddog/analyzer/metadata/pypi/unclaimed_maintainer_email_domain.py +6 -2
  28. guarddog/analyzer/metadata/pypi/utils.py +1 -4
  29. guarddog/analyzer/metadata/release_zero.py +1 -1
  30. guarddog/analyzer/metadata/repository_integrity_mismatch.py +10 -3
  31. guarddog/analyzer/metadata/resources/top_pypi_packages.json +43984 -15984
  32. guarddog/analyzer/metadata/typosquatting.py +12 -8
  33. guarddog/analyzer/metadata/unclaimed_maintainer_email_domain.py +7 -2
  34. guarddog/analyzer/sourcecode/__init__.py +34 -7
  35. guarddog/analyzer/sourcecode/api-obfuscation.yml +42 -0
  36. guarddog/analyzer/sourcecode/code-execution.yml +1 -0
  37. guarddog/analyzer/sourcecode/dll-hijacking.yml +5 -0
  38. guarddog/analyzer/sourcecode/go-exec-base64.yml +40 -0
  39. guarddog/analyzer/sourcecode/go-exec-download.yml +85 -0
  40. guarddog/analyzer/sourcecode/go-exfiltrate-sensitive-data.yml +85 -0
  41. guarddog/analyzer/sourcecode/npm-obfuscation.yml +2 -1
  42. guarddog/analyzer/sourcecode/shady-links.yml +2 -0
  43. guarddog/analyzer/sourcecode/suspicious_passwd_access_linux.yar +12 -0
  44. guarddog/analyzer/sourcecode/unicode.yml +75 -0
  45. guarddog/cli.py +33 -107
  46. guarddog/ecosystems.py +3 -0
  47. guarddog/reporters/__init__.py +28 -0
  48. guarddog/reporters/human_readable.py +138 -0
  49. guarddog/reporters/json.py +28 -0
  50. guarddog/reporters/reporter_factory.py +50 -0
  51. guarddog/reporters/sarif.py +179 -173
  52. guarddog/scanners/__init__.py +5 -0
  53. guarddog/scanners/extension_scanner.py +152 -0
  54. guarddog/scanners/github_action_project_scanner.py +47 -8
  55. guarddog/scanners/github_action_scanner.py +6 -2
  56. guarddog/scanners/go_project_scanner.py +42 -5
  57. guarddog/scanners/npm_package_scanner.py +12 -4
  58. guarddog/scanners/npm_project_scanner.py +54 -10
  59. guarddog/scanners/pypi_package_scanner.py +9 -3
  60. guarddog/scanners/pypi_project_scanner.py +67 -29
  61. guarddog/scanners/scanner.py +247 -164
  62. guarddog/utils/archives.py +2 -1
  63. guarddog/utils/package_info.py +3 -1
  64. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info}/METADATA +11 -10
  65. guarddog-2.7.0.dist-info/RECORD +100 -0
  66. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info}/WHEEL +1 -1
  67. guarddog-2.5.0.dist-info/RECORD +0 -90
  68. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info}/entry_points.txt +0 -0
  69. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info/licenses}/LICENSE +0 -0
  70. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info/licenses}/LICENSE-3rdparty.csv +0 -0
  71. {guarddog-2.5.0.dist-info → guarddog-2.7.0.dist-info/licenses}/NOTICE +0 -0
@@ -4,181 +4,187 @@ import json
4
4
  from guarddog.analyzer.sourcecode import get_sourcecode_rules
5
5
  from guarddog.analyzer.metadata import get_metadata_detectors
6
6
  from guarddog.ecosystems import ECOSYSTEM
7
+ from guarddog.reporters import BaseReporter
8
+ from guarddog.scanners.scanner import DependencyFile
9
+ from typing import List
10
+ from guarddog.reporters.human_readable import HumanReadableReporter
11
+
12
+
13
+ class SarifReporter(BaseReporter):
14
+ """
15
+ Sarif is a class that formats and prints scan results in the SARIF format.
16
+ """
17
+
18
+ @staticmethod
19
+ def render_verify(
20
+ dependency_files: List[DependencyFile],
21
+ rule_names: list[str],
22
+ scan_results: list[dict],
23
+ ecosystem: ECOSYSTEM,
24
+ ) -> tuple[str, str]:
25
+ """
26
+ Report the scans results in the SARIF format.
27
+
28
+ Args:
29
+ scan_results (dict): The scan results to be reported.
30
+ """
31
+
32
+ def build_rules_help_list() -> dict:
33
+ """
34
+ Builds a dict with the names of all available rules and their documentation
35
+ @return: dict[name_of_rule, rule_description]
36
+ """
37
+ rules_documentation = {}
38
+ for ecosystem in ECOSYSTEM:
39
+ rules = get_metadata_detectors(ecosystem)
40
+ for name, instance in rules.items():
41
+ detector_class = instance.__class__.__base__
42
+ rules_documentation[name] = detector_class.__doc__
43
+ for sourcecode_rule in get_sourcecode_rules(ecosystem):
44
+ rules_documentation[sourcecode_rule.id] = (
45
+ sourcecode_rule.description
46
+ )
47
+ return rules_documentation
48
+
49
+ def get_sarif_log(runs):
50
+ """
51
+ https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#sariflog-object
52
+ """
53
+ return {
54
+ "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
55
+ "version": "2.1.0",
56
+ "runs": runs,
57
+ }
7
58
 
59
+ def get_run(results, driver):
60
+ """
61
+ https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#run-object
62
+ """
63
+ return {"tool": {"driver": driver}, "results": results}
64
+
65
+ def get_driver(rules, ecosystem: str):
66
+ """
67
+ https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#toolcomponent-object
68
+ """
69
+ return {
70
+ "name": f"GuardDog-{ecosystem}",
71
+ "informationUri": "https://github.com/DataDog/guarddog",
72
+ "rules": rules,
73
+ }
8
74
 
9
- def build_rules_help_list() -> dict:
10
- """
11
- Builds a dict with the names of all available rules and their documentation
12
- @return: dict[name_of_rule, rule_description]
13
- """
14
- rules_documentation = {}
15
- for ecosystem in ECOSYSTEM:
16
- rules = get_metadata_detectors(ecosystem)
17
- for name, instance in rules.items():
18
- detector_class = instance.__class__.__base__
19
- rules_documentation[name] = detector_class.__doc__
20
- for sourcecode_rule in get_sourcecode_rules(ecosystem):
21
- rules_documentation[sourcecode_rule.id] = sourcecode_rule.description
22
- return rules_documentation
23
-
24
-
25
- def get_sarif_log(runs):
26
- """
27
- https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#sariflog-object
28
- """
29
- return {
30
- "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
31
- "version": "2.1.0",
32
- "runs": runs
33
- }
34
-
35
-
36
- def get_run(results, driver):
37
- """
38
- https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#run-object
39
- """
40
- return {
41
- "tool": {
42
- "driver": driver
43
- },
44
- "results": results
45
- }
46
-
47
-
48
- def get_driver(rules, ecosystem: str):
49
- """
50
- https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#toolcomponent-object
51
- """
52
- return {
53
- "name": f"GuardDog-{ecosystem}",
54
- "rules": rules
55
- }
56
-
57
-
58
- def get_rule(rule_name: str, rules_documentation) -> dict:
59
- """
60
- https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#reportingdescriptor-object
61
- """
62
- message = rules_documentation[rule_name] if rules_documentation[rule_name] is not None else ""
63
- return {
64
- "id": rule_name,
65
- "defaultConfiguration": {
66
- "level": "warning"
67
- },
68
- "shortDescription": {
69
- "text": f"GuardDog rule: {rule_name}"
70
- },
71
- "fullDescription": {
72
- "text": message
73
- },
74
- "help": {
75
- "text": message,
76
- "markdown": message
77
- },
78
- "properties": {
79
- "precision": "medium"
80
- }
81
- }
82
-
83
-
84
- def get_result(rule_name, locations, text, partial_fingerprints):
85
- """
86
- https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#result-object
87
- """
88
- return {
89
- "ruleId": rule_name,
90
- "message": {
91
- "text": text
92
- },
93
- "locations": locations,
94
- "partialFingerprints": partial_fingerprints
95
- }
96
-
97
-
98
- def get_location(physical_location):
99
- """
100
- https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#location-object
101
- """
102
- return {
103
- "physicalLocation": physical_location
104
- }
75
+ def get_rule(rule_name: str, rules_documentation) -> dict:
76
+ """
77
+ https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#reportingdescriptor-object
78
+ """
79
+ message = (
80
+ rules_documentation[rule_name]
81
+ if rules_documentation[rule_name] is not None
82
+ else ""
83
+ )
84
+ return {
85
+ "id": rule_name,
86
+ "defaultConfiguration": {"level": "warning"},
87
+ "shortDescription": {"text": f"GuardDog rule: {rule_name}"},
88
+ "fullDescription": {"text": message},
89
+ "help": {"text": message, "markdown": message},
90
+ "properties": {"precision": "medium"},
91
+ }
105
92
 
93
+ def get_result(rule_name, locations, text, partial_fingerprints):
94
+ """
95
+ https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#result-object
96
+ """
97
+ return {
98
+ "ruleId": rule_name,
99
+ "message": {"text": text},
100
+ "locations": locations,
101
+ "partialFingerprints": partial_fingerprints,
102
+ }
106
103
 
107
- def get_physical_location(uri, region):
108
- """
109
- https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#physicallocation-object
110
- """
111
- return {
112
- "artifactLocation": {
113
- "uri": uri
114
- },
115
- "region": region
116
- }
117
-
118
-
119
- def get_region(package_raw: str, package: str) -> dict:
120
- start_line = 0
121
- start_column = 0
122
- end_column = 0
123
- for idx, val in enumerate(package_raw.split("\n")):
124
- if package in val:
125
- start_line = idx + 1
126
- start_column = val.index(package) + 1
127
- end_column = start_column + len(package)
128
-
129
- return {
130
- "startLine": start_line,
131
- "endLine": start_line,
132
- "startColumn": start_column,
133
- "endColumn": end_column,
134
- }
135
-
136
-
137
- def report_verify_sarif(
138
- package_path: str,
139
- rule_names: list[str],
140
- scan_results: list[dict],
141
- ecosystem: ECOSYSTEM,
142
- ) -> str:
143
- rules_documentation = build_rules_help_list()
144
- rules = list(map(
145
- lambda s: get_rule(s, rules_documentation),
146
- rule_names
147
- ))
148
- driver = get_driver(rules, ecosystem.value)
149
- results = []
150
-
151
- with open(package_path, "r") as file:
152
- package_raw = file.read()
153
-
154
- for entry in scan_results:
155
- if entry["result"]["issues"] == 0:
156
- continue
157
-
158
- region = get_region(package_raw, entry["dependency"])
159
- uri = package_path[2:] if package_path.startswith('./') else package_path
160
- physical_location = get_physical_location(uri, region)
161
- location = get_location(physical_location)
162
- scan_result_details = entry["result"]["results"]
163
- package = entry["dependency"]
164
- version = entry["version"]
165
- for rule_name in scan_result_details.keys():
166
- if scan_result_details[rule_name] is None or len(scan_result_details[rule_name]) == 0:
104
+ def get_location(physical_location):
105
+ """
106
+ https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#location-object
107
+ """
108
+ return {"physicalLocation": physical_location}
109
+
110
+ def get_physical_location(uri, region):
111
+ """
112
+ https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#physicallocation-object
113
+ """
114
+ return {"artifactLocation": {"uri": uri}, "region": region}
115
+
116
+ def get_region(
117
+ dependency_files: List[DependencyFile], package: str
118
+ ) -> tuple[DependencyFile, dict]:
119
+ for dependency_file in dependency_files:
120
+ for d in dependency_file.dependencies:
121
+ if d.name == package:
122
+ return dependency_file, {
123
+ "startLine": list(d.versions)[0].location,
124
+ "endLine": list(d.versions)[0].location,
125
+ "startColumn": 1,
126
+ "endColumn": len(package),
127
+ }
128
+ raise ValueError(
129
+ f"Could not find the package {package} in the dependency files"
130
+ )
131
+
132
+ rules_documentation = build_rules_help_list()
133
+ rules = list(map(lambda s: get_rule(s, rules_documentation), rule_names))
134
+ driver = get_driver(rules, ecosystem.value)
135
+ results = []
136
+
137
+ for entry in scan_results:
138
+ if entry["result"]["issues"] == 0:
167
139
  continue
168
- text = f"On package: {package} version: {version}\n" + "\n".join(map(
169
- lambda x: x["message"],
170
- scan_result_details[rule_name]
171
- )) if isinstance(scan_result_details[rule_name], list) else scan_result_details[rule_name]
172
- key = f"{rule_name}-{text}"
173
- partial_fingerprints = {
174
- f"guarddog/v1/{rule_name}": hashlib.sha256(key.encode('utf-8')).hexdigest()
175
- }
176
- result = get_result(rule_name,
177
- [location],
178
- text,
179
- partial_fingerprints)
180
- results.append(result)
181
-
182
- runs = get_run(results, driver)
183
- log = get_sarif_log([runs])
184
- return json.dumps(log, indent=2)
140
+
141
+ dep_file, region = get_region(
142
+ dependency_files=dependency_files, package=entry["dependency"]
143
+ )
144
+ package_path = dep_file.file_path
145
+ uri = package_path[2:] if package_path.startswith("./") else package_path
146
+ physical_location = get_physical_location(uri, region)
147
+ location = get_location(physical_location)
148
+ scan_result_details = entry["result"]["results"]
149
+ package = entry["dependency"]
150
+ version = entry["version"]
151
+ for rule_name in scan_result_details.keys():
152
+ if (
153
+ scan_result_details[rule_name] is None
154
+ or len(scan_result_details[rule_name]) == 0
155
+ ):
156
+ continue
157
+
158
+ text = (
159
+ f"On package: {package} version: {version}\n"
160
+ + "\n".join(
161
+ map(
162
+ lambda x: f"{x['message']} in file {x['location']}",
163
+ scan_result_details[rule_name],
164
+ )
165
+ )
166
+ if isinstance(scan_result_details[rule_name], list)
167
+ else scan_result_details[rule_name]
168
+ )
169
+ key = f"{rule_name}-{text}"
170
+ partial_fingerprints = {
171
+ f"guarddog/v1/{rule_name}": hashlib.sha256(
172
+ key.encode("utf-8")
173
+ ).hexdigest()
174
+ }
175
+ result = get_result(rule_name, [location], text, partial_fingerprints)
176
+ results.append(result)
177
+
178
+ runs = get_run(results, driver)
179
+ log = get_sarif_log([runs])
180
+
181
+ errors = "\n".join(
182
+ [
183
+ HumanReadableReporter.print_errors(
184
+ identifier=r["dependency"], results=r["result"]
185
+ )
186
+ for r in scan_results
187
+ ]
188
+ )
189
+
190
+ return (json.dumps(log, indent=2), errors)
@@ -8,6 +8,7 @@ from .pypi_project_scanner import PypiRequirementsScanner
8
8
  from .go_package_scanner import GoModuleScanner
9
9
  from .go_project_scanner import GoDependenciesScanner
10
10
  from .github_action_scanner import GithubActionScanner
11
+ from .extension_scanner import ExtensionScanner
11
12
  from .scanner import PackageScanner, ProjectScanner
12
13
  from ..ecosystems import ECOSYSTEM
13
14
 
@@ -33,6 +34,8 @@ def get_package_scanner(ecosystem: ECOSYSTEM) -> Optional[PackageScanner]:
33
34
  return GoModuleScanner()
34
35
  case ECOSYSTEM.GITHUB_ACTION:
35
36
  return GithubActionScanner()
37
+ case ECOSYSTEM.EXTENSION:
38
+ return ExtensionScanner()
36
39
  return None
37
40
 
38
41
 
@@ -57,4 +60,6 @@ def get_project_scanner(ecosystem: ECOSYSTEM) -> Optional[ProjectScanner]:
57
60
  return GoDependenciesScanner()
58
61
  case ECOSYSTEM.GITHUB_ACTION:
59
62
  return GitHubActionDependencyScanner()
63
+ case ECOSYSTEM.EXTENSION:
64
+ return None # we're not including dependency scanning for this PR
60
65
  return None
@@ -0,0 +1,152 @@
1
+ import logging
2
+ import os
3
+ import typing
4
+
5
+ import requests
6
+
7
+ from guarddog.analyzer.analyzer import Analyzer
8
+ from guarddog.ecosystems import ECOSYSTEM
9
+ from guarddog.scanners.scanner import PackageScanner, noop
10
+
11
+ log = logging.getLogger("guarddog")
12
+
13
+ MARKETPLACE_URL = (
14
+ "https://marketplace.visualstudio.com/_apis/public/gallery/extensionquery"
15
+ )
16
+ MARKETPLACE_HEADERS = {
17
+ "Content-Type": "application/json",
18
+ "Accept": "application/json;api-version=3.0-preview.1",
19
+ }
20
+ MARKETPLACE_DOWNLOAD_LINK_ASSET_TYPE = "Microsoft.VisualStudio.Services.VSIXPackage"
21
+ VSIX_FILE_EXTENSION = ".vsix"
22
+
23
+ # VSCode Marketplace API filter types
24
+ # FilterType 7 = publisherName.extensionName (search by exact extension identifier)
25
+ MARKETPLACE_FILTER_TYPE_EXTENSION_NAME = 7
26
+
27
+ # VSCode Marketplace API flags (bitwise combination)
28
+ # 446 = IncludeVersions | IncludeFiles | IncludeMetadata
29
+ MARKETPLACE_FLAGS_FULL_METADATA = 446
30
+
31
+
32
+ class ExtensionScanner(PackageScanner):
33
+ def __init__(self) -> None:
34
+ super().__init__(Analyzer(ECOSYSTEM.EXTENSION))
35
+
36
+ def download_and_get_package_info(
37
+ self, directory: str, package_name: str, version=None
38
+ ) -> typing.Tuple[dict, str]:
39
+ """
40
+ Downloads a VSCode extension from the marketplace and extracts it
41
+
42
+ Args:
43
+ directory: Directory to download to
44
+ package_name: Extension identifier (publisher.extension format)
45
+ version: Specific version or default to latest
46
+
47
+ Returns:
48
+ Tuple of (marketplace API response, extracted_path)
49
+ """
50
+ marketplace_data, vsix_url = self._get_marketplace_info_and_url(
51
+ package_name, version
52
+ )
53
+
54
+ vsix_path = os.path.join(
55
+ directory, package_name.replace("/", "-") + VSIX_FILE_EXTENSION
56
+ )
57
+ extracted_path = vsix_path.removesuffix(VSIX_FILE_EXTENSION)
58
+
59
+ log.debug(f"Downloading VSCode extension from {vsix_url}")
60
+
61
+ self.download_compressed(vsix_url, vsix_path, extracted_path)
62
+
63
+ return marketplace_data, extracted_path
64
+
65
+ def _get_marketplace_info_and_url(
66
+ self, package_name: str, version: typing.Optional[str] = None
67
+ ) -> typing.Tuple[dict, str]:
68
+ """Get marketplace metadata and VSIX download URL"""
69
+ payload = {
70
+ "filters": [
71
+ {
72
+ "criteria": [
73
+ {
74
+ "filterType": MARKETPLACE_FILTER_TYPE_EXTENSION_NAME,
75
+ "value": package_name,
76
+ }
77
+ ]
78
+ }
79
+ ],
80
+ "flags": MARKETPLACE_FLAGS_FULL_METADATA,
81
+ }
82
+
83
+ response = requests.post(
84
+ MARKETPLACE_URL, headers=MARKETPLACE_HEADERS, json=payload
85
+ )
86
+
87
+ response.raise_for_status()
88
+
89
+ data = response.json()
90
+
91
+ if not data.get("results") or not data["results"][0].get("extensions"):
92
+ raise ValueError(f"Extension {package_name} not found in marketplace")
93
+
94
+ extension_info = data["results"][0]["extensions"][0]
95
+ versions = extension_info.get("versions", [])
96
+
97
+ if not versions:
98
+ raise ValueError(
99
+ f"No versions available for this extension: {package_name}"
100
+ )
101
+
102
+ target_version = None
103
+ if version is None:
104
+ # if not version is provided, default to latest
105
+ target_version = versions[0]
106
+ else:
107
+ for v in versions:
108
+ if v.get("version") == version:
109
+ target_version = v
110
+ break
111
+ if target_version is None:
112
+ raise ValueError(
113
+ f"Version {version} not found for extension: {package_name}"
114
+ )
115
+
116
+ # Extract download URL
117
+ files = target_version.get("files", [])
118
+ vsix_url = None
119
+ for file_info in files:
120
+ if file_info.get("assetType") == MARKETPLACE_DOWNLOAD_LINK_ASSET_TYPE:
121
+ vsix_url = file_info.get("source")
122
+ break
123
+
124
+ if not vsix_url:
125
+ raise ValueError(
126
+ f"No VSIX download link available for this extension: {package_name}"
127
+ )
128
+
129
+ return data, vsix_url
130
+
131
+ def scan_local(
132
+ self, path: str, rules=None, callback: typing.Callable[[dict], None] = noop
133
+ ) -> dict:
134
+ """
135
+ Scan a local VSCode extension directory
136
+
137
+ Args:
138
+ path: Path to extension directory containing package.json
139
+ rules: Set of rules to use
140
+ callback: Callback to apply to analyzer output
141
+
142
+ Returns:
143
+ Scan results
144
+ """
145
+ if rules is not None:
146
+ rules = set(rules)
147
+
148
+ # Use only sourcecode analysis for local scans, consistent with other ecosystems
149
+ results = self.analyzer.analyze_sourcecode(path, rules=rules)
150
+ callback(results)
151
+
152
+ return results
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import os
2
3
  from typing import List, Dict, TypedDict
3
4
  from typing_extensions import NotRequired
4
5
 
@@ -7,6 +8,7 @@ import re
7
8
 
8
9
  from guarddog.scanners.github_action_scanner import GithubActionScanner
9
10
  from guarddog.scanners.scanner import ProjectScanner
11
+ from guarddog.scanners.scanner import Dependency, DependencyVersion
10
12
 
11
13
  log = logging.getLogger("guarddog")
12
14
 
@@ -66,17 +68,40 @@ class GitHubActionDependencyScanner(ProjectScanner):
66
68
  def __init__(self) -> None:
67
69
  super().__init__(GithubActionScanner())
68
70
 
69
- def parse_requirements(self, raw_requirements: str) -> dict[str, set[str]]:
71
+ def parse_requirements(self, raw_requirements: str) -> List[Dependency]:
70
72
  actions = self.parse_workflow_3rd_party_actions(raw_requirements)
73
+ dependencies: List[Dependency] = []
71
74
 
72
- requirements: dict[str, set[str]] = {}
73
75
  for action in actions:
74
- repo, version = action["name"], action["ref"]
75
- if repo in requirements:
76
- requirements[repo].add(version)
77
- else:
78
- requirements[repo] = {version}
79
- return requirements
76
+ name = action["name"]
77
+ version = action["ref"]
78
+ idx = next(
79
+ iter(
80
+ [
81
+ ix
82
+ for ix, line in enumerate(raw_requirements.splitlines())
83
+ if name in line
84
+ ]
85
+ ),
86
+ 0,
87
+ )
88
+ # find the dep with the same name or create a new one
89
+ dep_versions = [DependencyVersion(version=version, location=idx + 1)]
90
+
91
+ dep = next(
92
+ filter(
93
+ lambda d: d.name == name,
94
+ dependencies,
95
+ ),
96
+ None,
97
+ )
98
+ if not dep:
99
+ dep = Dependency(name=name, versions=set())
100
+ dependencies.append(dep)
101
+
102
+ dep.versions.update(dep_versions)
103
+
104
+ return dependencies
80
105
 
81
106
  def parse_workflow_3rd_party_actions(
82
107
  self, workflow_file: str
@@ -99,3 +124,17 @@ class GitHubActionDependencyScanner(ProjectScanner):
99
124
  if action:
100
125
  actions.append(action)
101
126
  return actions
127
+
128
+ def find_requirements(self, directory: str) -> list[str]:
129
+ requirement_files = []
130
+
131
+ if not os.path.isdir(os.path.join(directory, ".git")):
132
+ raise Exception(
133
+ "unable to find github workflows, not called from git directory"
134
+ )
135
+ workflow_folder = os.path.join(directory, ".github/workflows")
136
+ if os.path.isdir(workflow_folder):
137
+ for name in os.listdir(workflow_folder):
138
+ if re.match(r"^(.+)\.y(a)?ml$", name, flags=re.IGNORECASE):
139
+ requirement_files.append(os.path.join(workflow_folder, name))
140
+ return requirement_files
@@ -15,7 +15,9 @@ class GithubActionScanner(PackageScanner):
15
15
  def __init__(self) -> None:
16
16
  super().__init__(Analyzer(ECOSYSTEM.GITHUB_ACTION))
17
17
 
18
- def download_and_get_package_info(self, directory: str, package_name: str, version=None) -> typing.Tuple[dict, str]:
18
+ def download_and_get_package_info(
19
+ self, directory: str, package_name: str, version=None
20
+ ) -> typing.Tuple[dict, str]:
19
21
  repo = self._get_repo(package_name)
20
22
  tarball_url = self._get_git_tarball_url(repo, version)
21
23
 
@@ -25,7 +27,9 @@ class GithubActionScanner(PackageScanner):
25
27
  if file_extension == "":
26
28
  file_extension = ".zip"
27
29
 
28
- zippath = os.path.join(directory, package_name.replace("/", "-") + file_extension)
30
+ zippath = os.path.join(
31
+ directory, package_name.replace("/", "-") + file_extension
32
+ )
29
33
  unzippedpath = zippath.removesuffix(file_extension)
30
34
  self.download_compressed(tarball_url, zippath, unzippedpath)
31
35