guarddog 2.5.0__py3-none-any.whl → 2.6.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to its public registry. It is provided for informational purposes only.
@@ -0,0 +1,50 @@
+ from enum import Enum, auto
+ from typing import Optional
+
+ from guarddog.reporters import BaseReporter
+ from guarddog.reporters.human_readable import HumanReadableReporter
+ from guarddog.reporters.sarif import SarifReporter
+ from guarddog.reporters.json import JsonReporter
+
+
+ class ReporterType(Enum):
+     """
+     Enum representing the different types of reporters available.
+     """
+
+     HUMAN_READABLE = auto()
+     SARIF = auto()
+     JSON = auto()
+
+     @classmethod
+     def from_str(cls, type: Optional[str]) -> "ReporterType":
+         if not type:
+             return cls.HUMAN_READABLE
+         match (type).lower():
+             case "human_readable":
+                 return cls.HUMAN_READABLE
+             case "sarif":
+                 return cls.SARIF
+             case "json":
+                 return cls.JSON
+             case _:
+                 raise ValueError(f"Unsupported reporter type: {type}")
+
+
+ class ReporterFactory:
+     """
+     Factory class for creating reporter instances based on the reporter type.
+     """
+
+     @staticmethod
+     def create_reporter(reporter_type: ReporterType) -> type[BaseReporter]:
+         """
+         Create a reporter instance based on the reporter type.
+         """
+         match reporter_type:
+             case ReporterType.HUMAN_READABLE:
+                 return HumanReadableReporter
+             case ReporterType.SARIF:
+                 return SarifReporter
+             case ReporterType.JSON:
+                 return JsonReporter
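
For context, a minimal usage sketch of the new factory. The import path guarddog.reporters.factory is an assumption (this diff does not show the file name); only ReporterType and ReporterFactory come from the hunk above.

# Minimal usage sketch; module path is assumed, names are from the hunk above.
from guarddog.reporters.factory import ReporterFactory, ReporterType

# None or "" falls back to HUMAN_READABLE; an unknown string raises ValueError.
reporter_cls = ReporterFactory.create_reporter(ReporterType.from_str("sarif"))
print(reporter_cls.__name__)  # SarifReporter -- the factory returns the class, not an instance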
@@ -4,181 +4,187 @@ import json
  from guarddog.analyzer.sourcecode import get_sourcecode_rules
  from guarddog.analyzer.metadata import get_metadata_detectors
  from guarddog.ecosystems import ECOSYSTEM
+ from guarddog.reporters import BaseReporter
+ from guarddog.scanners.scanner import DependencyFile
+ from typing import List
+ from guarddog.reporters.human_readable import HumanReadableReporter
+
+
+ class SarifReporter(BaseReporter):
+     """
+     Sarif is a class that formats and prints scan results in the SARIF format.
+     """
+
+     @staticmethod
+     def render_verify(
+         dependency_files: List[DependencyFile],
+         rule_names: list[str],
+         scan_results: list[dict],
+         ecosystem: ECOSYSTEM,
+     ) -> tuple[str, str]:
+         """
+         Report the scans results in the SARIF format.
+
+         Args:
+             scan_results (dict): The scan results to be reported.
+         """
+
+         def build_rules_help_list() -> dict:
+             """
+             Builds a dict with the names of all available rules and their documentation
+             @return: dict[name_of_rule, rule_description]
+             """
+             rules_documentation = {}
+             for ecosystem in ECOSYSTEM:
+                 rules = get_metadata_detectors(ecosystem)
+                 for name, instance in rules.items():
+                     detector_class = instance.__class__.__base__
+                     rules_documentation[name] = detector_class.__doc__
+                 for sourcecode_rule in get_sourcecode_rules(ecosystem):
+                     rules_documentation[sourcecode_rule.id] = (
+                         sourcecode_rule.description
+                     )
+             return rules_documentation
+
+         def get_sarif_log(runs):
+             """
+             https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#sariflog-object
+             """
+             return {
+                 "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
+                 "version": "2.1.0",
+                 "runs": runs,
+             }

+         def get_run(results, driver):
+             """
+             https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#run-object
+             """
+             return {"tool": {"driver": driver}, "results": results}
+
+         def get_driver(rules, ecosystem: str):
+             """
+             https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#toolcomponent-object
+             """
+             return {
+                 "name": f"GuardDog-{ecosystem}",
+                 "informationUri": "https://github.com/DataDog/guarddog",
+                 "rules": rules,
+             }

- def build_rules_help_list() -> dict:
-     """
-     Builds a dict with the names of all available rules and their documentation
-     @return: dict[name_of_rule, rule_description]
-     """
-     rules_documentation = {}
-     for ecosystem in ECOSYSTEM:
-         rules = get_metadata_detectors(ecosystem)
-         for name, instance in rules.items():
-             detector_class = instance.__class__.__base__
-             rules_documentation[name] = detector_class.__doc__
-         for sourcecode_rule in get_sourcecode_rules(ecosystem):
-             rules_documentation[sourcecode_rule.id] = sourcecode_rule.description
-     return rules_documentation
-
-
- def get_sarif_log(runs):
-     """
-     https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#sariflog-object
-     """
-     return {
-         "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
-         "version": "2.1.0",
-         "runs": runs
-     }
-
-
- def get_run(results, driver):
-     """
-     https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#run-object
-     """
-     return {
-         "tool": {
-             "driver": driver
-         },
-         "results": results
-     }
-
-
- def get_driver(rules, ecosystem: str):
-     """
-     https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#toolcomponent-object
-     """
-     return {
-         "name": f"GuardDog-{ecosystem}",
-         "rules": rules
-     }
-
-
- def get_rule(rule_name: str, rules_documentation) -> dict:
-     """
-     https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#reportingdescriptor-object
-     """
-     message = rules_documentation[rule_name] if rules_documentation[rule_name] is not None else ""
-     return {
-         "id": rule_name,
-         "defaultConfiguration": {
-             "level": "warning"
-         },
-         "shortDescription": {
-             "text": f"GuardDog rule: {rule_name}"
-         },
-         "fullDescription": {
-             "text": message
-         },
-         "help": {
-             "text": message,
-             "markdown": message
-         },
-         "properties": {
-             "precision": "medium"
-         }
-     }
-
-
- def get_result(rule_name, locations, text, partial_fingerprints):
-     """
-     https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#result-object
-     """
-     return {
-         "ruleId": rule_name,
-         "message": {
-             "text": text
-         },
-         "locations": locations,
-         "partialFingerprints": partial_fingerprints
-     }
-
-
- def get_location(physical_location):
-     """
-     https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#location-object
-     """
-     return {
-         "physicalLocation": physical_location
-     }
+         def get_rule(rule_name: str, rules_documentation) -> dict:
+             """
+             https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#reportingdescriptor-object
+             """
+             message = (
+                 rules_documentation[rule_name]
+                 if rules_documentation[rule_name] is not None
+                 else ""
+             )
+             return {
+                 "id": rule_name,
+                 "defaultConfiguration": {"level": "warning"},
+                 "shortDescription": {"text": f"GuardDog rule: {rule_name}"},
+                 "fullDescription": {"text": message},
+                 "help": {"text": message, "markdown": message},
+                 "properties": {"precision": "medium"},
+             }

+         def get_result(rule_name, locations, text, partial_fingerprints):
+             """
+             https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#result-object
+             """
+             return {
+                 "ruleId": rule_name,
+                 "message": {"text": text},
+                 "locations": locations,
+                 "partialFingerprints": partial_fingerprints,
+             }

- def get_physical_location(uri, region):
-     """
-     https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#physicallocation-object
-     """
-     return {
-         "artifactLocation": {
-             "uri": uri
-         },
-         "region": region
-     }
-
-
- def get_region(package_raw: str, package: str) -> dict:
-     start_line = 0
-     start_column = 0
-     end_column = 0
-     for idx, val in enumerate(package_raw.split("\n")):
-         if package in val:
-             start_line = idx + 1
-             start_column = val.index(package) + 1
-             end_column = start_column + len(package)
-
-     return {
-         "startLine": start_line,
-         "endLine": start_line,
-         "startColumn": start_column,
-         "endColumn": end_column,
-     }
-
-
- def report_verify_sarif(
-     package_path: str,
-     rule_names: list[str],
-     scan_results: list[dict],
-     ecosystem: ECOSYSTEM,
- ) -> str:
-     rules_documentation = build_rules_help_list()
-     rules = list(map(
-         lambda s: get_rule(s, rules_documentation),
-         rule_names
-     ))
-     driver = get_driver(rules, ecosystem.value)
-     results = []
-
-     with open(package_path, "r") as file:
-         package_raw = file.read()
-
-     for entry in scan_results:
-         if entry["result"]["issues"] == 0:
-             continue
-
-         region = get_region(package_raw, entry["dependency"])
-         uri = package_path[2:] if package_path.startswith('./') else package_path
-         physical_location = get_physical_location(uri, region)
-         location = get_location(physical_location)
-         scan_result_details = entry["result"]["results"]
-         package = entry["dependency"]
-         version = entry["version"]
-         for rule_name in scan_result_details.keys():
-             if scan_result_details[rule_name] is None or len(scan_result_details[rule_name]) == 0:
+         def get_location(physical_location):
+             """
+             https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#location-object
+             """
+             return {"physicalLocation": physical_location}
+
+         def get_physical_location(uri, region):
+             """
+             https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#physicallocation-object
+             """
+             return {"artifactLocation": {"uri": uri}, "region": region}
+
+         def get_region(
+             dependency_files: List[DependencyFile], package: str
+         ) -> tuple[DependencyFile, dict]:
+             for dependency_file in dependency_files:
+                 for d in dependency_file.dependencies:
+                     if d.name == package:
+                         return dependency_file, {
+                             "startLine": list(d.versions)[0].location,
+                             "endLine": list(d.versions)[0].location,
+                             "startColumn": 1,
+                             "endColumn": len(package),
+                         }
+             raise ValueError(
+                 f"Could not find the package {package} in the dependency files"
+             )
+
+         rules_documentation = build_rules_help_list()
+         rules = list(map(lambda s: get_rule(s, rules_documentation), rule_names))
+         driver = get_driver(rules, ecosystem.value)
+         results = []
+
+         for entry in scan_results:
+             if entry["result"]["issues"] == 0:
                   continue
-             text = f"On package: {package} version: {version}\n" + "\n".join(map(
-                 lambda x: x["message"],
-                 scan_result_details[rule_name]
-             )) if isinstance(scan_result_details[rule_name], list) else scan_result_details[rule_name]
-             key = f"{rule_name}-{text}"
-             partial_fingerprints = {
-                 f"guarddog/v1/{rule_name}": hashlib.sha256(key.encode('utf-8')).hexdigest()
-             }
-             result = get_result(rule_name,
-                                 [location],
-                                 text,
-                                 partial_fingerprints)
-             results.append(result)
-
-     runs = get_run(results, driver)
-     log = get_sarif_log([runs])
-     return json.dumps(log, indent=2)
+
+             dep_file, region = get_region(
+                 dependency_files=dependency_files, package=entry["dependency"]
+             )
+             package_path = dep_file.file_path
+             uri = package_path[2:] if package_path.startswith("./") else package_path
+             physical_location = get_physical_location(uri, region)
+             location = get_location(physical_location)
+             scan_result_details = entry["result"]["results"]
+             package = entry["dependency"]
+             version = entry["version"]
+             for rule_name in scan_result_details.keys():
+                 if (
+                     scan_result_details[rule_name] is None
+                     or len(scan_result_details[rule_name]) == 0
+                 ):
+                     continue
+
+                 text = (
+                     f"On package: {package} version: {version}\n"
+                     + "\n".join(
+                         map(
+                             lambda x: f"{x['message']} in file {x['location']}",
+                             scan_result_details[rule_name],
+                         )
+                     )
+                     if isinstance(scan_result_details[rule_name], list)
+                     else scan_result_details[rule_name]
+                 )
+                 key = f"{rule_name}-{text}"
+                 partial_fingerprints = {
+                     f"guarddog/v1/{rule_name}": hashlib.sha256(
+                         key.encode("utf-8")
+                     ).hexdigest()
+                 }
+                 result = get_result(rule_name, [location], text, partial_fingerprints)
+                 results.append(result)
+
+         runs = get_run(results, driver)
+         log = get_sarif_log([runs])
+
+         errors = "\n".join(
+             [
+                 HumanReadableReporter.print_errors(
+                     identifier=r["dependency"], results=r["result"]
+                 )
+                 for r in scan_results
+             ]
+         )
+
+         return (json.dumps(log, indent=2), errors)
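
For reference, the first element of the (sarif, errors) tuple returned by the new render_verify has the shape below, assembled from the get_sarif_log, get_run, get_driver, get_rule and get_result helpers in this hunk. Every concrete value is illustrative; the second element is the human-readable error text built via HumanReadableReporter.print_errors.

# Shape sketch of the SARIF document produced above; all values are illustrative.
sarif_log = {
    "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
    "version": "2.1.0",
    "runs": [
        {
            "tool": {
                "driver": {
                    "name": "GuardDog-npm",  # f"GuardDog-{ecosystem.value}"
                    "informationUri": "https://github.com/DataDog/guarddog",
                    "rules": [
                        {
                            "id": "example_rule",  # one entry per rule name, built by get_rule
                            "defaultConfiguration": {"level": "warning"},
                            "shortDescription": {"text": "GuardDog rule: example_rule"},
                            "fullDescription": {"text": "rule documentation"},
                            "help": {"text": "rule documentation", "markdown": "rule documentation"},
                            "properties": {"precision": "medium"},
                        }
                    ],
                }
            },
            "results": [
                {
                    "ruleId": "example_rule",
                    "message": {"text": "On package: example-package version: 1.0.0\n..."},
                    "locations": [
                        {
                            "physicalLocation": {
                                "artifactLocation": {"uri": "package.json"},
                                "region": {
                                    "startLine": 12,  # line of the dependency in the manifest
                                    "endLine": 12,
                                    "startColumn": 1,
                                    "endColumn": 15,  # len("example-package")
                                },
                            }
                        }
                    ],
                    "partialFingerprints": {
                        "guarddog/v1/example_rule": "<sha256 of rule_name + message>"
                    },
                }
            ],
        }
    ],
}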
@@ -1,4 +1,5 @@
  import logging
+ import os
  from typing import List, Dict, TypedDict
  from typing_extensions import NotRequired

@@ -7,6 +8,7 @@ import re

  from guarddog.scanners.github_action_scanner import GithubActionScanner
  from guarddog.scanners.scanner import ProjectScanner
+ from guarddog.scanners.scanner import Dependency, DependencyVersion

  log = logging.getLogger("guarddog")

@@ -66,17 +68,40 @@ class GitHubActionDependencyScanner(ProjectScanner):
      def __init__(self) -> None:
          super().__init__(GithubActionScanner())

-     def parse_requirements(self, raw_requirements: str) -> dict[str, set[str]]:
+     def parse_requirements(self, raw_requirements: str) -> List[Dependency]:
          actions = self.parse_workflow_3rd_party_actions(raw_requirements)
+         dependencies: List[Dependency] = []

-         requirements: dict[str, set[str]] = {}
          for action in actions:
-             repo, version = action["name"], action["ref"]
-             if repo in requirements:
-                 requirements[repo].add(version)
-             else:
-                 requirements[repo] = {version}
-         return requirements
+             name = action["name"]
+             version = action["ref"]
+             idx = next(
+                 iter(
+                     [
+                         ix
+                         for ix, line in enumerate(raw_requirements.splitlines())
+                         if name in line
+                     ]
+                 ),
+                 0,
+             )
+             # find the dep with the same name or create a new one
+             dep_versions = [DependencyVersion(version=version, location=idx + 1)]
+
+             dep = next(
+                 filter(
+                     lambda d: d.name == name,
+                     dependencies,
+                 ),
+                 None,
+             )
+             if not dep:
+                 dep = Dependency(name=name, versions=set())
+                 dependencies.append(dep)
+
+             dep.versions.update(dep_versions)
+
+         return dependencies

      def parse_workflow_3rd_party_actions(
          self, workflow_file: str
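
The rewritten parse_requirements (here and in the Go and npm scanners below) returns a list of Dependency objects instead of a dict of name to version strings. A minimal sketch of that shape, using only the constructor calls and attribute names visible in these hunks; the action name, version and line number are illustrative.

# Sketch of the new return shape of parse_requirements; values are illustrative.
from guarddog.scanners.scanner import Dependency, DependencyVersion

dep = Dependency(name="actions/checkout", versions=set())
# location is the 1-based line where the dependency appears in the manifest
dep.versions.update([DependencyVersion(version="v4", location=12)])

for v in dep.versions:
    print(dep.name, v.version, v.location)  # actions/checkout v4 12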
@@ -99,3 +124,17 @@ class GitHubActionDependencyScanner(ProjectScanner):
              if action:
                  actions.append(action)
          return actions
+
+     def find_requirements(self, directory: str) -> list[str]:
+         requirement_files = []
+
+         if not os.path.isdir(os.path.join(directory, ".git")):
+             raise Exception(
+                 "unable to find github workflows, not called from git directory"
+             )
+         workflow_folder = os.path.join(directory, ".github/workflows")
+         if os.path.isdir(workflow_folder):
+             for name in os.listdir(workflow_folder):
+                 if re.match(r"^(.+)\.y(a)?ml$", name, flags=re.IGNORECASE):
+                     requirement_files.append(os.path.join(workflow_folder, name))
+         return requirement_files
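
A behaviour sketch for the new find_requirements hook: it refuses to run outside a git checkout and only collects .yml / .yaml files directly under .github/workflows. The class name comes from the hunk header above; the module path in the import is an assumption, not something this diff shows.

# Behaviour sketch; module path guarddog.scanners.github_action_project_scanner is assumed.
from guarddog.scanners.github_action_project_scanner import GitHubActionDependencyScanner

scanner = GitHubActionDependencyScanner()
try:
    workflows = scanner.find_requirements(".")
except Exception as exc:
    print(exc)  # "unable to find github workflows, not called from git directory"
else:
    print(workflows)  # e.g. ['./.github/workflows/ci.yml'] for a git checkout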
@@ -1,9 +1,12 @@
  import logging
+ import os
+ import re
  from dataclasses import dataclass
  from typing import List

  from guarddog.scanners.go_package_scanner import GoModuleScanner
  from guarddog.scanners.scanner import ProjectScanner
+ from guarddog.scanners.scanner import Dependency, DependencyVersion

  log = logging.getLogger("guarddog")

@@ -26,13 +29,39 @@ class GoDependenciesScanner(ProjectScanner):
      def __init__(self) -> None:
          super().__init__(GoModuleScanner())

-     def parse_requirements(self, raw_requirements: str) -> dict[str, set[str]]:
+     def parse_requirements(self, raw_requirements: str) -> List[Dependency]:
          main_mod = self.parse_go_mod_file(raw_requirements)

-         return {
-             requirement.module: set([requirement.version])
-             for requirement in main_mod.requirements
-         }
+         dependencies: List[Dependency] = []
+         for dependency in main_mod.requirements:
+             version = dependency.version
+             name = dependency.module
+             idx = next(
+                 iter(
+                     [
+                         ix
+                         for ix, line in enumerate(raw_requirements.splitlines())
+                         if name in line
+                     ]
+                 ),
+                 0,
+             )
+
+             dep_versions = [DependencyVersion(version=version, location=idx + 1)]
+
+             dep = next(
+                 filter(
+                     lambda d: d.name == name,
+                     dependencies,
+                 ),
+                 None
+             )
+             if not dep:
+                 dep = Dependency(name=name, versions=set())
+                 dependencies.append(dep)
+
+             dep.versions.update(dep_versions)
+         return dependencies

      # Read https://go.dev/ref/mod#go-mod-file to learn more about the go.mod syntax
      def parse_go_mod_file(self, go_mod_content: str) -> GoModule:
@@ -66,3 +95,11 @@ class GoDependenciesScanner(ProjectScanner):
          # TODO: support exclude, replace and retract statements

          return GoModule(module, go, toolchain, requirements)
+
+     def find_requirements(self, directory: str) -> list[str]:
+         requirement_files = []
+         for root, dirs, files in os.walk(directory):
+             for name in files:
+                 if re.match(r"^go\.mod$", name, flags=re.IGNORECASE):
+                     requirement_files.append(os.path.join(root, name))
+         return requirement_files
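
All three rewritten scanners locate a dependency with the same heuristic: the first line of the raw manifest whose text contains the dependency name, falling back to index 0, reported 1-based as DependencyVersion.location. A standalone illustration of that pattern (the go.mod content is made up):

# Standalone illustration of the line-location heuristic used by the new
# parse_requirements implementations.
go_mod = """module example.com/app

go 1.22

require (
    github.com/stretchr/testify v1.9.0
)
"""

name = "github.com/stretchr/testify"
idx = next(
    iter([ix for ix, line in enumerate(go_mod.splitlines()) if name in line]),
    0,
)
print(idx + 1)  # 6 -- the require entry's line, used as DependencyVersion.location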
@@ -1,11 +1,15 @@
  import json
  import logging
+ import os
+ import re
+ from typing import List
+
  import requests
  from semantic_version import NpmSpec, Version # type:ignore

- from guarddog.utils.config import VERIFY_EXHAUSTIVE_DEPENDENCIES
  from guarddog.scanners.npm_package_scanner import NPMPackageScanner
- from guarddog.scanners.scanner import ProjectScanner
+ from guarddog.scanners.scanner import Dependency, DependencyVersion, ProjectScanner
+ from guarddog.utils.config import VERIFY_EXHAUSTIVE_DEPENDENCIES

  log = logging.getLogger("guarddog")

@@ -21,7 +25,7 @@ class NPMRequirementsScanner(ProjectScanner):
      def __init__(self) -> None:
          super().__init__(NPMPackageScanner())

-     def parse_requirements(self, raw_requirements: str) -> dict:
+     def parse_requirements(self, raw_requirements: str) -> List[Dependency]:
          """
          Parses requirements.txt specification and finds all valid
          versions of each dependency
@@ -40,8 +44,8 @@ class NPMRequirementsScanner(ProjectScanner):
            }
          """
          package = json.loads(raw_requirements)
-         dependencies = package["dependencies"] if "dependencies" in package else {}
-         dev_dependencies = (
+         dependencies_attr = package["dependencies"] if "dependencies" in package else {}
+         dev_dependencies_attr = (
              package["devDependencies"] if "devDependencies" in package else {}
          )

@@ -82,23 +86,63 @@ class NPMRequirementsScanner(ProjectScanner):
              return versions

          merged = {} # type: dict[str, set[str]]
-         for package, selector in list(dependencies.items()) + list(
-             dev_dependencies.items()
+         for package, selector in list(dependencies_attr.items()) + list(
+             dev_dependencies_attr.items()
          ):
              if package not in merged:
                  merged[package] = set()
              merged[package].add(selector)

-         results = {}
+         dependencies: List[Dependency] = []
          for package, all_selectors in merged.items():
              versions = set() # type: set[str]
              for selector in all_selectors:
                  versions = versions.union(
                      get_matched_versions(find_all_versions(package), selector)
                  )
+
              if len(versions) == 0:
                  log.error(f"Package/Version {package} not on NPM\n")
                  continue

-             results[package] = versions
-         return results
+             idx = next(
+                 iter(
+                     [
+                         ix
+                         for ix, line in enumerate(raw_requirements.splitlines())
+                         if package in line
+                     ]
+                 ),
+                 0,
+             )
+
+             dep_versions = list(
+                 map(
+                     lambda d: DependencyVersion(version=d, location=idx + 1),
+                     versions,
+                 )
+             )
+
+             # find the dep with the same name or create a new one
+             dep = next(
+                 filter(
+                     lambda d: d.name == package,
+                     dependencies,
+                 ),
+                 None,
+             )
+             if not dep:
+                 dep = Dependency(name=package, versions=set())
+                 dependencies.append(dep)
+
+             dep.versions.update(dep_versions)
+
+         return dependencies
+
+     def find_requirements(self, directory: str) -> list[str]:
+         requirement_files = []
+         for root, dirs, files in os.walk(directory):
+             for name in files:
+                 if re.match(r"^package\.json$", name, flags=re.IGNORECASE):
+                     requirement_files.append(os.path.join(root, name))
+         return requirement_files
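
Putting the two new hooks together, a verify-style walk over a project could look roughly like this. Only find_requirements and parse_requirements come from the hunks above; the module path is an assumption and the surrounding glue is illustrative (the npm parse_requirements resolves versions against the registry, so it needs network access).

# Illustrative glue only; module path guarddog.scanners.npm_project_scanner is assumed.
from guarddog.scanners.npm_project_scanner import NPMRequirementsScanner

scanner = NPMRequirementsScanner()
for manifest in scanner.find_requirements("."):
    with open(manifest, "r") as f:
        for dep in scanner.parse_requirements(f.read()):
            for v in dep.versions:
                print(f"{manifest}:{v.location} {dep.name}@{v.version}")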