owasp-depscan 5.5.0__py3-none-any.whl → 6.0.0a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- depscan/__init__.py +8 -0
- depscan/cli.py +719 -827
- depscan/cli_options.py +302 -0
- depscan/lib/audit.py +3 -1
- depscan/lib/bom.py +387 -289
- depscan/lib/config.py +86 -337
- depscan/lib/explainer.py +389 -101
- depscan/lib/license.py +11 -10
- depscan/lib/logger.py +65 -17
- depscan/lib/package_query/__init__.py +0 -0
- depscan/lib/package_query/cargo_pkg.py +124 -0
- depscan/lib/package_query/metadata.py +170 -0
- depscan/lib/package_query/npm_pkg.py +345 -0
- depscan/lib/package_query/pkg_query.py +195 -0
- depscan/lib/package_query/pypi_pkg.py +113 -0
- depscan/lib/tomlparse.py +116 -0
- depscan/lib/utils.py +34 -188
- owasp_depscan-6.0.0a3.dist-info/METADATA +388 -0
- {owasp_depscan-5.5.0.dist-info → owasp_depscan-6.0.0a3.dist-info}/RECORD +28 -25
- {owasp_depscan-5.5.0.dist-info → owasp_depscan-6.0.0a3.dist-info}/WHEEL +1 -1
- vendor/choosealicense.com/_licenses/cern-ohl-p-2.0.txt +1 -1
- vendor/choosealicense.com/_licenses/cern-ohl-s-2.0.txt +1 -1
- vendor/choosealicense.com/_licenses/cern-ohl-w-2.0.txt +2 -2
- vendor/choosealicense.com/_licenses/mit-0.txt +1 -1
- vendor/spdx/json/licenses.json +904 -677
- depscan/lib/analysis.py +0 -1554
- depscan/lib/csaf.py +0 -1860
- depscan/lib/normalize.py +0 -312
- depscan/lib/orasclient.py +0 -142
- depscan/lib/pkg_query.py +0 -532
- owasp_depscan-5.5.0.dist-info/METADATA +0 -580
- {owasp_depscan-5.5.0.dist-info → owasp_depscan-6.0.0a3.dist-info}/entry_points.txt +0 -0
- {owasp_depscan-5.5.0.dist-info → owasp_depscan-6.0.0a3.dist-info/licenses}/LICENSE +0 -0
- {owasp_depscan-5.5.0.dist-info → owasp_depscan-6.0.0a3.dist-info}/top_level.txt +0 -0
depscan/lib/explainer.py
CHANGED
|
@@ -1,69 +1,197 @@
|
|
|
1
|
-
import json
|
|
2
1
|
import os
|
|
3
2
|
import re
|
|
4
|
-
|
|
3
|
+
import glob
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
from custom_json_diff.lib.utils import json_load
|
|
5
6
|
from rich import box
|
|
6
7
|
from rich.markdown import Markdown
|
|
7
8
|
from rich.table import Table
|
|
8
9
|
from rich.tree import Tree
|
|
9
10
|
|
|
10
|
-
from depscan.lib.config import
|
|
11
|
-
|
|
11
|
+
from depscan.lib.config import (
|
|
12
|
+
COMMON_CHECK_TAGS,
|
|
13
|
+
max_purl_per_flow,
|
|
14
|
+
max_reachable_explanations,
|
|
15
|
+
max_purls_reachable_explanations,
|
|
16
|
+
max_source_reachable_explanations,
|
|
17
|
+
max_sink_reachable_explanations,
|
|
18
|
+
)
|
|
19
|
+
from depscan.lib.logger import console, LOG
|
|
12
20
|
|
|
13
21
|
|
|
14
|
-
def explain(
|
|
15
|
-
project_type,
|
|
16
|
-
src_dir,
|
|
17
|
-
reachables_slices_file,
|
|
18
|
-
vdr_file,
|
|
19
|
-
pkg_vulnerabilities,
|
|
20
|
-
pkg_group_rows,
|
|
21
|
-
direct_purls,
|
|
22
|
-
reached_purls,
|
|
23
|
-
):
|
|
22
|
+
def explain(project_type, src_dir, bom_dir, vdr_file, vdr_result, explanation_mode):
    """
    Explain the analysis and findings based on the explanation mode.

    Depending on ``explanation_mode`` this prints the endpoints table built
    from any OpenAPI specification files, and/or the flow explanations for
    every ``*reachables.slices.json`` file found under ``bom_dir``.

    :param project_type: Project type
    :param src_dir: Source directory
    :param bom_dir: BOM directory
    :param vdr_file: VDR file (currently unused by this function)
    :param vdr_result: VDR Result
    :param explanation_mode: Explanation mode
    """
    pattern_methods = {}
    has_any_explanation = False
    has_any_crypto_flows = False
    slices_files = glob.glob(f"{bom_dir}/**/*reachables.slices.json", recursive=True)
    openapi_spec_files = None
    # Should we explain the endpoints and Code Hotspots
    if explanation_mode in ("Endpoints", "EndpointsAndReachables"):
        openapi_spec_files = glob.glob(f"{bom_dir}/*openapi*.json", recursive=False)
        if not openapi_spec_files:
            # Fall back to specification files kept next to the sources
            openapi_spec_files = glob.glob(f"{src_dir}/*openapi*.json", recursive=False)
        if openapi_spec_files:
            rsection = Markdown("""## Service Endpoints

The following endpoints and code hotspots were identified by depscan. Verify that proper authentication and authorization mechanisms are in place to secure them.""")
            console.print(rsection)
            for ospec in openapi_spec_files:
                # Accumulate across all specification files. Keeping only the
                # last result would lose the endpoints found in earlier files
                # (print_endpoints may also return None or an empty mapping).
                endpoints = print_endpoints(ospec)
                if endpoints:
                    pattern_methods.update(endpoints)
        # Return early for endpoints only explanations
        if explanation_mode in ("Endpoints",):
            return
    section_title = (
        "Non-Reachable Flows"
        if explanation_mode in ("NonReachables",)
        else "Reachable Flows"
    )
    for sf in slices_files:
        # With several slices files, label each section so the reader knows
        # which file the explanations belong to.
        if len(slices_files) > 1:
            fn = os.path.basename(sf)
            section_label = f"# Explanations for {sf}"
            if "-" in fn:
                section_label = f"# Explanations for {fn.split('-')[0].upper()}"
            console.print(Markdown(section_label))
        if (reachables_data := json_load(sf, log=LOG)) and reachables_data.get(
            "reachables"
        ):
            if explanation_mode in ("NonReachables",):
                rsection = Markdown(
                    f"""## {section_title}

Below are several data flows deemed safe and non-reachable. Use the provided tips to confirm this assessment.
"""
                )
            elif pattern_methods:
                rsection = Markdown(
                    f"""## {section_title}

Below are some reachable flows, including those accessible via endpoints, identified by depscan. Use the generated OpenAPI specification to evaluate these endpoints for vulnerabilities and risk.
"""
                )
            else:
                rsection = Markdown(
                    f"""## {section_title}

Below are several data flows identified by depscan, including reachable ones. Use the tips provided to strengthen your application’s security posture.
"""
                )
            # The header is only passed until the first file that actually
            # produced an explanation, so it is printed at most once.
            has_explanation, has_crypto_flows, _ = explain_reachables(
                explanation_mode,
                reachables_data,
                project_type,
                vdr_result,
                rsection if not has_any_explanation else None,
            )
            has_any_explanation = has_any_explanation or bool(has_explanation)
            has_any_crypto_flows = has_any_crypto_flows or bool(has_crypto_flows)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _track_usage_targets(usage_targets, usages_object):
|
|
106
|
+
for k, v in usages_object.items():
|
|
107
|
+
for file, lines in v.items():
|
|
108
|
+
for l in lines:
|
|
109
|
+
usage_targets.add(f"{file}#{l}")
|
|
56
110
|
|
|
57
111
|
|
|
58
|
-
def
|
|
112
|
+
def print_endpoints(ospec):
    """
    Print a table of the endpoints described in an OpenAPI specification file.

    :param ospec: Path to the OpenAPI specification JSON file
    :return: Mapping of URL pattern to the sorted list of HTTP methods, or
        None when no specification file was given
    """
    if not ospec:
        return
    # A spec that failed to load, or whose top level is not an object, is
    # treated as having no paths instead of raising.
    spec = json_load(ospec)
    paths = spec.get("paths") if isinstance(spec, dict) else None
    if not isinstance(paths, dict):
        paths = {}
    pattern_methods = defaultdict(list)
    pattern_usage_targets = defaultdict(set)
    for pattern, path_obj in paths.items():
        if not isinstance(path_obj, dict):
            continue
        usage_targets = set()
        http_method_added = False
        for k, v in path_obj.items():
            if k == "parameters":
                continue
            # Java, JavaScript, Python etc
            if k == "x-atom-usages":
                if isinstance(v, dict):
                    _track_usage_targets(usage_targets, v)
                continue
            if isinstance(v, dict) and v.get("x-atom-usages"):
                _track_usage_targets(usage_targets, v.get("x-atom-usages"))
            pattern_methods[pattern].append(k)
            http_method_added = True
        pattern_usage_targets[pattern] = usage_targets
        # We see an endpoint, but do not know the HTTP methods.
        # Let's track them as empty
        if not http_method_added and usage_targets:
            pattern_methods[pattern].append("")
    # Nothing to show: skip building the table altogether
    if not pattern_methods:
        return pattern_methods
    rtable = Table(
        box=box.DOUBLE_EDGE,
        show_lines=True,
        title="Endpoints",
        caption=f"Identified Endpoints: {len(pattern_methods)}",
    )
    for c in ("URL Pattern", "HTTP Methods", "Code Hotspots"):
        rtable.add_column(header=c, vertical="top")
    for pattern, methods in pattern_methods.items():
        # Sorted in place so the returned mapping is ordered as well
        methods.sort()
        rtable.add_row(
            pattern,
            "\n".join(methods).upper(),
            "\n".join(sorted(pattern_usage_targets[pattern])),
        )
    console.print()
    console.print(rtable)
    return pattern_methods
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def is_cpp_flow(flows, max_flows_to_check=4):
    """
    Check whether a flow list belongs to C/C++ code.

    Only the first few flow nodes are inspected; the flow is considered a
    C/C++ flow when any of them has a ``parentFileName`` ending in ``.c`` or
    ``.cpp``.

    :param flows: List of flow node dicts
    :param max_flows_to_check: Number of leading flow nodes to inspect
    :return: True if a C/C++ source file is seen among the inspected nodes
    """
    if not flows:
        return False
    for idx, aflow in enumerate(flows):
        if idx >= max_flows_to_check:
            break
        # The key may be present with a null value, so normalise to a string
        parent_file = aflow.get("parentFileName") or ""
        if parent_file.endswith((".c", ".cpp")):
            return True
    return False
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def explain_reachables(
|
|
175
|
+
explanation_mode, reachables, project_type, vdr_result, header_section=None
|
|
176
|
+
):
|
|
59
177
|
""""""
|
|
60
178
|
reachable_explanations = 0
|
|
61
179
|
checked_flows = 0
|
|
180
|
+
has_crypto_flows = False
|
|
181
|
+
purls_reachable_explanations = defaultdict(int)
|
|
182
|
+
source_reachable_explanations = defaultdict(int)
|
|
183
|
+
sink_reachable_explanations = defaultdict(int)
|
|
184
|
+
has_explanation = False
|
|
185
|
+
header_shown = False
|
|
186
|
+
has_cpp_flow = False
|
|
62
187
|
for areach in reachables.get("reachables", []):
|
|
188
|
+
cpp_flow = is_cpp_flow(areach.get("flows"))
|
|
189
|
+
if not has_cpp_flow and cpp_flow:
|
|
190
|
+
has_cpp_flow = True
|
|
63
191
|
if (
|
|
64
192
|
not areach.get("flows")
|
|
65
193
|
or len(areach.get("flows")) < 2
|
|
66
|
-
or not areach.get("purls")
|
|
194
|
+
or (not areach.get("purls") and not cpp_flow)
|
|
67
195
|
):
|
|
68
196
|
continue
|
|
69
197
|
# Focus only on the prioritized list if available
|
|
@@ -74,11 +202,55 @@ def explain_reachables(reachables, pkg_group_rows, project_type):
|
|
|
74
202
|
# is_prioritized = True
|
|
75
203
|
# if not is_prioritized:
|
|
76
204
|
# continue
|
|
77
|
-
|
|
78
|
-
|
|
205
|
+
(
|
|
206
|
+
flow_tree,
|
|
207
|
+
comment,
|
|
208
|
+
source_sink_desc,
|
|
209
|
+
source_code_str,
|
|
210
|
+
sink_code_str,
|
|
211
|
+
has_check_tag,
|
|
212
|
+
is_endpoint_reachable,
|
|
213
|
+
is_crypto_flow,
|
|
214
|
+
) = explain_flows(
|
|
215
|
+
explanation_mode,
|
|
216
|
+
areach.get("flows"),
|
|
217
|
+
areach.get("purls"),
|
|
218
|
+
project_type,
|
|
219
|
+
vdr_result,
|
|
79
220
|
)
|
|
80
|
-
if not source_sink_desc or not flow_tree:
|
|
221
|
+
if not source_sink_desc or not flow_tree or len(flow_tree.children) < 5:
|
|
81
222
|
continue
|
|
223
|
+
# In non-reachables mode, we are not interested in reachable flows.
|
|
224
|
+
if (
|
|
225
|
+
explanation_mode
|
|
226
|
+
and explanation_mode in ("NonReachables",)
|
|
227
|
+
and not has_check_tag
|
|
228
|
+
):
|
|
229
|
+
continue
|
|
230
|
+
if (
|
|
231
|
+
source_code_str
|
|
232
|
+
and source_reachable_explanations[source_code_str] + 1
|
|
233
|
+
> max_source_reachable_explanations
|
|
234
|
+
):
|
|
235
|
+
continue
|
|
236
|
+
if (
|
|
237
|
+
sink_code_str
|
|
238
|
+
and sink_reachable_explanations[sink_code_str] + 1
|
|
239
|
+
> max_sink_reachable_explanations
|
|
240
|
+
):
|
|
241
|
+
continue
|
|
242
|
+
purls_str = ",".join(sorted(areach.get("purls", [])))
|
|
243
|
+
if (
|
|
244
|
+
purls_str
|
|
245
|
+
and purls_reachable_explanations[purls_str] + 1
|
|
246
|
+
> max_purls_reachable_explanations
|
|
247
|
+
):
|
|
248
|
+
continue
|
|
249
|
+
if not has_explanation:
|
|
250
|
+
has_explanation = True
|
|
251
|
+
# Did we find any crypto flows
|
|
252
|
+
if is_crypto_flow and not has_crypto_flows:
|
|
253
|
+
has_crypto_flows = True
|
|
82
254
|
rtable = Table(
|
|
83
255
|
box=box.DOUBLE_EDGE,
|
|
84
256
|
show_lines=True,
|
|
@@ -90,31 +262,80 @@ def explain_reachables(reachables, pkg_group_rows, project_type):
|
|
|
90
262
|
)
|
|
91
263
|
rtable.add_column(header="Flow", vertical="top")
|
|
92
264
|
rtable.add_row(flow_tree)
|
|
265
|
+
# Print the header first in case we haven't
|
|
266
|
+
if not header_shown and header_section:
|
|
267
|
+
console.print()
|
|
268
|
+
console.print(header_section)
|
|
269
|
+
header_shown = True
|
|
93
270
|
console.print()
|
|
94
271
|
console.print(rtable)
|
|
95
272
|
reachable_explanations += 1
|
|
273
|
+
if purls_str:
|
|
274
|
+
purls_reachable_explanations[purls_str] += 1
|
|
275
|
+
if source_code_str:
|
|
276
|
+
source_reachable_explanations[source_code_str] += 1
|
|
277
|
+
if sink_code_str:
|
|
278
|
+
sink_reachable_explanations[sink_code_str] += 1
|
|
96
279
|
if has_check_tag:
|
|
97
280
|
checked_flows += 1
|
|
98
281
|
if reachable_explanations + 1 > max_reachable_explanations:
|
|
99
282
|
break
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
283
|
+
tips = """## Secure Design Tips"""
|
|
284
|
+
if explanation_mode in ("NonReachables",):
|
|
285
|
+
tips += """
|
|
286
|
+
- Automate tests (including fuzzing) to verify validation, sanitization, encoding, and encryption.
|
|
287
|
+
- Align the implementation with the original architecture and threat models to ensure security compliance.
|
|
288
|
+
- Extract reusable methods into a shared library for organization-wide use.
|
|
289
|
+
"""
|
|
290
|
+
elif has_explanation:
|
|
291
|
+
if has_crypto_flows:
|
|
104
292
|
tips += """
|
|
105
|
-
-
|
|
106
|
-
|
|
293
|
+
- Generate a Cryptographic BOM with cdxgen and monitor it in Dependency-Track.
|
|
294
|
+
"""
|
|
295
|
+
elif checked_flows:
|
|
296
|
+
if not has_cpp_flow:
|
|
297
|
+
tips += """
|
|
298
|
+
- Review the validation and sanitization methods used in the application.
|
|
299
|
+
- To enhance the security posture, implement a common validation middleware.
|
|
300
|
+
"""
|
|
301
|
+
else:
|
|
302
|
+
tips += """
|
|
303
|
+
- Continuously fuzz the parser and validation functions with diverse payloads.
|
|
304
|
+
- Generate post-build SBOMs with OWASP blint by building this project for various architecture combinations. Re-run depscan with the `--bom-dir` argument to enhance the analysis.
|
|
305
|
+
"""
|
|
306
|
+
elif purls_reachable_explanations:
|
|
307
|
+
tips += """
|
|
308
|
+
- Consider implementing a common validation and sanitization library to reduce the risk of exploitability.
|
|
107
309
|
"""
|
|
108
310
|
else:
|
|
109
311
|
tips += """
|
|
110
|
-
-
|
|
312
|
+
- Enhance your unit and integration tests to cover the flows listed above.
|
|
313
|
+
- Continuously fuzz the parser and validation functions with diverse payloads.
|
|
111
314
|
"""
|
|
315
|
+
if tips:
|
|
112
316
|
rsection = Markdown(tips)
|
|
113
317
|
console.print(rsection)
|
|
318
|
+
return has_explanation, has_crypto_flows, tips
|
|
114
319
|
|
|
115
320
|
|
|
116
|
-
def flow_to_source_sink(idx, flow, purls, project_type):
|
|
321
|
+
def flow_to_source_sink(idx, flow, purls, project_type, vdr_result):
|
|
117
322
|
""" """
|
|
323
|
+
endpoint_reached_purls = {}
|
|
324
|
+
reached_services = {}
|
|
325
|
+
if vdr_result:
|
|
326
|
+
endpoint_reached_purls = vdr_result.endpoint_reached_purls
|
|
327
|
+
reached_services = vdr_result.reached_services
|
|
328
|
+
is_endpoint_reachable = False
|
|
329
|
+
possible_reachable_service = False
|
|
330
|
+
tags = flow.get("tags", [])
|
|
331
|
+
is_crypto_flow = "crypto" in tags or "crypto-generate" in tags
|
|
332
|
+
method_in_emoji = ":right_arrow_curving_left:"
|
|
333
|
+
for p in purls:
|
|
334
|
+
if endpoint_reached_purls and endpoint_reached_purls.get(p):
|
|
335
|
+
is_endpoint_reachable = True
|
|
336
|
+
method_in_emoji = ":heavy_large_circle: "
|
|
337
|
+
if reached_services and reached_services.get(p):
|
|
338
|
+
possible_reachable_service = True
|
|
118
339
|
source_sink_desc = ""
|
|
119
340
|
param_name = flow.get("name")
|
|
120
341
|
method_str = "method"
|
|
@@ -130,25 +351,27 @@ def flow_to_source_sink(idx, flow, purls, project_type):
|
|
|
130
351
|
if parent_method in ("handleRequest",):
|
|
131
352
|
method_str = f"handler {method_str}"
|
|
132
353
|
elif parent_method in ("__construct", "__init"):
|
|
133
|
-
method_str =
|
|
354
|
+
method_str = "constructor"
|
|
134
355
|
elif project_type in ("php",) and parent_method.startswith("__"):
|
|
135
356
|
method_str = f"magic {method_str}"
|
|
136
357
|
if flow.get("label") == "METHOD_PARAMETER_IN":
|
|
137
358
|
if param_name:
|
|
138
|
-
source_sink_desc = f"""{param_str} [red]{param_name}[/red]
|
|
359
|
+
source_sink_desc = f"""{param_str} [red]{param_name}[/red] {method_in_emoji} to the {method_str} [bold]{parent_method}[/bold]"""
|
|
139
360
|
else:
|
|
140
|
-
source_sink_desc = f"""{method_str.capitalize()} [red]{parent_method}[/red]
|
|
361
|
+
source_sink_desc = f"""{method_str.capitalize()} [red]{parent_method}[/red] {method_in_emoji}"""
|
|
141
362
|
elif flow.get("label") == "CALL" and flow.get("isExternal"):
|
|
142
363
|
method_full_name = flow.get("fullName", "")
|
|
143
364
|
if not method_full_name.startswith("<"):
|
|
144
365
|
source_sink_desc = f"Invocation: {method_full_name}"
|
|
145
366
|
elif flow.get("label") == "RETURN" and flow.get("code"):
|
|
146
367
|
source_sink_desc = flow.get("code").split("\n")[0]
|
|
147
|
-
elif project_type not in ("java") and flow.get("label") == "IDENTIFIER":
|
|
368
|
+
elif project_type not in ("java",) and flow.get("label") == "IDENTIFIER":
|
|
148
369
|
source_sink_desc = flow.get("code").split("\n")[0]
|
|
370
|
+
if source_sink_desc.endswith("("):
|
|
371
|
+
source_sink_desc = f":diamond_suit: {source_sink_desc})"
|
|
149
372
|
# Try to understand the source a bit more
|
|
150
373
|
if source_sink_desc.startswith("require("):
|
|
151
|
-
source_sink_desc = "
|
|
374
|
+
source_sink_desc = "The flow originates from a module import."
|
|
152
375
|
elif (
|
|
153
376
|
".use(" in source_sink_desc
|
|
154
377
|
or ".subscribe(" in source_sink_desc
|
|
@@ -156,21 +379,41 @@ def flow_to_source_sink(idx, flow, purls, project_type):
|
|
|
156
379
|
or ".emit(" in source_sink_desc
|
|
157
380
|
or " => {" in source_sink_desc
|
|
158
381
|
):
|
|
159
|
-
source_sink_desc = "
|
|
382
|
+
source_sink_desc = "The flow originates from a callback function."
|
|
160
383
|
elif (
|
|
161
|
-
"middleware" in source_sink_desc.lower()
|
|
162
|
-
or "route" in source_sink_desc.lower()
|
|
384
|
+
"middleware" in source_sink_desc.lower() or "route" in source_sink_desc.lower()
|
|
163
385
|
):
|
|
164
|
-
source_sink_desc = "
|
|
386
|
+
source_sink_desc = "The flow originates from middleware."
|
|
387
|
+
elif len(purls) == 0:
|
|
388
|
+
if tags:
|
|
389
|
+
source_sink_desc = (
|
|
390
|
+
f"{source_sink_desc} can be used to reach packages with tags `{tags}`"
|
|
391
|
+
)
|
|
165
392
|
elif len(purls) == 1:
|
|
166
|
-
|
|
167
|
-
f"{source_sink_desc} can be used to reach this package."
|
|
168
|
-
|
|
393
|
+
if is_endpoint_reachable:
|
|
394
|
+
source_sink_desc = f"{source_sink_desc} can be used to reach this package from certain endpoints."
|
|
395
|
+
elif source_sink_desc:
|
|
396
|
+
if is_crypto_flow:
|
|
397
|
+
source_sink_desc = "Reachable crypto-flow."
|
|
398
|
+
else:
|
|
399
|
+
source_sink_desc = "Reachable data-flow."
|
|
169
400
|
else:
|
|
170
|
-
|
|
171
|
-
f"{source_sink_desc} can be used to reach {len(purls)} packages."
|
|
172
|
-
|
|
173
|
-
|
|
401
|
+
if is_endpoint_reachable:
|
|
402
|
+
source_sink_desc = f"{source_sink_desc} can be used to reach {len(purls)} packages from certain endpoints."
|
|
403
|
+
else:
|
|
404
|
+
if source_sink_desc:
|
|
405
|
+
source_sink_desc = (
|
|
406
|
+
f"{source_sink_desc} can be used to reach {len(purls)} packages."
|
|
407
|
+
)
|
|
408
|
+
elif is_crypto_flow:
|
|
409
|
+
source_sink_desc = (
|
|
410
|
+
f"{len(purls)} packages reachable from this crypto-flow."
|
|
411
|
+
)
|
|
412
|
+
else:
|
|
413
|
+
source_sink_desc = (
|
|
414
|
+
f"{len(purls)} packages reachable from this data-flow."
|
|
415
|
+
)
|
|
416
|
+
return source_sink_desc, is_endpoint_reachable, is_crypto_flow
|
|
174
417
|
|
|
175
418
|
|
|
176
419
|
def filter_tags(tags):
|
|
@@ -178,14 +421,32 @@ def filter_tags(tags):
|
|
|
178
421
|
tags = [
|
|
179
422
|
atag
|
|
180
423
|
for atag in tags.split(", ")
|
|
181
|
-
if atag
|
|
182
|
-
not in ("RESOLVED_MEMBER", "UNKNOWN_METHOD", "UNKNOWN_TYPE_DECL")
|
|
424
|
+
if atag not in ("RESOLVED_MEMBER", "UNKNOWN_METHOD", "UNKNOWN_TYPE_DECL")
|
|
183
425
|
]
|
|
184
426
|
return ", ".join(tags)
|
|
185
427
|
return tags
|
|
186
428
|
|
|
187
429
|
|
|
188
|
-
def
|
|
430
|
+
def is_filterable_code(project_type, code):
    """
    Decide whether a code snippet is too noisy to be shown in a flow.

    Empty or very short snippets (fewer than 5 characters) and snippets
    starting with one of a fixed set of prefixes are filtered.

    :param project_type: Project type (not used by the current rules)
    :param code: Code snippet for a flow node; may be None or empty
    :return: True if the snippet should be filtered out
    """
    # Treat a missing snippet like a short one instead of failing on len(None)
    if not code or len(code) < 5:
        return True
    return code.startswith(
        (
            "console.log",
            "thoughtLog(",
            "_tmp_",
            "LOG.debug(",
            "options.get(",
            "RET",
            "this.",
            "NULL",
            "!",
        )
    )
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
def flow_to_str(explanation_mode, flow, project_type):
|
|
189
450
|
""""""
|
|
190
451
|
has_check_tag = False
|
|
191
452
|
file_loc = ""
|
|
@@ -194,60 +455,69 @@ def flow_to_str(flow, project_type):
|
|
|
194
455
|
and flow.get("lineNumber")
|
|
195
456
|
and not flow.get("parentFileName").startswith("unknown")
|
|
196
457
|
):
|
|
197
|
-
file_loc = f
|
|
458
|
+
file_loc = f"{flow.get('parentFileName').replace('src/main/java/', '').replace('src/main/scala/', '')}#{flow.get('lineNumber')} "
|
|
198
459
|
node_desc = flow.get("code").split("\n")[0]
|
|
460
|
+
if node_desc.endswith("("):
|
|
461
|
+
node_desc = f":diamond_suit: {node_desc})"
|
|
462
|
+
elif node_desc.startswith("return "):
|
|
463
|
+
node_desc = f":arrow_backward: [italic]{node_desc}[/italic]"
|
|
199
464
|
tags = filter_tags(flow.get("tags"))
|
|
200
|
-
if flow.get("label")
|
|
465
|
+
if flow.get("label") in ("METHOD_PARAMETER_IN",):
|
|
201
466
|
param_name = flow.get("name")
|
|
202
467
|
if param_name == "this":
|
|
203
468
|
param_name = ""
|
|
204
|
-
node_desc = f
|
|
469
|
+
node_desc = f"{flow.get('parentMethodName')}([red]{param_name}[/red]) :right_arrow_curving_left:"
|
|
205
470
|
if tags:
|
|
206
|
-
node_desc =
|
|
207
|
-
|
|
208
|
-
)
|
|
209
|
-
elif flow.get("label") == "IDENTIFIER":
|
|
471
|
+
node_desc = f"{node_desc}\n[bold]Tags:[/bold] [italic]{tags}[/italic]\n"
|
|
472
|
+
elif flow.get("label") in ("IDENTIFIER", "CALL"):
|
|
210
473
|
if node_desc.startswith("<"):
|
|
211
474
|
node_desc = flow.get("name")
|
|
475
|
+
if flow.get("isExternal"):
|
|
476
|
+
node_desc = f"{node_desc} :right_arrow_curving_up:"
|
|
212
477
|
if tags:
|
|
213
|
-
node_desc =
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
if tags:
|
|
217
|
-
for ctag in (
|
|
218
|
-
"validation",
|
|
219
|
-
"encode",
|
|
220
|
-
"encrypt",
|
|
221
|
-
"sanitize",
|
|
222
|
-
"authentication",
|
|
223
|
-
"authorization",
|
|
224
|
-
):
|
|
478
|
+
node_desc = f"{node_desc}\n[bold]Tags:[/bold] [italic]{tags}[/italic]\n"
|
|
479
|
+
if tags and not is_filterable_code(project_type, node_desc):
|
|
480
|
+
for ctag in COMMON_CHECK_TAGS:
|
|
225
481
|
if ctag in tags:
|
|
226
482
|
has_check_tag = True
|
|
227
483
|
break
|
|
228
484
|
if has_check_tag:
|
|
229
|
-
|
|
485
|
+
if explanation_mode in ("NonReachables",):
|
|
486
|
+
node_desc = f"[bold][green]{node_desc}[/green][/bold]"
|
|
487
|
+
else:
|
|
488
|
+
node_desc = f"[green]{node_desc}[/green]"
|
|
489
|
+
flow_str = (
|
|
490
|
+
f"""[gray37]{file_loc}[/gray37]{node_desc}"""
|
|
491
|
+
if not is_filterable_code(project_type, node_desc)
|
|
492
|
+
else ""
|
|
493
|
+
)
|
|
230
494
|
return (
|
|
231
495
|
file_loc,
|
|
232
|
-
|
|
496
|
+
flow_str,
|
|
497
|
+
node_desc,
|
|
233
498
|
has_check_tag,
|
|
234
499
|
)
|
|
235
500
|
|
|
236
501
|
|
|
237
|
-
def explain_flows(flows, purls, project_type):
|
|
502
|
+
def explain_flows(explanation_mode, flows, purls, project_type, vdr_result):
|
|
238
503
|
""""""
|
|
239
504
|
tree = None
|
|
240
505
|
comments = []
|
|
241
506
|
if len(purls) > max_purl_per_flow:
|
|
242
507
|
comments.append(
|
|
243
|
-
":exclamation_mark: Refactor this flow to
|
|
508
|
+
":exclamation_mark: Refactor this flow to minimize the use of external libraries."
|
|
244
509
|
)
|
|
245
|
-
|
|
246
|
-
|
|
510
|
+
if purls:
|
|
511
|
+
purls_str = "\n".join(purls)
|
|
512
|
+
comments.append(f"[info]Reachable Packages:[/info]\n{purls_str}")
|
|
247
513
|
added_flows = []
|
|
514
|
+
added_node_desc = []
|
|
248
515
|
has_check_tag = False
|
|
249
516
|
last_file_loc = None
|
|
250
517
|
source_sink_desc = ""
|
|
518
|
+
last_code = ""
|
|
519
|
+
source_code_str = ""
|
|
520
|
+
sink_code_str = ""
|
|
251
521
|
for idx, aflow in enumerate(flows):
|
|
252
522
|
# For java, we are only interested in identifiers with tags to keep the flows simple to understand
|
|
253
523
|
if (
|
|
@@ -256,28 +526,46 @@ def explain_flows(flows, purls, project_type):
|
|
|
256
526
|
and not aflow.get("tags")
|
|
257
527
|
):
|
|
258
528
|
continue
|
|
529
|
+
curr_code = aflow.get("code", "").split("\n")[0]
|
|
530
|
+
if idx == 0:
|
|
531
|
+
source_code_str = curr_code
|
|
532
|
+
if idx == len(flows):
|
|
533
|
+
sink_code_str = curr_code
|
|
534
|
+
if last_code and last_code == curr_code:
|
|
535
|
+
continue
|
|
536
|
+
last_code = curr_code
|
|
259
537
|
if not source_sink_desc:
|
|
260
|
-
source_sink_desc =
|
|
261
|
-
idx, aflow, purls, project_type
|
|
538
|
+
source_sink_desc, is_endpoint_reachable, is_crypto_flow = (
|
|
539
|
+
flow_to_source_sink(idx, aflow, purls, project_type, vdr_result)
|
|
262
540
|
)
|
|
263
|
-
file_loc, flow_str, has_check_tag_flow = flow_to_str(
|
|
264
|
-
aflow, project_type
|
|
541
|
+
file_loc, flow_str, node_desc, has_check_tag_flow = flow_to_str(
|
|
542
|
+
explanation_mode, aflow, project_type
|
|
265
543
|
)
|
|
266
|
-
if last_file_loc == file_loc:
|
|
544
|
+
if not flow_str or (last_file_loc and last_file_loc == file_loc):
|
|
267
545
|
continue
|
|
268
546
|
last_file_loc = file_loc
|
|
269
|
-
if flow_str in added_flows:
|
|
547
|
+
if flow_str in added_flows or node_desc in added_node_desc:
|
|
270
548
|
continue
|
|
271
549
|
added_flows.append(flow_str)
|
|
550
|
+
added_node_desc.append(node_desc)
|
|
272
551
|
if not tree:
|
|
273
552
|
tree = Tree(flow_str)
|
|
274
553
|
else:
|
|
275
554
|
tree.add(flow_str)
|
|
276
555
|
if has_check_tag_flow:
|
|
277
556
|
has_check_tag = True
|
|
278
|
-
if has_check_tag:
|
|
557
|
+
if has_check_tag and explanation_mode not in ("NonReachables",):
|
|
279
558
|
comments.insert(
|
|
280
559
|
0,
|
|
281
|
-
":white_medium_small_square:
|
|
560
|
+
":white_medium_small_square: Verify that the mitigation(s) used in this flow are valid and appropriate for your security requirements.",
|
|
282
561
|
)
|
|
283
|
-
return
|
|
562
|
+
return (
|
|
563
|
+
tree,
|
|
564
|
+
"\n".join(comments),
|
|
565
|
+
source_sink_desc,
|
|
566
|
+
source_code_str,
|
|
567
|
+
sink_code_str,
|
|
568
|
+
has_check_tag,
|
|
569
|
+
is_endpoint_reachable,
|
|
570
|
+
is_crypto_flow,
|
|
571
|
+
)
|