owasp-depscan 5.5.0__py3-none-any.whl → 6.0.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. depscan/__init__.py +8 -0
  2. depscan/cli.py +719 -827
  3. depscan/cli_options.py +302 -0
  4. depscan/lib/audit.py +3 -1
  5. depscan/lib/bom.py +387 -289
  6. depscan/lib/config.py +86 -337
  7. depscan/lib/explainer.py +389 -101
  8. depscan/lib/license.py +11 -10
  9. depscan/lib/logger.py +65 -17
  10. depscan/lib/package_query/__init__.py +0 -0
  11. depscan/lib/package_query/cargo_pkg.py +124 -0
  12. depscan/lib/package_query/metadata.py +170 -0
  13. depscan/lib/package_query/npm_pkg.py +345 -0
  14. depscan/lib/package_query/pkg_query.py +195 -0
  15. depscan/lib/package_query/pypi_pkg.py +113 -0
  16. depscan/lib/tomlparse.py +116 -0
  17. depscan/lib/utils.py +34 -188
  18. owasp_depscan-6.0.0a3.dist-info/METADATA +388 -0
  19. {owasp_depscan-5.5.0.dist-info → owasp_depscan-6.0.0a3.dist-info}/RECORD +28 -25
  20. {owasp_depscan-5.5.0.dist-info → owasp_depscan-6.0.0a3.dist-info}/WHEEL +1 -1
  21. vendor/choosealicense.com/_licenses/cern-ohl-p-2.0.txt +1 -1
  22. vendor/choosealicense.com/_licenses/cern-ohl-s-2.0.txt +1 -1
  23. vendor/choosealicense.com/_licenses/cern-ohl-w-2.0.txt +2 -2
  24. vendor/choosealicense.com/_licenses/mit-0.txt +1 -1
  25. vendor/spdx/json/licenses.json +904 -677
  26. depscan/lib/analysis.py +0 -1554
  27. depscan/lib/csaf.py +0 -1860
  28. depscan/lib/normalize.py +0 -312
  29. depscan/lib/orasclient.py +0 -142
  30. depscan/lib/pkg_query.py +0 -532
  31. owasp_depscan-5.5.0.dist-info/METADATA +0 -580
  32. {owasp_depscan-5.5.0.dist-info → owasp_depscan-6.0.0a3.dist-info}/entry_points.txt +0 -0
  33. {owasp_depscan-5.5.0.dist-info → owasp_depscan-6.0.0a3.dist-info/licenses}/LICENSE +0 -0
  34. {owasp_depscan-5.5.0.dist-info → owasp_depscan-6.0.0a3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,116 @@
1
+ """
2
+ Module for parsing command line arguments and TOML configuration files.
3
+
4
+ This module provides a class, `ArgumentParser`, which extends the functionality
5
+ of `argparse.ArgumentParser` by allowing users to specify default values for
6
+ arguments in a TOML file, in addition to the command line.
7
+ """
8
+ # Based on https://github.com/florianmahner/tomlparse/blob/main/tomlparse/argparse.py
9
+ # MIT license
10
+ import argparse
11
+ import os
12
+ from typing import Any, Dict, List, MutableMapping, Optional, Tuple
13
+
14
+ try:
15
+ import tomllib
16
+ except ImportError:
17
+ import tomli as tomllib
18
+
19
+
20
+ class ArgumentParser(argparse.ArgumentParser):
21
+ """A wrapper of the argparse.ArgumentParser class that adds the ability to
22
+ specify the values for arguments using a TOML file.
23
+
24
+ This class extends the functionality of the standard argparse.ArgumentParser by allowing
25
+ users to specify default values for arguments in a TOML file, in addition to the command line.
26
+ We can use all functionalities from the argument parser as usual:
27
+
28
+ Example:
29
+ >>> from depscan.lib.tomlparse import argparse
30
+ >>> parser = argparse.ArgumentParser(description='Example argparse-toml app')
31
+ >>> parser.add_argument('--foo', type=int, help='An example argument')
32
+ >>> args = parser.parse_args()
33
+
34
+ The above code will work as with the standard argparse.ArgumentParser class. We can also
35
+ specify the default values for the arguments in a TOML file. For this the TOML ArgumentParser
36
+ has one additional argument: `--config`. The `--config` argument is used
37
+ to specify the path to the TOML file.
38
+
39
+ We have the following hierarchy of arguments:
40
+ 1. Arguments passed through the command line are selected over TOML
41
+ arguments, even if both are passed
42
+ 2. Arguments from the TOML file are preferred over the default arguments
43
+ """
44
+
45
+ def __init__(self, *args: Any, **kwargs: Any) -> None:
46
+ super().__init__(*args, **kwargs)
47
+ default_config = os.path.join(os.getcwd(), ".config", "depscan.toml")
48
+ self.add_argument("--config", help="Path to the configuration file. Default: $PWD/.config/depscan.toml",
49
+ default=os.getenv("DEPSCAN_CONFIG", default_config))
50
+
51
+ def extract_args(
52
+ self, args: Optional[List[str]] = None, namespace: Optional[object] = None
53
+ ) -> Tuple[argparse.Namespace, argparse.Namespace]:
54
+ """Find the default arguments of the argument parser if any and the
55
+ ones that are passed through the command line"""
56
+ default_args = super().parse_args([])
57
+ cmdl_args = super().parse_args(args, namespace)
58
+
59
+ return default_args, cmdl_args
60
+
61
+ def find_changed_args(
62
+ self, default_args: argparse.Namespace, sys_args: argparse.Namespace
63
+ ) -> List[str]:
64
+ """Find the arguments that have been changed from the command
65
+ line to replace the .toml arguments"""
66
+ default_dict = vars(default_args)
67
+ sys_dict = vars(sys_args)
68
+ changed_dict = []
69
+ for key, value in default_dict.items():
70
+ sys_value = sys_dict[key]
71
+ if sys_value != value:
72
+ changed_dict.append(key)
73
+ return changed_dict
74
+
75
+ def load_toml(self, path: str) -> MutableMapping[str, Any]:
76
+ try:
77
+ with open(path, "rb") as f:
78
+ config = tomllib.load(f)
79
+ except FileNotFoundError:
80
+ self.error(f'Configuration file "{path}" doesn\'t exist')
81
+ return config
82
+
83
+ def remove_nested_keys(self, dictionary: Dict[str, Any]) -> Dict[str, Any]:
84
+ new_dict = {}
85
+ for key, value in dictionary.items():
86
+ if not isinstance(value, dict):
87
+ new_dict[key] = value
88
+ return new_dict
89
+
90
+ def parse_args(
91
+ self, args: Optional[List[str]] = None, namespace: Optional[object] = None
92
+ ) -> argparse.Namespace:
93
+ """Parse the arguments from the command line and the TOML file
94
+ and return the updated arguments. Same functionality as the
95
+ `argparse.ArgumentParser.parse_args` method."""
96
+ default_args, sys_args = self.extract_args(args, namespace)
97
+ config = sys_args.config
98
+ # These are the default arguments options updated by the command line
99
+ if not config or not os.path.exists(config):
100
+ return sys_args
101
+
102
+ # If a config file is passed, update the cmdl args with the config file unless
103
+ # the argument is already specified in the command line
104
+ toml_data = self.load_toml(config)
105
+ changed_args = self.find_changed_args(default_args, sys_args)
106
+ toml_args = self.remove_nested_keys(toml_data)
107
+
108
+ # Replaced unchanged command line arguments with arguments from
109
+ # the TOML file.
110
+ for key, value in toml_args.items():
111
+ if key not in changed_args:
112
+ setattr(sys_args, key, value)
113
+ # Support both hyphen and underscore representations
114
+ setattr(sys_args, key.replace("-", "_"), value)
115
+
116
+ return sys_args
depscan/lib/utils.py CHANGED
@@ -1,19 +1,16 @@
1
1
  import ast
2
- import json
3
2
  import os
4
3
  import re
5
4
  import shutil
6
- from collections import defaultdict
7
5
  from datetime import datetime
8
- from importlib.metadata import distribution
9
6
 
7
+ from custom_json_diff.lib.utils import file_read, file_write, json_load
10
8
  from jinja2 import Environment
11
- from vdb.lib import db as db_lib
12
- from vdb.lib.utils import version_compare
13
9
 
14
- from depscan.lib import config, normalize
10
+ from depscan.lib.config import ignore_directories
11
+ from depscan.lib.logger import LOG
15
12
 
16
- lic_symbol_regex = re.compile(r"[(),]")
13
+ LIC_SYMBOL_REGEX = re.compile(r"[(),]")
17
14
 
18
15
 
19
16
  def filter_ignored_dirs(dirs):
@@ -26,7 +23,7 @@ def filter_ignored_dirs(dirs):
26
23
  [
27
24
  dirs.remove(d)
28
25
  for d in list(dirs)
29
- if d.lower() in config.ignore_directories or d.startswith(".")
26
+ if d.lower() in ignore_directories or d.startswith(".")
30
27
  ]
31
28
  return dirs
32
29
 
@@ -49,24 +46,22 @@ def find_python_reqfiles(path):
49
46
  ]
50
47
  for root, dirs, files in os.walk(path):
51
48
  filter_ignored_dirs(dirs)
52
- for name in req_files:
53
- if name in files:
54
- result.append(os.path.join(root, name))
49
+ result.extend(os.path.join(root, name) for name in req_files if name in files)
55
50
  return result
56
51
 
57
52
 
58
- def find_files(src, src_ext_name, quick=False, filter=True):
53
+ def find_files(src, src_ext_name, quick=False, filter_dirs=True):
59
54
  """
60
55
  Method to find files with given extension
61
56
 
62
57
  :param src: source directory to search
63
58
  :param src_ext_name: type of source file
64
59
  :param quick: only return first match found
65
- :param filter: filter out ignored directories
60
+ :param filter_dirs: filter out ignored directories
66
61
  """
67
62
  result = []
68
63
  for root, dirs, files in os.walk(src):
69
- if filter:
64
+ if filter_dirs:
70
65
  filter_ignored_dirs(dirs)
71
66
  for file in files:
72
67
  if file == src_ext_name or file.endswith(src_ext_name):
@@ -80,9 +75,7 @@ def is_binary_string(content):
80
75
  """
81
76
  Method to check if the given content is a binary string
82
77
  """
83
- textchars = bytearray(
84
- {7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7F}
85
- )
78
+ textchars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7F})
86
79
  return bool(content.translate(None, textchars))
87
80
 
88
81
 
@@ -171,7 +164,7 @@ def detect_project_type(src_dir):
171
164
  os.path.join(src_dir, ".github", "workflows"),
172
165
  ".yml",
173
166
  quick=True,
174
- filter=False,
167
+ filter_dirs=False,
175
168
  ):
176
169
  project_types.append("github")
177
170
  # jars
@@ -187,125 +180,6 @@ def detect_project_type(src_dir):
187
180
  return project_types
188
181
 
189
182
 
190
- def get_pkg_vendor_name(pkg):
191
- """
192
- Method to extract vendor and name information from package. If vendor
193
- information is not available package url is used to extract the package
194
- registry provider such as pypi, maven
195
-
196
- :param pkg: a dictionary representing a package
197
- :return: vendor and name as a tuple
198
- """
199
- vendor = pkg.get("vendor")
200
- if not vendor:
201
- purl = pkg.get("purl")
202
- if purl:
203
- purl_parts = purl.split("/")
204
- if purl_parts:
205
- vendor = purl_parts[0].replace("pkg:", "")
206
- else:
207
- vendor = ""
208
- name = pkg.get("name")
209
- return vendor, name
210
-
211
-
212
- def search_pkgs(db, project_type, pkg_list):
213
- """
214
- Method to search packages in our vulnerability database
215
-
216
- :param db: DB instance
217
- :param project_type: Project type
218
- :param pkg_list: List of packages to search
219
- :returns: raw_results, pkg_aliases, purl_aliases
220
- """
221
- expanded_list = []
222
- # The challenge we have is to broaden our search and create several
223
- # variations of the package and vendor names to perform a broad search.
224
- # We then have to map the results back to the original package names and
225
- # package urls.
226
- pkg_aliases = defaultdict(list)
227
- purl_aliases = {}
228
- for pkg in pkg_list:
229
- variations = normalize.create_pkg_variations(pkg)
230
- if variations:
231
- expanded_list += variations
232
- vendor, name = get_pkg_vendor_name(pkg)
233
- version = pkg.get("version")
234
- if pkg.get("purl"):
235
- ppurl = pkg.get("purl")
236
- purl_aliases[pkg.get("purl")] = pkg.get("purl")
237
- purl_aliases[f"{vendor.lower()}:{name.lower()}:{version}"] = ppurl
238
- if ppurl.startswith("pkg:npm"):
239
- purl_aliases[f"npm:{vendor.lower()}/{name.lower()}:{version}"] = ppurl
240
- if not purl_aliases.get(f"{vendor.lower()}:{name.lower()}"):
241
- purl_aliases[f"{vendor.lower()}:{name.lower()}"] = ppurl
242
- if variations:
243
- for vari in variations:
244
- vari_full_pkg = f"""{vari.get("vendor")}:{vari.get("name")}"""
245
- pkg_aliases[
246
- f"{vendor.lower()}:{name.lower()}:{version}"
247
- ].append(vari_full_pkg)
248
- if pkg.get("purl"):
249
- purl_aliases[f"{vari_full_pkg.lower()}:{version}"] = pkg.get("purl")
250
- quick_res = db_lib.bulk_index_search(expanded_list)
251
- raw_results = db_lib.pkg_bulk_search(db, quick_res)
252
- raw_results = normalize.dedup(project_type, raw_results)
253
- pkg_aliases = normalize.dealias_packages(
254
- raw_results,
255
- pkg_aliases=pkg_aliases,
256
- purl_aliases=purl_aliases,
257
- )
258
- return raw_results, pkg_aliases, purl_aliases
259
-
260
-
261
- def get_pkgs_by_scope(pkg_list):
262
- """
263
- Method to return the packages by scope as defined in CycloneDX spec -
264
- required, optional and excluded
265
-
266
- :param pkg_list: List of packages
267
- :return: Dictionary of packages categorized by scope if available. Empty if
268
- no scope information is available
269
- """
270
- scoped_pkgs = {}
271
- for pkg in pkg_list:
272
- if pkg.get("scope"):
273
- vendor, name = get_pkg_vendor_name(pkg)
274
- scope = pkg.get("scope").lower()
275
- if pkg.get("purl"):
276
- scoped_pkgs.setdefault(scope, []).append(pkg.get("purl"))
277
- else:
278
- scoped_pkgs.setdefault(scope, []).append(f"{vendor}:{name}")
279
- return scoped_pkgs
280
-
281
-
282
- def get_scope_from_imports(project_type, pkg_list, all_imports):
283
- """
284
- Method to compute the packages scope defined in CycloneDX spec - required,
285
- optional and excluded
286
-
287
- :param project_type: Project type
288
- :param pkg_list: List of packages
289
- :param all_imports: List of imports detected
290
- :return: Dictionary of packages categorized by scope if available. Empty if
291
- no scope information is available
292
- """
293
- scoped_pkgs = {}
294
- if not pkg_list or not all_imports:
295
- return scoped_pkgs
296
- for pkg in pkg_list:
297
- scope = "optional"
298
- vendor, name = get_pkg_vendor_name(pkg)
299
- if name in all_imports or name.lower().replace("py", "") in all_imports:
300
- scope = "required"
301
- if pkg.get("purl"):
302
- scoped_pkgs.setdefault(scope, []).append(pkg.get("purl"))
303
- else:
304
- scoped_pkgs.setdefault(scope, []).append(f"{vendor}:{name}")
305
- scoped_pkgs[scope].append(f"{project_type}:{name.lower()}")
306
- return scoped_pkgs
307
-
308
-
309
183
  def cleanup_license_string(license_str):
310
184
  """
311
185
  Method to clean up license string by removing problematic symbols and
@@ -322,33 +196,10 @@ def cleanup_license_string(license_str):
322
196
  .replace(" & ", " OR ")
323
197
  .replace("&", " OR ")
324
198
  )
325
- license_str = lic_symbol_regex.sub("", license_str)
199
+ license_str = LIC_SYMBOL_REGEX.sub("", license_str)
326
200
  return license_str.upper()
327
201
 
328
202
 
329
- def max_version(version_list):
330
- """
331
- Method to return the highest version from the list
332
-
333
- :param version_list: single version string or set of versions
334
- :return: max version
335
- """
336
- if isinstance(version_list, str):
337
- return version_list
338
- if isinstance(version_list, set):
339
- version_list = list(version_list)
340
- if len(version_list) == 1:
341
- return version_list[0]
342
- min_ver = "0"
343
- max_ver = version_list[0]
344
- for i, vl in enumerate(version_list):
345
- if not vl:
346
- continue
347
- if not version_compare(vl, min_ver, max_ver):
348
- max_ver = vl
349
- return max_ver
350
-
351
-
352
203
  def get_all_imports(src_dir):
353
204
  """
354
205
  Method to collect all package imports from a python file
@@ -359,9 +210,7 @@ def get_all_imports(src_dir):
359
210
  if not py_files:
360
211
  return import_list
361
212
  for afile in py_files:
362
- with open(os.path.join(afile), "rb", encoding="utf-8") as f:
363
- content = f.read()
364
- parsed = ast.parse(content)
213
+ parsed = ast.parse(file_read(os.path.join(afile), True, log=LOG))
365
214
  for node in ast.walk(parsed):
366
215
  if isinstance(node, ast.Import):
367
216
  for name in node.names:
@@ -379,18 +228,11 @@ def get_all_imports(src_dir):
379
228
  return import_list
380
229
 
381
230
 
382
- def get_version():
383
- """
384
- Returns the version of depscan
385
- """
386
- return distribution("owasp-depscan").version
387
-
388
-
389
231
  def export_pdf(
390
232
  html_file,
391
233
  pdf_file,
392
234
  title="DepScan Analysis",
393
- footer=f'Report generated by OWASP dep-scan at {datetime.now().strftime("%B %d, %Y %H:%M")}',
235
+ footer=f"Report generated by OWASP dep-scan at {datetime.now().strftime('%B %d, %Y %H:%M')}",
394
236
  ):
395
237
  """
396
238
  Method to export html as pdf using pdfkit
@@ -428,31 +270,35 @@ def render_template_report(
428
270
  summary,
429
271
  template_file,
430
272
  result_file,
273
+ depscan_options={},
431
274
  ):
432
275
  """
433
276
  Render the given vdr_file (falling back to bom_file if no vdr was written)
434
277
  and summary dict using the template_file with Jinja, rendered output is written
435
278
  to named result_file in reports directory.
436
279
  """
437
- if vdr_file and os.path.isfile(vdr_file):
438
- with open(vdr_file, "r", encoding="utf-8") as f:
439
- bom = json.load(f)
440
- else:
441
- with open(bom_file, "r", encoding="utf-8") as f:
442
- bom = json.load(f)
443
- with open(template_file, "r", encoding="utf-8") as tmpl_file:
444
- template = tmpl_file.read()
445
- jinja_env = Environment(autoescape=False)
280
+ bom = {}
281
+ if vdr_file:
282
+ bom = json_load(vdr_file, log=LOG)
283
+ if not bom:
284
+ bom = json_load(bom_file, log=LOG)
285
+ template = file_read(template_file, log=LOG)
286
+ jinja_env = Environment(autoescape=True)
446
287
  jinja_tmpl = jinja_env.from_string(template)
447
288
  report_result = jinja_tmpl.render(
448
- metadata=bom.get("metadata", None),
449
- vulnerabilities=bom.get("vulnerabilities", None),
450
- components=bom.get("components", None),
451
- dependencies=bom.get("dependencies", None),
452
- services=bom.get("services", None),
289
+ metadata=bom.get("metadata"),
290
+ vulnerabilities=bom.get("vulnerabilities"),
291
+ components=bom.get("components"),
292
+ dependencies=bom.get("dependencies"),
293
+ services=bom.get("services"),
453
294
  summary=summary,
454
295
  pkg_vulnerabilities=pkg_vulnerabilities,
455
296
  pkg_group_rows=pkg_group_rows,
456
297
  )
457
- with open(result_file, "w", encoding="utf-8") as outfile:
458
- outfile.write(report_result)
298
+ file_write(
299
+ result_file,
300
+ report_result,
301
+ error_msg=f"Failed to export report: {result_file}",
302
+ success_msg=f"Report written to {result_file}.",
303
+ log=LOG,
304
+ )