scanoss 1.14.0__py3-none-any.whl → 1.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scanoss/results.py ADDED
@@ -0,0 +1,301 @@
1
+ """
2
+ SPDX-License-Identifier: MIT
3
+
4
+ Copyright (c) 2024, SCANOSS
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
+ """
24
+
25
+ import json
26
+ from typing import Any, Dict, List
27
+
28
+ from .scanossbase import ScanossBase
29
+
30
# Match types and identification statuses that the result filters understand
MATCH_TYPES = ["file", "snippet"]
STATUSES = ["pending", "identified"]

# Allowed values per filter argument (independent copies of the lists above)
AVAILABLE_FILTER_VALUES = {
    "match_type": list(MATCH_TYPES),
    "status": list(STATUSES),
}

# Maps a filter argument name to the key it is compared against in a result entry
ARG_TO_FILTER_MAP = {
    "match_type": "id",
    "status": "status",
}

# Filter preset selecting file/snippet matches that are still pending
PENDING_IDENTIFICATION_FILTERS = {
    "match_type": ["file", "snippet"],
    "status": ["pending"],
}

# Output formats that can be requested when presenting results
AVAILABLE_OUTPUT_FORMATS = ["json", "plain"]
51
+
52
+
53
class Results(ScanossBase):
    """
    SCANOSS Results class

    Handles the parsing, filtering and presentation of the scan results
    """

    def __init__(
        self,
        debug: bool = False,
        trace: bool = False,
        quiet: bool = False,
        filepath: str = None,
        match_type: str = None,
        status: str = None,
        output_file: str = None,
        output_format: str = None,
    ):
        """Initialise the Results class

        Args:
            debug (bool, optional): Debug. Defaults to False.
            trace (bool, optional): Trace. Defaults to False.
            quiet (bool, optional): Quiet. Defaults to False.
            filepath (str, optional): Path to the scan results file. Defaults to None.
            match_type (str, optional): Comma separated match type filters. Defaults to None.
            status (str, optional): Comma separated status filters. Defaults to None.
            output_file (str, optional): Path to the output file. Defaults to None.
            output_format (str, optional): Output format. Defaults to None.
        """

        super().__init__(debug, trace, quiet)
        self.data = self._load_and_transform(filepath)
        self.filters = self._load_filters(match_type=match_type, status=status)
        self.output_file = output_file
        self.output_format = output_format

    def _load_file(self, file: str) -> Dict[str, Any]:
        """Load the JSON file

        Args:
            file (str): Path to the JSON file

        Raises:
            Exception: Whatever the JSON parser raised; it is reported on stderr
                first and then re-raised, so the caller sees the real parsing
                problem rather than a later failure on a missing result.

        Returns:
            Dict[str, Any]: The parsed JSON data
        """
        # JSON interchange text is UTF-8; do not depend on the platform default encoding
        with open(file, "r", encoding="utf-8") as jsonfile:
            try:
                return json.load(jsonfile)
            except Exception as e:
                self.print_stderr(f"ERROR: Problem parsing input JSON: {e}")
                raise

    def _load_and_transform(self, file: str) -> List[Dict[str, Any]]:
        """
        Load the file and transform the data into a list of dictionaries with the filename and the file data
        """
        return self._transform_data(self._load_file(file))

    @staticmethod
    def _transform_data(data: dict) -> list:
        """Transform the data into a list of dictionaries with the filename and the file data

        Only the first entry recorded for each file is used. Files whose entry
        list is empty are dropped.

        Args:
            data (dict): The raw data, keyed by filename. May be empty or None.

        Returns:
            list: The transformed data
        """
        if not data:
            # Nothing loaded (e.g. an empty or "null" JSON document)
            return []
        return [
            # Keys of the first entry win over the injected 'filename', as with dict.update()
            {'filename': filename, **file_data[0]}
            for filename, file_data in data.items()
            if file_data
        ]

    def _load_filters(self, **kwargs):
        """Extract and parse the filters

        Each keyword argument with a truthy value is split on commas; arguments
        that are None or empty are left out of the result.

        Returns:
            dict: Parsed filters
        """
        return {
            key: self._extract_comma_separated_values(value)
            for key, value in kwargs.items()
            if value
        }

    @staticmethod
    def _extract_comma_separated_values(values: str):
        """Split a comma separated string into a list of stripped values

        Empty entries (as produced by a trailing or doubled comma) are dropped.

        Args:
            values (str): Comma separated values

        Returns:
            list: The individual, non-empty values
        """
        stripped = (value.strip() for value in values.split(","))
        return [value for value in stripped if value]

    def apply_filters(self):
        """Apply the filters to the data

        Replaces self.data with the entries that satisfy every active filter.

        Raises:
            Exception: Invalid filter value (only detected when there is data to filter)

        Returns:
            Results: self, to allow call chaining
        """
        self.data = [item for item in self.data if self._item_matches_filters(item)]
        return self

    def _item_matches_filters(self, item):
        """Check a single result entry against every active filter

        Args:
            item (dict): A transformed result entry

        Raises:
            Exception: Invalid filter value

        Returns:
            bool: True if the entry satisfies all the filters, False otherwise
        """
        for filter_key, filter_values in self.filters.items():
            if not filter_values:
                continue

            self._validate_filter_values(filter_key, filter_values)

            item_value = item.get(ARG_TO_FILTER_MAP[filter_key])
            if isinstance(filter_values, list):
                if item_value not in filter_values:
                    return False
            elif item_value != filter_values:
                return False
        return True

    @staticmethod
    def _validate_filter_values(filter_key: str, filter_value: List[str]):
        """Ensure every requested value is allowed for the given filter

        Args:
            filter_key (str): Filter name (a key of AVAILABLE_FILTER_VALUES)
            filter_value (List[str]): Requested values for that filter

        Raises:
            Exception: At least one value is not allowed for the filter
        """
        allowed = AVAILABLE_FILTER_VALUES.get(filter_key, [])
        invalid = [value for value in filter_value if value not in allowed]
        if invalid:
            invalid_values = ", ".join(str(value) for value in invalid)
            valid_values = ", ".join(allowed)
            raise Exception(
                f"ERROR: Invalid filter value '{invalid_values}' for filter '{filter_key}'. "
                f"Valid values are: {valid_values}"
            )

    def get_pending_identifications(self):
        """Get files with 'pending' status and 'file' or 'snippet' match type

        Replaces any filters currently set on the instance.

        Returns:
            Results: self, to allow call chaining
        """
        # Copy so later changes to self.filters cannot alter the shared module-level preset
        self.filters = dict(PENDING_IDENTIFICATION_FILTERS)
        self.apply_filters()

        return self

    def has_results(self):
        """Tell whether there is any result entry left

        Returns:
            bool: True if self.data is not empty
        """
        return bool(self.data)

    def present(self, output_format: str = None, output_file: str = None):
        """Format and present the results. If no output format is provided, the plain text
        results are emitted without a target file

        Arguments given here take precedence over the values given at construction time.

        Args:
            output_format (str, optional): Output format. Defaults to None.
            output_file (str, optional): Output file. Defaults to None.

        Raises:
            Exception: Invalid output format

        Returns:
            None
        """
        file_path = output_file or self.output_file
        fmt = output_format or self.output_format

        if fmt and fmt not in AVAILABLE_OUTPUT_FORMATS:
            valid_formats = ", ".join(AVAILABLE_OUTPUT_FORMATS)
            # Report the format actually in effect, which may come from the constructor
            raise Exception(
                f"ERROR: Invalid output format '{fmt}'. Valid values are: {valid_formats}"
            )

        if fmt == 'json':
            return self._present_json(file_path)
        if fmt == 'plain':
            return self._present_plain(file_path)
        return self._present_stdout()

    def _present_json(self, file: str = None):
        """Present the results in JSON format

        Args:
            file (str, optional): Output file. Defaults to None.
        """
        self.print_to_file_or_stdout(
            json.dumps(self._format_json_output(), indent=2), file
        )

    def _format_json_output(self):
        """
        Format the output data into a JSON object

        Returns:
            dict: {'results': [...], 'total': <number of results>}
        """
        formatted_data = []
        for item in self.data:
            purls = item.get('purl')
            licenses = item.get('licenses')
            formatted_data.append(
                {
                    'file': item.get('filename'),
                    'status': item.get('status', "N/A"),
                    # .get() keeps an entry without 'id' from aborting the whole report,
                    # in line with _format_plain_output_item
                    'match_type': item.get('id'),
                    'matched': item.get('matched', "N/A"),
                    'purl': purls[0] if purls else "N/A",
                    'license': licenses[0].get('name', "N/A") if licenses else "N/A",
                }
            )
        return {'results': formatted_data, 'total': len(formatted_data)}

    def _present_plain(self, file: str = None):
        """Present the results in plain text format

        Args:
            file (str, optional): Output file. Defaults to None.

        Returns:
            None
        """
        if not self.data:
            return self.print_stderr("No results to present")
        self.print_to_file_or_stdout(self._format_plain_output(), file)

    def _present_stdout(self):
        """Present the plain text results without a target file

        Returns:
            None
        """
        if not self.data:
            return self.print_stderr("No results to present")
        self.print_to_file_or_stdout(self._format_plain_output())

    def _format_plain_output(self):
        """
        Format the output data into a plain text string

        Returns:
            str: One formatted section per result, each followed by " \\n"
        """
        return "".join(
            f"{self._format_plain_output_item(item)} \n" for item in self.data
        )

    @staticmethod
    def _format_plain_output_item(item):
        """Format a single result entry as plain text

        Args:
            item (dict): A transformed result entry

        Returns:
            str: The entry details, one "Label: value" pair per line
        """
        purls = item.get('purl', [])
        licenses = item.get('licenses', [])

        return (
            f"File: {item.get('filename')}\n"
            f"Match type: {item.get('id')}\n"
            f"Status: {item.get('status', 'N/A')}\n"
            f"Matched: {item.get('matched', 'N/A')}\n"
            f"Purl: {purls[0] if purls else 'N/A'}\n"
            f"License: {licenses[0].get('name', 'N/A') if licenses else 'N/A'}\n"
        )
scanoss/scancodedeps.py CHANGED
@@ -59,6 +59,7 @@ class ScancodeDeps(ScanossBase):
59
59
  else:
60
60
  print(string)
61
61
 
62
+
62
63
  def remove_interim_file(self, output_file: str = None):
63
64
  """
64
65
  Remove the temporary Scancode interim file
@@ -105,15 +106,17 @@ class ScancodeDeps(ScanossBase):
105
106
  continue
106
107
  self.print_debug(f'Path: {f_path}, Packages: {len(f_packages)}')
107
108
  purls = []
109
+ scopes = []
108
110
  for pkgs in f_packages:
109
111
  pk_deps = pkgs.get('dependencies')
112
+
110
113
  if not pk_deps or pk_deps == '':
111
114
  continue
112
- self.print_debug(f'Path: {f_path}, Dependencies: {len(pk_deps)}')
113
115
  for d in pk_deps:
114
116
  dp = d.get('purl')
115
117
  if not dp or dp == '':
116
118
  continue
119
+
117
120
  dp = dp.replace('"', '').replace('%22', '') # remove unwanted quotes on purls
118
121
  dp_data = {'purl': dp}
119
122
  rq = d.get('extracted_requirement') # scancode format 2.0
@@ -122,15 +125,21 @@ class ScancodeDeps(ScanossBase):
122
125
  # skip requirement if it ends with the purl (i.e. exact version) or if it's local (file)
123
126
  if rq and rq != '' and not dp.endswith(rq) and not rq.startswith('file:'):
124
127
  dp_data['requirement'] = rq
128
+
129
+ # Gets dependency scope
130
+ scope = d.get('scope')
131
+ if scope and scope != '':
132
+ dp_data['scope'] = scope
133
+
125
134
  purls.append(dp_data)
126
- # self.print_stderr(f'Path: {f_path}, Purls: {purls}')
135
+ # end for loop
136
+
127
137
  if len(purls) > 0:
128
138
  files.append({'file': f_path, 'purls': purls})
129
139
  # End packages
130
140
  # End file details
131
141
  # End dependencies json
132
142
  deps = {'files': files}
133
- # self.print_debug(f'Dep Data: {deps}')
134
143
  return deps
135
144
 
136
145
  def produce_from_file(self, json_file: str = None) -> json:
@@ -179,6 +188,7 @@ class ScancodeDeps(ScanossBase):
179
188
  return False
180
189
  self.print_msg('Producing summary...')
181
190
  deps = self.produce_from_file(output_file)
191
+ deps = self.__remove_dep_scope(deps)
182
192
  self.remove_interim_file(output_file)
183
193
  if not deps:
184
194
  return False
@@ -235,6 +245,22 @@ class ScancodeDeps(ScanossBase):
235
245
  self.print_stderr(f'ERROR: Problem loading input JSON: {e}')
236
246
  return None
237
247
 
248
+
249
@staticmethod
def __remove_dep_scope(deps: dict) -> dict:
    """
    Strip the 'scope' entry from every purl of every file in the dependency summary

    The purl dictionaries are modified in place.

    :param deps: dependencies with scopes ({'files': [{'purls': [{...}, ...]}, ...]});
                 may be None or empty when no summary could be produced
    :return: dependencies without scopes ({'files': [...]}), or the input unchanged when it is None/empty
    """
    if not deps:
        # Hand the falsy value back untouched so the caller's own "if not deps" check still applies
        return deps
    files = deps.get("files") or []
    for file in files:
        # A file entry may have no 'purls' key (or a None value); treat both as "nothing to strip"
        for purl in file.get("purls") or []:
            purl.pop("scope", None)

    return {"files": files}
263
+
238
264
  #
239
265
  # End of ScancodeDeps Class
240
266
  #
scanoss/scanner.py CHANGED
@@ -37,15 +37,17 @@ from .spdxlite import SpdxLite
37
37
  from .csvoutput import CsvOutput
38
38
  from .threadedscanning import ThreadedScanning
39
39
  from .scancodedeps import ScancodeDeps
40
- from .threadeddependencies import ThreadedDependencies
40
+ from .threadeddependencies import ThreadedDependencies, SCOPE
41
41
  from .scanossgrpc import ScanossGrpc
42
42
  from .scantype import ScanType
43
43
  from .scanossbase import ScanossBase
44
+ from .scanoss_settings import ScanossSettings
45
+ from .scanpostprocessor import ScanPostProcessor
44
46
  from . import __version__
45
47
 
46
48
  FAST_WINNOWING = False
47
49
  try:
48
- from scanoss_winnowing.winnowing import Winnowing
50
+ from .winnowing import Winnowing
49
51
 
50
52
  FAST_WINNOWING = True
51
53
  except ModuleNotFoundError or ImportError:
@@ -95,17 +97,18 @@ class Scanner(ScanossBase):
95
97
 
96
98
  def __init__(self, wfp: str = None, scan_output: str = None, output_format: str = 'plain',
97
99
  debug: bool = False, trace: bool = False, quiet: bool = False, api_key: str = None, url: str = None,
98
- sbom_path: str = None, scan_type: str = None, flags: str = None, nb_threads: int = 5,
100
+ flags: str = None, nb_threads: int = 5,
99
101
  post_size: int = 32, timeout: int = 180, no_wfp_file: bool = False,
100
102
  all_extensions: bool = False, all_folders: bool = False, hidden_files_folders: bool = False,
101
103
  scan_options: int = 7, sc_timeout: int = 600, sc_command: str = None, grpc_url: str = None,
102
104
  obfuscate: bool = False, ignore_cert_errors: bool = False, proxy: str = None, grpc_proxy: str = None,
103
105
  ca_cert: str = None, pac: PACFile = None, retry: int = 5, hpsm: bool = False,
104
106
  skip_size: int = 0, skip_extensions=None, skip_folders=None,
105
- strip_hpsm_ids=None, strip_snippet_ids=None, skip_md5_ids=None
107
+ strip_hpsm_ids=None, strip_snippet_ids=None, skip_md5_ids=None,
108
+ scan_settings: ScanossSettings = None
106
109
  ):
107
110
  """
108
- Initialise scanning class, including Winnowing, ScanossApi and ThreadedScanning
111
+ Initialise scanning class, including Winnowing, ScanossApi, ThreadedScanning
109
112
  """
110
113
  super().__init__(debug, trace, quiet)
111
114
  if skip_folders is None:
@@ -133,7 +136,7 @@ class Scanner(ScanossBase):
133
136
  skip_md5_ids=skip_md5_ids
134
137
  )
135
138
  self.scanoss_api = ScanossApi(debug=debug, trace=trace, quiet=quiet, api_key=api_key, url=url,
136
- sbom_path=sbom_path, scan_type=scan_type, flags=flags, timeout=timeout,
139
+ flags=flags, timeout=timeout,
137
140
  ver_details=ver_details, ignore_cert_errors=ignore_cert_errors,
138
141
  proxy=proxy, ca_cert=ca_cert, pac=pac, retry=retry
139
142
  )
@@ -157,6 +160,16 @@ class Scanner(ScanossBase):
157
160
  if skip_extensions: # Append extra file extensions to skip
158
161
  self.skip_extensions.extend(skip_extensions)
159
162
 
163
+ if scan_settings:
164
+ self.scan_settings = scan_settings
165
+ self.post_processor = ScanPostProcessor(scan_settings, debug=debug, trace=trace, quiet=quiet)
166
+ self._maybe_set_api_sbom()
167
+
168
def _maybe_set_api_sbom(self):
    """
    Forward the SBOM from the scan settings to the API client, if there is one

    Reads ``self.scan_settings.get_sbom()`` and, only when that value is truthy,
    passes it on to ``self.scanoss_api.set_sbom()``.
    """
    settings_sbom = self.scan_settings.get_sbom()
    if not settings_sbom:
        return  # no SBOM supplied by the settings; leave the API client untouched
    self.scanoss_api.set_sbom(settings_sbom)
172
+
160
173
  def __filter_files(self, files: list) -> list:
161
174
  """
162
175
  Filter which files should be considered for processing
@@ -329,14 +342,20 @@ class Scanner(ScanossBase):
329
342
  return True
330
343
  return False
331
344
 
332
- def scan_folder_with_options(self, scan_dir: str, deps_file: str = None, file_map: dict = None) -> bool:
345
+ def scan_folder_with_options(self, scan_dir: str, deps_file: str = None, file_map: dict = None,
346
+ dep_scope: SCOPE = None, dep_scope_include: str = None,
347
+ dep_scope_exclude: str = None) -> bool:
333
348
  """
334
349
  Scan the given folder for whatever scaning options that have been configured
350
+ :param dep_scope_exclude: comma separated list of dependency scopes to exclude
351
+ :param dep_scope_include: comma separated list of dependency scopes to include
352
+ :param dep_scope: Enum dependency scope to use
335
353
  :param scan_dir: directory to scan
336
354
  :param deps_file: pre-parsed dependency file to decorate
337
355
  :param file_map: mapping of obfuscated files back into originals
338
356
  :return: True if successful, False otherwise
339
357
  """
358
+
340
359
  success = True
341
360
  if not scan_dir:
342
361
  raise Exception(f"ERROR: Please specify a folder to scan")
@@ -348,7 +367,8 @@ class Scanner(ScanossBase):
348
367
  if self.scan_output:
349
368
  self.print_msg(f'Writing results to {self.scan_output}...')
350
369
  if self.is_dependency_scan():
351
- if not self.threaded_deps.run(what_to_scan=scan_dir, deps_file=deps_file, wait=False): # Kick off a background dependency scan
370
+ if not self.threaded_deps.run(what_to_scan=scan_dir, deps_file=deps_file, wait=False, dep_scope=dep_scope,
371
+ dep_scope_include= dep_scope_include, dep_scope_exclude=dep_scope_exclude): # Kick off a background dependency scan
352
372
  success = False
353
373
  if self.is_file_or_snippet_scan():
354
374
  if not self.scan_folder(scan_dir):
@@ -524,43 +544,34 @@ class Scanner(ScanossBase):
524
544
  raw_output += ",\n \"%s\":[%s]" % (file, json.dumps(dep_file, indent=2))
525
545
  # End for loop
526
546
  raw_output += "\n}"
527
- parsed_json = None
528
547
  try:
529
- parsed_json = json.loads(raw_output)
548
+ raw_results = json.loads(raw_output)
530
549
  except Exception as e:
531
- self.print_stderr(f'Warning: Problem decoding parsed json: {e}')
550
+ raise Exception(f'ERROR: Problem decoding parsed json: {e}')
551
+
552
+ results = self.post_processor.load_results(raw_results).post_process()
532
553
 
533
554
  if self.output_format == 'plain':
534
- if parsed_json:
535
- self.__log_result(json.dumps(parsed_json, indent=2, sort_keys=True))
536
- else:
537
- self.__log_result(raw_output)
555
+ self.__log_result(json.dumps(results, indent=2, sort_keys=True))
538
556
  elif self.output_format == 'cyclonedx':
539
557
  cdx = CycloneDx(self.debug, self.scan_output)
540
- if parsed_json:
541
- success = cdx.produce_from_json(parsed_json)
542
- else:
543
- success = cdx.produce_from_str(raw_output)
558
+ success = cdx.produce_from_json(results)
544
559
  elif self.output_format == 'spdxlite':
545
560
  spdxlite = SpdxLite(self.debug, self.scan_output)
546
- if parsed_json:
547
- success = spdxlite.produce_from_json(parsed_json)
548
- else:
549
- success = spdxlite.produce_from_str(raw_output)
561
+ success = spdxlite.produce_from_json(results)
550
562
  elif self.output_format == 'csv':
551
563
  csvo = CsvOutput(self.debug, self.scan_output)
552
- if parsed_json:
553
- success = csvo.produce_from_json(parsed_json)
554
- else:
555
- success = csvo.produce_from_str(raw_output)
564
+ success = csvo.produce_from_json(results)
556
565
  else:
557
566
  self.print_stderr(f'ERROR: Unknown output format: {self.output_format}')
558
567
  success = False
559
568
  return success
560
569
 
561
- def scan_file_with_options(self, file: str, deps_file: str = None, file_map: dict = None) -> bool:
570
+ def scan_file_with_options(self, file: str, deps_file: str = None, file_map: dict = None, dep_scope: SCOPE = None,
571
+ dep_scope_include: str = None, dep_scope_exclude: str = None) -> bool:
562
572
  """
563
573
  Scan the given file for whatever scaning options that have been configured
574
+ :param dep_scope:
564
575
  :param file: file to scan
565
576
  :param deps_file: pre-parsed dependency file to decorate
566
577
  :param file_map: mapping of obfuscated files back into originals
@@ -577,7 +588,8 @@ class Scanner(ScanossBase):
577
588
  if self.scan_output:
578
589
  self.print_msg(f'Writing results to {self.scan_output}...')
579
590
  if self.is_dependency_scan():
580
- if not self.threaded_deps.run(what_to_scan=file, deps_file=deps_file, wait=False): # Kick off a background dependency scan
591
+ if not self.threaded_deps.run(what_to_scan=file, deps_file=deps_file, wait=False, dep_scope=dep_scope,
592
+ dep_scope_include=dep_scope_include, dep_scope_exclude=dep_scope_exclude): # Kick off a background dependency scan
581
593
  success = False
582
594
  if self.is_file_or_snippet_scan():
583
595
  if not self.scan_file(file):
@@ -713,7 +725,7 @@ class Scanner(ScanossBase):
713
725
  else:
714
726
  Scanner.print_stderr(f'Warning: No files found to scan from: {filtered_files}')
715
727
  return success
716
-
728
+
717
729
  def scan_files_with_options(self, files: [], deps_file: str = None, file_map: dict = None) -> bool:
718
730
  """
719
731
  Scan the given list of files for whatever scaning options that have been configured