scanoss 1.20.5__py3-none-any.whl → 1.22.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. protoc_gen_swagger/options/annotations_pb2.py +9 -12
  2. protoc_gen_swagger/options/annotations_pb2_grpc.py +1 -1
  3. protoc_gen_swagger/options/openapiv2_pb2.py +96 -98
  4. protoc_gen_swagger/options/openapiv2_pb2_grpc.py +1 -1
  5. scanoss/__init__.py +1 -1
  6. scanoss/api/common/v2/scanoss_common_pb2.py +20 -18
  7. scanoss/api/common/v2/scanoss_common_pb2_grpc.py +1 -1
  8. scanoss/api/components/v2/scanoss_components_pb2.py +38 -48
  9. scanoss/api/components/v2/scanoss_components_pb2_grpc.py +96 -142
  10. scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py +42 -22
  11. scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py +185 -75
  12. scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py +32 -30
  13. scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py +83 -75
  14. scanoss/api/provenance/v2/scanoss_provenance_pb2.py +20 -21
  15. scanoss/api/provenance/v2/scanoss_provenance_pb2_grpc.py +1 -1
  16. scanoss/api/scanning/v2/scanoss_scanning_pb2.py +20 -10
  17. scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py +70 -40
  18. scanoss/api/semgrep/v2/scanoss_semgrep_pb2.py +18 -22
  19. scanoss/api/semgrep/v2/scanoss_semgrep_pb2_grpc.py +49 -71
  20. scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py +27 -37
  21. scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py +72 -109
  22. scanoss/cli.py +417 -74
  23. scanoss/components.py +5 -3
  24. scanoss/constants.py +12 -0
  25. scanoss/data/build_date.txt +1 -1
  26. scanoss/file_filters.py +272 -57
  27. scanoss/results.py +92 -109
  28. scanoss/scanner.py +25 -20
  29. scanoss/scanners/__init__.py +23 -0
  30. scanoss/scanners/container_scanner.py +474 -0
  31. scanoss/scanners/folder_hasher.py +302 -0
  32. scanoss/scanners/scanner_config.py +73 -0
  33. scanoss/scanners/scanner_hfh.py +172 -0
  34. scanoss/scanoss_settings.py +9 -5
  35. scanoss/scanossapi.py +29 -7
  36. scanoss/scanossbase.py +9 -3
  37. scanoss/scanossgrpc.py +145 -13
  38. scanoss/threadedscanning.py +6 -6
  39. scanoss/utils/abstract_presenter.py +103 -0
  40. scanoss/utils/crc64.py +96 -0
  41. scanoss/utils/simhash.py +198 -0
  42. {scanoss-1.20.5.dist-info → scanoss-1.22.0.dist-info}/METADATA +4 -2
  43. scanoss-1.22.0.dist-info/RECORD +83 -0
  44. {scanoss-1.20.5.dist-info → scanoss-1.22.0.dist-info}/WHEEL +1 -1
  45. scanoss-1.20.5.dist-info/RECORD +0 -74
  46. {scanoss-1.20.5.dist-info → scanoss-1.22.0.dist-info}/entry_points.txt +0 -0
  47. {scanoss-1.20.5.dist-info → scanoss-1.22.0.dist-info/licenses}/LICENSE +0 -0
  48. {scanoss-1.20.5.dist-info → scanoss-1.22.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,302 @@
1
+ import json
2
+ import os
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+ from typing import Dict, List, Literal, Optional
6
+
7
+ from progress.bar import Bar
8
+
9
+ from scanoss.file_filters import FileFilters
10
+ from scanoss.scanoss_settings import ScanossSettings
11
+ from scanoss.scanossbase import ScanossBase
12
+ from scanoss.utils.abstract_presenter import AbstractPresenter
13
+ from scanoss.utils.crc64 import CRC64
14
+ from scanoss.utils.simhash import WordFeatureSet, fingerprint, simhash, vectorize_bytes
15
+
16
# A directory node gets no simhash values unless it holds at least this many
# files with distinct content keys (checked in FolderHasher._hash_calc).
MINIMUM_FILE_COUNT = 8
# Minimum UTF-8 byte length of the sorted, concatenated file names required
# before a name simhash is computed (checked in FolderHasher._hash_calc).
MINIMUM_CONCATENATED_NAME_LENGTH = 32
# NOTE: despite the name, FolderHasher._build_root_node uses this as an UPPER
# bound: files whose UTF-8 encoded name is longer than this are left out.
MINIMUM_FILE_NAME_LENGTH = 32
19
+
20
+
21
class DirectoryNode:
    """
    A directory entry in the tree that is built for folder hashing.

    Attributes:
        path: Path of the directory this node stands for.
        is_dir: Always True for a directory node.
        children: Child directory nodes, keyed by the child's path.
        files: File entries collected under this directory.
    """

    def __init__(self, path: str):
        # Containers start out empty; the tree builder fills them in later.
        self.files: List['DirectoryFile'] = []
        self.children: Dict[str, 'DirectoryNode'] = {}
        self.is_dir = True
        self.path = path
31
+
32
+
33
class DirectoryFile:
    """
    A file entry in the tree that is built for folder hashing.

    Attributes:
        path: Path of the file as recorded by the tree builder.
        key: Hash key of the file content.
        key_str: Hexadecimal text form of ``key``.
    """

    def __init__(self, path: str, key: bytes, key_str: str):
        self.path, self.key, self.key_str = path, key, key_str
42
+
43
+
44
@dataclass
class FolderHasherConfig:
    """
    Options accepted by FolderHasher.

    Every field has a default, so ``FolderHasherConfig()`` is a valid
    configuration.
    """

    # Output verbosity flags
    debug: bool = False
    trace: bool = False
    quiet: bool = False
    # Where and how the result is written; 'json' is the only declared format
    output_file: Optional[str] = None
    output_format: Literal['json'] = 'json'
    # Settings file handling
    settings_file: Optional[str] = None
    skip_settings_file: bool = False
53
+
54
+
55
def create_folder_hasher_config_from_args(args) -> FolderHasherConfig:
    """
    Build a FolderHasherConfig from a parsed command-line namespace.

    ``debug``, ``trace`` and ``quiet`` must exist on ``args``; every other
    option is looked up with a fallback, so it may be absent.

    Args:
        args: Object exposing the parsed options as attributes.

    Returns:
        FolderHasherConfig: The populated configuration.
    """
    debug, trace, quiet = args.debug, args.trace, args.quiet

    # config field -> (attribute name on args, fallback when it is missing)
    optional_fields = {
        'output_file': ('output', None),
        'output_format': ('format', 'json'),
        'settings_file': ('settings', None),
        'skip_settings_file': ('skip_settings_file', False),
    }
    looked_up = {
        field: getattr(args, attr_name, fallback) for field, (attr_name, fallback) in optional_fields.items()
    }
    return FolderHasherConfig(debug=debug, trace=trace, quiet=quiet, **looked_up)
65
+
66
+
67
class FolderHasher:
    """
    Folder Hasher.

    This class is used to produce a folder hash for a given directory.

    It builds a directory tree (DirectoryNode) and computes the associated
    hash data for the folder.
    """

    def __init__(
        self,
        scan_dir: str,
        config: Optional[FolderHasherConfig] = None,
        scanoss_settings: Optional[ScanossSettings] = None,
    ):
        """
        Initialise the FolderHasher.

        Args:
            scan_dir (str): The directory this hasher was created for.
            config (Optional[FolderHasherConfig]): Hasher options. When omitted,
                a default FolderHasherConfig is used.
            scanoss_settings (Optional[ScanossSettings]): Settings handed to the file filters.
        """
        # The signature allows config to be omitted, so fall back to the defaults
        # instead of failing on the attribute lookups below.
        if config is None:
            config = FolderHasherConfig()

        self.base = ScanossBase(
            debug=config.debug,
            trace=config.trace,
            quiet=config.quiet,
        )
        self.file_filters = FileFilters(
            debug=config.debug,
            trace=config.trace,
            quiet=config.quiet,
            scanoss_settings=scanoss_settings,
            is_folder_hashing_scan=True,
        )
        self.presenter = FolderHasherPresenter(
            self,
            debug=config.debug,
            trace=config.trace,
            quiet=config.quiet,
        )

        self.scan_dir = scan_dir
        self.tree = None

    def hash_directory(self, path: str) -> dict:
        """
        Generate the folder hashing request structure from a directory path.

        This method builds a directory tree (DirectoryNode) and computes the associated
        hash data for the folder. The result is also stored in ``self.tree``.

        Args:
            path (str): The root directory path.

        Returns:
            dict: The folder hash request structure.
        """
        root_node = self._build_root_node(path)
        tree = self._hash_calc_from_node(root_node)

        self.tree = tree

        return tree

    def _build_root_node(self, path: str) -> DirectoryNode:
        """
        Build a directory tree from the given path with file information.

        The tree includes DirectoryNode objects populated with filtered file items,
        each containing their relative path and CRC64 hash key. Files that cannot
        be processed are reported via debug output and skipped.

        Args:
            path (str): The directory path to build the tree from.

        Returns:
            DirectoryNode: The root node representing the directory.
        """
        root = Path(path).resolve()
        root_node = DirectoryNode(str(root))

        # Only files whose UTF-8 encoded name fits within the length limit are considered
        all_files = [
            f for f in root.rglob('*') if f.is_file() and len(f.name.encode('utf-8')) <= MINIMUM_FILE_NAME_LENGTH
        ]
        filtered_files = self.file_filters.get_filtered_files_from_files(all_files, str(root))

        # Sort the files by name to ensure the hash is the same for the same folder
        filtered_files.sort()

        bar = Bar('Hashing files...', max=len(filtered_files))
        for file_path in filtered_files:
            # Bind before entering the try block so the except handler never reports
            # an unbound name or the path left over from the previous iteration.
            full_file_path = file_path
            try:
                file_path_obj = Path(file_path) if isinstance(file_path, str) else file_path
                full_file_path = file_path_obj if file_path_obj.is_absolute() else root / file_path_obj

                self.base.print_debug(f'\nHashing file {str(full_file_path)}')

                file_bytes = full_file_path.read_bytes()
                key = CRC64.get_hash_buff(file_bytes)
                key_str = ''.join(f'{b:02x}' for b in key)
                rel_path = str(full_file_path.relative_to(root))

                file_item = DirectoryFile(rel_path, key, key_str)

                # Register the file on every directory between the root and its parent,
                # creating the intermediate nodes on demand.
                current_node = root_node
                for part in Path(rel_path).parent.parts:
                    child_path = str(Path(current_node.path) / part)
                    if child_path not in current_node.children:
                        current_node.children[child_path] = DirectoryNode(child_path)
                    current_node = current_node.children[child_path]
                    current_node.files.append(file_item)

                # The root node holds every file found anywhere below it
                root_node.files.append(file_item)

            except Exception as e:
                # Best effort: an unreadable file must not abort the whole hashing run
                self.base.print_debug(f'Skipping file {full_file_path}: {str(e)}')

            bar.next()

        bar.finish()
        return root_node

    def _hash_calc_from_node(self, node: DirectoryNode) -> dict:
        """
        Recursively compute folder hash data for a directory node.

        The hash data includes the path identifier, simhash for file names,
        simhash for file content, and children node hash information. The two
        simhash entries are hexadecimal strings, or None when no hash was computed.

        Args:
            node (DirectoryNode): The directory node to compute the hash for.

        Returns:
            dict: The computed hash data for the node.
        """
        hash_data = self._hash_calc(node)
        name_hash = hash_data['name_hash']
        content_hash = hash_data['content_hash']

        return {
            'path_id': node.path,
            'sim_hash_names': f'{name_hash:02x}' if name_hash is not None else None,
            'sim_hash_content': f'{content_hash:02x}' if content_hash is not None else None,
            'children': [self._hash_calc_from_node(child) for child in node.children.values()],
        }

    def _hash_calc(self, node: DirectoryNode) -> dict:
        """
        Compute folder hash values for a given directory node.

        The method aggregates unique file keys and sorted file names to generate
        simhash-based hash values for both file names and file contents. Both
        values are None when the node has fewer than MINIMUM_FILE_COUNT files with
        distinct keys, or when the concatenated names are shorter than
        MINIMUM_CONCATENATED_NAME_LENGTH bytes.

        Args:
            node (DirectoryNode): The directory node containing file items.

        Returns:
            dict: A dictionary with 'name_hash' and 'content_hash' keys.
        """
        processed_hashes = set()
        file_hashes = []
        selected_names = []

        for file in node.files:
            key_str = file.key_str
            # Files with identical content keys are only counted once
            if key_str in processed_hashes:
                continue
            processed_hashes.add(key_str)

            selected_names.append(os.path.basename(file.path))
            file_hashes.append(bytes(file.key))

        if len(selected_names) < MINIMUM_FILE_COUNT:
            return {
                'name_hash': None,
                'content_hash': None,
            }

        selected_names.sort()
        encoded_names = ''.join(selected_names).encode('utf-8')

        if len(encoded_names) < MINIMUM_CONCATENATED_NAME_LENGTH:
            return {
                'name_hash': None,
                'content_hash': None,
            }

        names_simhash = simhash(WordFeatureSet(encoded_names))
        content_simhash = fingerprint(vectorize_bytes(file_hashes))

        return {
            'name_hash': names_simhash,
            'content_hash': content_simhash,
        }

    def present(self, output_format: Optional[str] = None, output_file: Optional[str] = None):
        """Present the hashed tree in the selected format"""
        self.presenter.present(output_format=output_format, output_file=output_file)
261
+
262
+
263
class FolderHasherPresenter(AbstractPresenter):
    """
    Presenter for the FolderHasher.
    Renders the hashed tree held by the FolderHasher it was created with.
    """

    def __init__(self, folder_hasher: FolderHasher, **kwargs):
        """
        Args:
            folder_hasher (FolderHasher): The hasher whose ``tree`` is presented.
            **kwargs: Passed through unchanged to AbstractPresenter.
        """
        super().__init__(**kwargs)
        self.folder_hasher = folder_hasher

    def _format_json_output(self) -> str:
        """
        Serialise the hashed tree as indented JSON

        Returns:
            str: The formatted JSON string
        """
        tree = self.folder_hasher.tree
        return json.dumps(tree, indent=2)

    def _format_plain_output(self) -> str:
        """
        Render the hashed tree as plain text: indented JSON when the tree is a
        dict, otherwise its ``str()`` form
        """
        tree = self.folder_hasher.tree
        if isinstance(tree, dict):
            return json.dumps(tree, indent=2)
        return str(tree)

    def _format_cyclonedx_output(self) -> str:
        raise NotImplementedError('CycloneDX output is not implemented')

    def _format_spdxlite_output(self) -> str:
        raise NotImplementedError('SPDXlite output is not implemented')

    def _format_csv_output(self) -> str:
        raise NotImplementedError('CSV output is not implemented')

    def _format_raw_output(self) -> str:
        raise NotImplementedError('Raw output is not implemented')
@@ -0,0 +1,73 @@
1
+ """
2
+ SPDX-License-Identifier: MIT
3
+
4
+ Copyright (c) 2025, SCANOSS
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
+ """
24
+
25
+ from dataclasses import dataclass
26
+ from typing import Optional
27
+
28
+ from pypac.parser import PACFile
29
+
30
+ from scanoss.constants import (
31
+ DEFAULT_NB_THREADS,
32
+ DEFAULT_POST_SIZE,
33
+ DEFAULT_SC_TIMEOUT,
34
+ DEFAULT_TIMEOUT,
35
+ )
36
+
37
+
38
@dataclass
class ScannerConfig:
    """
    Options shared by the scanners.

    Every field has a default; the numeric defaults come from scanoss.constants.
    """

    # Output verbosity flags
    debug: bool = False
    trace: bool = False
    quiet: bool = False
    # Service access
    api_key: Optional[str] = None
    url: Optional[str] = None
    grpc_url: Optional[str] = None
    # Request sizing, timeouts and threading
    post_size: int = DEFAULT_POST_SIZE
    timeout: int = DEFAULT_TIMEOUT
    sc_timeout: int = DEFAULT_SC_TIMEOUT
    nb_threads: int = DEFAULT_NB_THREADS
    # Proxy settings
    proxy: Optional[str] = None
    grpc_proxy: Optional[str] = None

    # Certificate and proxy auto-config (PAC) settings
    ca_cert: Optional[str] = None
    pac: Optional[PACFile] = None
55
+
56
+
57
def create_scanner_config_from_args(args) -> ScannerConfig:
    """
    Build a ScannerConfig from a parsed command-line namespace.

    ``debug``, ``trace`` and ``quiet`` must exist on ``args``; every other
    option is looked up with a fallback, so it may be absent.

    Args:
        args: Object exposing the parsed options as attributes.

    Returns:
        ScannerConfig: The populated configuration.
    """
    debug, trace, quiet = args.debug, args.trace, args.quiet

    # config field -> (attribute name on args, fallback when it is missing)
    optional_fields = {
        'api_key': ('key', None),
        'url': ('api_url', None),
        'grpc_url': ('grpc_url', None),
        'post_size': ('post_size', DEFAULT_POST_SIZE),
        'timeout': ('timeout', DEFAULT_TIMEOUT),
        'sc_timeout': ('sc_timeout', DEFAULT_SC_TIMEOUT),
        'nb_threads': ('nb_threads', DEFAULT_NB_THREADS),
        'proxy': ('proxy', None),
        'grpc_proxy': ('grpc_proxy', None),
        'ca_cert': ('ca_cert', None),
        'pac': ('pac', None),
    }
    looked_up = {
        field: getattr(args, attr_name, fallback) for field, (attr_name, fallback) in optional_fields.items()
    }
    return ScannerConfig(debug=debug, trace=trace, quiet=quiet, **looked_up)
@@ -0,0 +1,172 @@
1
+ """
2
+ SPDX-License-Identifier: MIT
3
+
4
+ Copyright (c) 2025, SCANOSS
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
+ """
24
+
25
+ import json
26
+ import threading
27
+ import time
28
+ from typing import Dict, Optional
29
+
30
+ from progress.spinner import Spinner
31
+
32
+ from scanoss.file_filters import FileFilters
33
+ from scanoss.scanners.folder_hasher import FolderHasher
34
+ from scanoss.scanners.scanner_config import ScannerConfig
35
+ from scanoss.scanoss_settings import ScanossSettings
36
+ from scanoss.scanossbase import ScanossBase
37
+ from scanoss.scanossgrpc import ScanossGrpc
38
+ from scanoss.utils.abstract_presenter import AbstractPresenter
39
+
40
+
41
class ScannerHFH:
    """
    Folder Hashing Scanner.

    This scanner processes a directory, computes CRC64 hashes for the files,
    and calculates simhash values based on file names and content to detect folder-level similarities.
    """

    def __init__(
        self,
        scan_dir: str,
        config: ScannerConfig,
        client: Optional[ScanossGrpc] = None,
        scanoss_settings: Optional[ScanossSettings] = None,
    ):
        """
        Initialize the ScannerHFH.

        Args:
            scan_dir (str): The directory to be scanned.
            config (ScannerConfig): Configuration parameters for the scanner.
            client (ScanossGrpc): gRPC client for communicating with the scanning service.
                It may be omitted at construction time but is required by ``scan()``.
            scanoss_settings (Optional[ScanossSettings]): Optional settings for Scanoss.
        """
        self.base = ScanossBase(
            debug=config.debug,
            trace=config.trace,
            quiet=config.quiet,
        )
        self.presenter = ScannerHFHPresenter(
            self,
            debug=config.debug,
            trace=config.trace,
            quiet=config.quiet,
        )
        self.file_filters = FileFilters(
            debug=config.debug,
            trace=config.trace,
            quiet=config.quiet,
            scanoss_settings=scanoss_settings,
        )
        # Only debug/trace/quiet are read from the config by the folder hasher
        self.folder_hasher = FolderHasher(
            scan_dir=scan_dir,
            config=config,
            scanoss_settings=scanoss_settings,
        )

        self.scan_dir = scan_dir
        self.client = client
        self.scan_results = None
        self.best_match = False
        self.threshold = 100

    def scan(self) -> Optional[Dict]:
        """
        Scan the provided directory using the folder hashing algorithm.

        The response is also stored in ``self.scan_results``.

        Returns:
            Optional[Dict]: The folder hash response from the gRPC client.

        Raises:
            ValueError: If no gRPC client was supplied.
        """
        # Fail fast: without a client the (potentially slow) directory hashing
        # would be wasted work that ends in an AttributeError.
        if self.client is None:
            raise ValueError('A gRPC client is required to run a folder hashing scan')

        hfh_request = {
            'root': self.folder_hasher.hash_directory(self.scan_dir),
            'threshold': self.threshold,
            'best_match': self.best_match,
        }

        spinner = Spinner('Scanning folder...')
        stop_spinner = threading.Event()

        def spin():
            while not stop_spinner.is_set():
                spinner.next()
                # Waiting on the event (rather than sleeping) lets the thread
                # exit as soon as the scan has finished.
                stop_spinner.wait(0.1)

        spinner_thread = threading.Thread(target=spin)
        spinner_thread.start()

        try:
            response = self.client.folder_hash_scan(hfh_request)
            self.scan_results = response
        finally:
            # Always stop the spinner, even when the request raises
            stop_spinner.set()
            spinner_thread.join()
            spinner.finish()

        return self.scan_results

    def present(self, output_format: Optional[str] = None, output_file: Optional[str] = None):
        """Present the results in the selected format"""
        self.presenter.present(output_format=output_format, output_file=output_file)
131
+
132
+
133
class ScannerHFHPresenter(AbstractPresenter):
    """
    Presenter for the ScannerHFH.
    Renders the scan results held by the ScannerHFH it was created with.
    """

    def __init__(self, scanner: ScannerHFH, **kwargs):
        """
        Args:
            scanner (ScannerHFH): The scanner whose ``scan_results`` are presented.
            **kwargs: Passed through unchanged to AbstractPresenter.
        """
        super().__init__(**kwargs)
        self.scanner = scanner

    def _format_json_output(self) -> str:
        """
        Serialise the scan results as indented JSON

        Returns:
            str: The formatted JSON string
        """
        results = self.scanner.scan_results
        return json.dumps(results, indent=2)

    def _format_plain_output(self) -> str:
        """
        Render the scan results as plain text: indented JSON when the results
        are a dict, otherwise their ``str()`` form
        """
        results = self.scanner.scan_results
        if isinstance(results, dict):
            return json.dumps(results, indent=2)
        return str(results)

    def _format_cyclonedx_output(self) -> str:
        raise NotImplementedError('CycloneDX output is not implemented')

    def _format_spdxlite_output(self) -> str:
        raise NotImplementedError('SPDXlite output is not implemented')

    def _format_csv_output(self) -> str:
        raise NotImplementedError('CSV output is not implemented')

    def _format_raw_output(self) -> str:
        raise NotImplementedError('Raw output is not implemented')
@@ -24,13 +24,17 @@ SPDX-License-Identifier: MIT
24
24
 
25
25
  import json
26
26
  from pathlib import Path
27
- from typing import List, TypedDict
27
+ from typing import List, Optional, TypedDict
28
28
 
29
29
  import importlib_resources
30
30
  from jsonschema import validate
31
31
 
32
32
  from .scanossbase import ScanossBase
33
- from .utils.file import JSON_ERROR_FILE_NOT_FOUND, JSON_ERROR_FILE_EMPTY, validate_json_file
33
+ from .utils.file import (
34
+ JSON_ERROR_FILE_EMPTY,
35
+ JSON_ERROR_FILE_NOT_FOUND,
36
+ validate_json_file,
37
+ )
34
38
 
35
39
  DEFAULT_SCANOSS_JSON_FILE = Path('scanoss.json')
36
40
 
@@ -96,7 +100,7 @@ class ScanossSettings(ScanossBase):
96
100
  if filepath:
97
101
  self.load_json_file(filepath)
98
102
 
99
- def load_json_file(self, filepath: 'str | None' = None, scan_root: 'str | None' = None) -> 'ScanossSettings':
103
+ def load_json_file(self, filepath: Optional[str] = None, scan_root: Optional[str] = None) -> 'ScanossSettings':
100
104
  """
101
105
  Load the scan settings file. If no filepath is provided, scanoss.json will be used as default.
102
106
 
@@ -118,7 +122,7 @@ class ScanossSettings(ScanossBase):
118
122
 
119
123
  result = validate_json_file(json_file)
120
124
  if not result.is_valid:
121
- if result.error_code == JSON_ERROR_FILE_NOT_FOUND or result.error_code == JSON_ERROR_FILE_EMPTY:
125
+ if result.error_code in (JSON_ERROR_FILE_NOT_FOUND, JSON_ERROR_FILE_EMPTY):
122
126
  self.print_msg(
123
127
  f'WARNING: The supplied settings file "{filepath}" was not found or is empty. Skipping...'
124
128
  )
@@ -235,7 +239,7 @@ class ScanossSettings(ScanossBase):
235
239
  include_bom_entries = self._remove_duplicates(self.normalize_bom_entries(self.get_bom_include()))
236
240
  replace_bom_entries = self._remove_duplicates(self.normalize_bom_entries(self.get_bom_replace()))
237
241
  self.print_debug(
238
- f"Scan type set to 'identify'. Adding {len(include_bom_entries) + len(replace_bom_entries)} components as context to the scan. \n"
242
+ f"Scan type set to 'identify'. Adding {len(include_bom_entries) + len(replace_bom_entries)} components as context to the scan. \n" # noqa: E501
239
243
  f'From Include list: {[entry["purl"] for entry in include_bom_entries]} \n'
240
244
  f'From Replace list: {[entry["purl"] for entry in replace_bom_entries]} \n'
241
245
  )
scanoss/scanossapi.py CHANGED
@@ -52,7 +52,7 @@ class ScanossApi(ScanossBase):
52
52
  Currently support posting scan requests to the SCANOSS streaming API
53
53
  """
54
54
 
55
- def __init__(
55
+ def __init__( # noqa: PLR0913, PLR0915
56
56
  self,
57
57
  scan_format: str = None,
58
58
  flags: str = None,
@@ -68,6 +68,7 @@ class ScanossApi(ScanossBase):
68
68
  ca_cert: str = None,
69
69
  pac: PACFile = None,
70
70
  retry: int = 5,
71
+ req_headers: dict = None,
71
72
  ):
72
73
  """
73
74
  Initialise the SCANOSS API
@@ -86,17 +87,17 @@ class ScanossApi(ScanossBase):
86
87
  HTTPS_PROXY='http://<ip>:<port>'
87
88
  """
88
89
  super().__init__(debug, trace, quiet)
89
- self.url = url if url else SCANOSS_SCAN_URL
90
- self.api_key = api_key if api_key else SCANOSS_API_KEY
91
- if self.api_key and not url and not os.environ.get('SCANOSS_SCAN_URL'):
92
- self.url = DEFAULT_URL2 # API key specific and no alternative URL, so use the default premium
90
+ self.url = url
91
+ self.api_key = api_key
93
92
  self.sbom = None
94
93
  self.scan_format = scan_format if scan_format else 'plain'
95
94
  self.flags = flags
96
95
  self.timeout = timeout if timeout > 5 else 180
97
96
  self.retry_limit = retry if retry >= 0 else 5
98
97
  self.ignore_cert_errors = ignore_cert_errors
98
+ self.req_headers = req_headers if req_headers else {}
99
99
  self.headers = {}
100
+
100
101
  if ver_details:
101
102
  self.headers['x-scanoss-client'] = ver_details
102
103
  if self.api_key:
@@ -104,17 +105,24 @@ class ScanossApi(ScanossBase):
104
105
  self.headers['x-api-key'] = self.api_key
105
106
  self.headers['User-Agent'] = f'scanoss-py/{__version__}'
106
107
  self.headers['user-agent'] = f'scanoss-py/{__version__}'
108
+ self.load_generic_headers()
109
+
110
+ self.url = url if url else SCANOSS_SCAN_URL
111
+ self.api_key = api_key if api_key else SCANOSS_API_KEY
112
+ if self.api_key and not url and not os.environ.get('SCANOSS_SCAN_URL'):
113
+ self.url = DEFAULT_URL2 # API key specific and no alternative URL, so use the default premium
114
+
107
115
  if self.trace:
108
116
  logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
109
117
  http_client.HTTPConnection.debuglevel = 1
110
118
  if pac and not proxy: # Setup PAC session if requested (and no proxy has been explicitly set)
111
- self.print_debug(f'Setting up PAC session...')
119
+ self.print_debug('Setting up PAC session...')
112
120
  self.session = PACSession(pac=pac)
113
121
  else:
114
122
  self.session = requests.sessions.Session()
115
123
  self.verify = None
116
124
  if self.ignore_cert_errors:
117
- self.print_debug(f'Ignoring cert errors...')
125
+ self.print_debug('Ignoring cert errors...')
118
126
  urllib3.disable_warnings(InsecureRequestWarning)
119
127
  self.verify = False
120
128
  self.session.verify = False
@@ -261,6 +269,20 @@ class ScanossApi(ScanossBase):
261
269
  self.sbom = sbom
262
270
  return self
263
271
 
272
def load_generic_headers(self):
    """
    Adds custom headers from req_headers to the headers collection.

    If an x-api-key header is present (the name is matched case-insensitively,
    as HTTP header names are case-insensitive), its value is stored as the API
    key. If, in addition, no URL is configured (directly or via the
    SCANOSS_SCAN_URL environment variable), the URL is set to the premium
    endpoint (DEFAULT_URL2).

    Header names are copied into the headers collection exactly as supplied.
    """
    if not self.req_headers:  # Nothing to load
        return
    for key, value in self.req_headers.items():
        if key.lower() == 'x-api-key':  # Set premium URL if x-api-key header is set
            if not self.url and not os.environ.get('SCANOSS_SCAN_URL'):
                self.url = DEFAULT_URL2  # API key specific and no alternative URL, so use the default premium
            self.api_key = value
        self.headers[key] = value
264
286
 
265
287
  #
266
288
  # End of ScanossApi Class