scanoss 1.20.6__py3-none-any.whl → 1.23.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- protoc_gen_swagger/options/annotations_pb2.py +9 -12
- protoc_gen_swagger/options/annotations_pb2_grpc.py +1 -1
- protoc_gen_swagger/options/openapiv2_pb2.py +96 -98
- protoc_gen_swagger/options/openapiv2_pb2_grpc.py +1 -1
- scanoss/__init__.py +1 -1
- scanoss/api/common/v2/scanoss_common_pb2.py +20 -18
- scanoss/api/common/v2/scanoss_common_pb2_grpc.py +1 -1
- scanoss/api/components/v2/scanoss_components_pb2.py +38 -48
- scanoss/api/components/v2/scanoss_components_pb2_grpc.py +96 -142
- scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py +42 -22
- scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py +185 -75
- scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py +32 -30
- scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py +83 -75
- scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2.py +49 -0
- scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2_grpc.py +142 -0
- scanoss/api/scanning/v2/scanoss_scanning_pb2.py +20 -10
- scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py +70 -40
- scanoss/api/semgrep/v2/scanoss_semgrep_pb2.py +18 -22
- scanoss/api/semgrep/v2/scanoss_semgrep_pb2_grpc.py +49 -71
- scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py +27 -37
- scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py +72 -109
- scanoss/cli.py +393 -84
- scanoss/components.py +21 -11
- scanoss/constants.py +12 -0
- scanoss/data/build_date.txt +1 -1
- scanoss/file_filters.py +272 -57
- scanoss/results.py +92 -109
- scanoss/scanners/__init__.py +23 -0
- scanoss/scanners/container_scanner.py +474 -0
- scanoss/scanners/folder_hasher.py +302 -0
- scanoss/scanners/scanner_config.py +73 -0
- scanoss/scanners/scanner_hfh.py +173 -0
- scanoss/scanoss_settings.py +9 -5
- scanoss/scanossbase.py +9 -3
- scanoss/scanossgrpc.py +143 -18
- scanoss/threadedscanning.py +6 -6
- scanoss/utils/abstract_presenter.py +103 -0
- scanoss/utils/crc64.py +96 -0
- scanoss/utils/simhash.py +198 -0
- {scanoss-1.20.6.dist-info → scanoss-1.23.0.dist-info}/METADATA +2 -1
- scanoss-1.23.0.dist-info/RECORD +83 -0
- {scanoss-1.20.6.dist-info → scanoss-1.23.0.dist-info}/WHEEL +1 -1
- scanoss/api/provenance/v2/scanoss_provenance_pb2.py +0 -42
- scanoss/api/provenance/v2/scanoss_provenance_pb2_grpc.py +0 -108
- scanoss-1.20.6.dist-info/RECORD +0 -74
- /scanoss/api/{provenance → geoprovenance}/__init__.py +0 -0
- /scanoss/api/{provenance → geoprovenance}/v2/__init__.py +0 -0
- {scanoss-1.20.6.dist-info → scanoss-1.23.0.dist-info}/entry_points.txt +0 -0
- {scanoss-1.20.6.dist-info → scanoss-1.23.0.dist-info}/licenses/LICENSE +0 -0
- {scanoss-1.20.6.dist-info → scanoss-1.23.0.dist-info}/top_level.txt +0 -0
scanoss/scanners/folder_hasher.py
ADDED

@@ -0,0 +1,302 @@
+import json
+import os
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Dict, List, Literal, Optional
+
+from progress.bar import Bar
+
+from scanoss.file_filters import FileFilters
+from scanoss.scanoss_settings import ScanossSettings
+from scanoss.scanossbase import ScanossBase
+from scanoss.utils.abstract_presenter import AbstractPresenter
+from scanoss.utils.crc64 import CRC64
+from scanoss.utils.simhash import WordFeatureSet, fingerprint, simhash, vectorize_bytes
+
+MINIMUM_FILE_COUNT = 8
+MINIMUM_CONCATENATED_NAME_LENGTH = 32
+MINIMUM_FILE_NAME_LENGTH = 32
+
+
+class DirectoryNode:
+    """
+    Represents a node in the directory tree for folder hashing.
+    """
+
+    def __init__(self, path: str):
+        self.path = path
+        self.is_dir = True
+        self.children: Dict[str, DirectoryNode] = {}
+        self.files: List[DirectoryFile] = []
+
+
+class DirectoryFile:
+    """
+    Represents a file in the directory tree for folder hashing.
+    """
+
+    def __init__(self, path: str, key: bytes, key_str: str):
+        self.path = path
+        self.key = key
+        self.key_str = key_str
+
+
+@dataclass
+class FolderHasherConfig:
+    debug: bool = False
+    trace: bool = False
+    quiet: bool = False
+    output_file: Optional[str] = None
+    output_format: Literal['json'] = 'json'
+    settings_file: Optional[str] = None
+    skip_settings_file: bool = False
+
+
+def create_folder_hasher_config_from_args(args) -> FolderHasherConfig:
+    return FolderHasherConfig(
+        debug=args.debug,
+        trace=args.trace,
+        quiet=args.quiet,
+        output_file=getattr(args, 'output', None),
+        output_format=getattr(args, 'format', 'json'),
+        settings_file=getattr(args, 'settings', None),
+        skip_settings_file=getattr(args, 'skip_settings_file', False),
+    )
+
+
+class FolderHasher:
+    """
+    Folder Hasher.
+
+    This class is used to produce a folder hash for a given directory.
+
+    It builds a directory tree (DirectoryNode) and computes the associated
+    hash data for the folder.
+    """
+
+    def __init__(
+        self,
+        scan_dir: str,
+        config: Optional[FolderHasherConfig] = None,
+        scanoss_settings: Optional[ScanossSettings] = None,
+    ):
+        self.base = ScanossBase(
+            debug=config.debug,
+            trace=config.trace,
+            quiet=config.quiet,
+        )
+        self.file_filters = FileFilters(
+            debug=config.debug,
+            trace=config.trace,
+            quiet=config.quiet,
+            scanoss_settings=scanoss_settings,
+            is_folder_hashing_scan=True,
+        )
+        self.presenter = FolderHasherPresenter(
+            self,
+            debug=config.debug,
+            trace=config.trace,
+            quiet=config.quiet,
+        )
+
+        self.scan_dir = scan_dir
+        self.tree = None
+
+    def hash_directory(self, path: str) -> dict:
+        """
+        Generate the folder hashing request structure from a directory path.
+
+        This method builds a directory tree (DirectoryNode) and computes the associated
+        hash data for the folder.
+
+        Args:
+            path (str): The root directory path.
+
+        Returns:
+            dict: The folder hash request structure.
+        """
+
+        root_node = self._build_root_node(path)
+        tree = self._hash_calc_from_node(root_node)
+
+        self.tree = tree
+
+        return tree
+
+    def _build_root_node(self, path: str) -> DirectoryNode:
+        """
+        Build a directory tree from the given path with file information.
+
+        The tree includes DirectoryNode objects populated with filtered file items,
+        each containing their relative path and CRC64 hash key.
+
+        Args:
+            path (str): The directory path to build the tree from.
+
+        Returns:
+            DirectoryNode: The root node representing the directory.
+        """
+        root = Path(path).resolve()
+        root_node = DirectoryNode(str(root))
+
+        all_files = [
+            f for f in root.rglob('*') if f.is_file() and len(f.name.encode('utf-8')) <= MINIMUM_FILE_NAME_LENGTH
+        ]
+        filtered_files = self.file_filters.get_filtered_files_from_files(all_files, str(root))
+
+        # Sort the files by name to ensure the hash is the same for the same folder
+        filtered_files.sort()
+
+        bar = Bar('Hashing files...', max=len(filtered_files))
+        for file_path in filtered_files:
+            try:
+                file_path_obj = Path(file_path) if isinstance(file_path, str) else file_path
+                full_file_path = file_path_obj if file_path_obj.is_absolute() else root / file_path_obj
+
+                self.base.print_debug(f'\nHashing file {str(full_file_path)}')
+
+                file_bytes = full_file_path.read_bytes()
+                key = CRC64.get_hash_buff(file_bytes)
+                key_str = ''.join(f'{b:02x}' for b in key)
+                rel_path = str(full_file_path.relative_to(root))
+
+                file_item = DirectoryFile(rel_path, key, key_str)
+
+                current_node = root_node
+                for part in Path(rel_path).parent.parts:
+                    child_path = str(Path(current_node.path) / part)
+                    if child_path not in current_node.children:
+                        current_node.children[child_path] = DirectoryNode(child_path)
+                    current_node = current_node.children[child_path]
+                    current_node.files.append(file_item)
+
+                root_node.files.append(file_item)
+
+            except Exception as e:
+                self.base.print_debug(f'Skipping file {full_file_path}: {str(e)}')
+
+            bar.next()
+
+        bar.finish()
+        return root_node
+
+    def _hash_calc_from_node(self, node: DirectoryNode) -> dict:
+        """
+        Recursively compute folder hash data for a directory node.
+
+        The hash data includes the path identifier, simhash for file names,
+        simhash for file content, and children node hash information.
+
+        Args:
+            node (DirectoryNode): The directory node to compute the hash for.
+
+        Returns:
+            dict: The computed hash data for the node.
+        """
+        hash_data = self._hash_calc(node)
+
+        return {
+            'path_id': node.path,
+            'sim_hash_names': f'{hash_data["name_hash"]:02x}' if hash_data['name_hash'] is not None else None,
+            'sim_hash_content': f'{hash_data["content_hash"]:02x}' if hash_data['content_hash'] is not None else None,
+            'children': [self._hash_calc_from_node(child) for child in node.children.values()],
+        }
+
+    def _hash_calc(self, node: DirectoryNode) -> dict:
+        """
+        Compute folder hash values for a given directory node.
+
+        The method aggregates unique file keys and sorted file names to generate
+        simhash-based hash values for both file names and file contents.
+
+        The most significant byte of the name simhash is then replaced by a computed head value.
+
+        Args:
+            node (DirectoryNode): The directory node containing file items.
+
+        Returns:
+            dict: A dictionary with 'name_hash' and 'content_hash' keys.
+        """
+        processed_hashes = set()
+        file_hashes = []
+        selected_names = []
+
+        for file in node.files:
+            key_str = file.key_str
+            if key_str in processed_hashes:
+                continue
+            processed_hashes.add(key_str)
+
+            selected_names.append(os.path.basename(file.path))
+
+            file_key = bytes(file.key)
+            file_hashes.append(file_key)
+
+        if len(selected_names) < MINIMUM_FILE_COUNT:
+            return {
+                'name_hash': None,
+                'content_hash': None,
+            }
+
+        selected_names.sort()
+        concatenated_names = ''.join(selected_names)
+
+        if len(concatenated_names.encode('utf-8')) < MINIMUM_CONCATENATED_NAME_LENGTH:
+            return {
+                'name_hash': None,
+                'content_hash': None,
+            }
+
+        names_simhash = simhash(WordFeatureSet(concatenated_names.encode('utf-8')))
+        content_simhash = fingerprint(vectorize_bytes(file_hashes))
+
+        return {
+            'name_hash': names_simhash,
+            'content_hash': content_simhash,
+        }
+
+    def present(self, output_format: str = None, output_file: str = None):
+        """Present the hashed tree in the selected format"""
+        self.presenter.present(output_format=output_format, output_file=output_file)
+
+
+class FolderHasherPresenter(AbstractPresenter):
+    """
+    FolderHasher presenter class
+    Handles the presentation of the folder hashing scan results
+    """
+
+    def __init__(self, folder_hasher: FolderHasher, **kwargs):
+        super().__init__(**kwargs)
+        self.folder_hasher = folder_hasher
+
+    def _format_json_output(self) -> str:
+        """
+        Format the scan output data into a JSON object
+
+        Returns:
+            str: The formatted JSON string
+        """
+        return json.dumps(self.folder_hasher.tree, indent=2)
+
+    def _format_plain_output(self) -> str:
+        """
+        Format the scan output data into a plain text string
+        """
+        return (
+            json.dumps(self.folder_hasher.tree, indent=2)
+            if isinstance(self.folder_hasher.tree, dict)
+            else str(self.folder_hasher.tree)
+        )
+
+    def _format_cyclonedx_output(self) -> str:
+        raise NotImplementedError('CycloneDX output is not implemented')
+
+    def _format_spdxlite_output(self) -> str:
+        raise NotImplementedError('SPDXlite output is not implemented')
+
+    def _format_csv_output(self) -> str:
+        raise NotImplementedError('CSV output is not implemented')
+
+    def _format_raw_output(self) -> str:
+        raise NotImplementedError('Raw output is not implemented')
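
For reference, a minimal usage sketch of the new folder hasher, based only on the classes added above; the project path and output file name are illustrative, not taken from the package:

from scanoss.scanners.folder_hasher import FolderHasher, FolderHasherConfig

# Build a config (fields default as in the dataclass above) and hash a directory.
config = FolderHasherConfig(debug=True)
hasher = FolderHasher('/path/to/project', config=config)
tree = hasher.hash_directory('/path/to/project')  # nested dict of path_id / sim_hash_names / sim_hash_content / children
hasher.present(output_format='json', output_file='folder-hash.json')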
scanoss/scanners/scanner_config.py
ADDED

@@ -0,0 +1,73 @@
+"""
+SPDX-License-Identifier: MIT
+
+Copyright (c) 2025, SCANOSS
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+from dataclasses import dataclass
+from typing import Optional
+
+from pypac.parser import PACFile
+
+from scanoss.constants import (
+    DEFAULT_NB_THREADS,
+    DEFAULT_POST_SIZE,
+    DEFAULT_SC_TIMEOUT,
+    DEFAULT_TIMEOUT,
+)
+
+
+@dataclass
+class ScannerConfig:
+    debug: bool = False
+    trace: bool = False
+    quiet: bool = False
+    api_key: Optional[str] = None
+    url: Optional[str] = None
+    grpc_url: Optional[str] = None
+    post_size: int = DEFAULT_POST_SIZE
+    timeout: int = DEFAULT_TIMEOUT
+    sc_timeout: int = DEFAULT_SC_TIMEOUT
+    nb_threads: int = DEFAULT_NB_THREADS
+    proxy: Optional[str] = None
+    grpc_proxy: Optional[str] = None
+
+    ca_cert: Optional[str] = None
+    pac: Optional[PACFile] = None
+
+
+def create_scanner_config_from_args(args) -> ScannerConfig:
+    return ScannerConfig(
+        debug=args.debug,
+        trace=args.trace,
+        quiet=args.quiet,
+        api_key=getattr(args, 'key', None),
+        url=getattr(args, 'api_url', None),
+        grpc_url=getattr(args, 'grpc_url', None),
+        post_size=getattr(args, 'post_size', DEFAULT_POST_SIZE),
+        timeout=getattr(args, 'timeout', DEFAULT_TIMEOUT),
+        sc_timeout=getattr(args, 'sc_timeout', DEFAULT_SC_TIMEOUT),
+        nb_threads=getattr(args, 'nb_threads', DEFAULT_NB_THREADS),
+        proxy=getattr(args, 'proxy', None),
+        grpc_proxy=getattr(args, 'grpc_proxy', None),
+        ca_cert=getattr(args, 'ca_cert', None),
+        pac=getattr(args, 'pac', None),
+    )
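
A brief sketch of how this config might be constructed; the API key and URL values below are placeholders:

from argparse import Namespace

from scanoss.scanners.scanner_config import ScannerConfig, create_scanner_config_from_args

# Direct construction: unspecified fields keep the defaults from scanoss.constants.
config = ScannerConfig(debug=True, api_key='YOUR_API_KEY', url='https://api.osskb.org')

# Or derive one from an argparse-style namespace; debug/trace/quiet must be present,
# everything else falls back to its default when the attribute is absent.
args = Namespace(debug=False, trace=False, quiet=True, key='YOUR_API_KEY')
config = create_scanner_config_from_args(args)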
scanoss/scanners/scanner_hfh.py
ADDED

@@ -0,0 +1,173 @@
+"""
+SPDX-License-Identifier: MIT
+
+Copyright (c) 2025, SCANOSS
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+import json
+import threading
+import time
+from typing import Dict, Optional
+
+from progress.spinner import Spinner
+
+from scanoss.file_filters import FileFilters
+from scanoss.scanners.folder_hasher import FolderHasher
+from scanoss.scanners.scanner_config import ScannerConfig
+from scanoss.scanoss_settings import ScanossSettings
+from scanoss.scanossbase import ScanossBase
+from scanoss.scanossgrpc import ScanossGrpc
+from scanoss.utils.abstract_presenter import AbstractPresenter
+
+
+class ScannerHFH:
+    """
+    Folder Hashing Scanner.
+
+    This scanner processes a directory, computes CRC64 hashes for the files,
+    and calculates simhash values based on file names and content to detect folder-level similarities.
+    """
+
+    def __init__(
+        self,
+        scan_dir: str,
+        config: ScannerConfig,
+        client: Optional[ScanossGrpc] = None,
+        scanoss_settings: Optional[ScanossSettings] = None,
+    ):
+        """
+        Initialize the ScannerHFH.
+
+        Args:
+            scan_dir (str): The directory to be scanned.
+            config (ScannerConfig): Configuration parameters for the scanner.
+            client (ScanossGrpc): gRPC client for communicating with the scanning service.
+            scanoss_settings (Optional[ScanossSettings]): Optional settings for Scanoss.
+        """
+        self.base = ScanossBase(
+            debug=config.debug,
+            trace=config.trace,
+            quiet=config.quiet,
+        )
+        self.presenter = ScannerHFHPresenter(
+            self,
+            debug=config.debug,
+            trace=config.trace,
+            quiet=config.quiet,
+        )
+        self.file_filters = FileFilters(
+            debug=config.debug,
+            trace=config.trace,
+            quiet=config.quiet,
+            scanoss_settings=scanoss_settings,
+        )
+        self.folder_hasher = FolderHasher(
+            scan_dir=scan_dir,
+            config=config,
+            scanoss_settings=scanoss_settings,
+        )
+
+        self.scan_dir = scan_dir
+        self.client = client
+        self.scan_results = None
+        self.best_match = False
+        self.threshold = 100
+
+    def scan(self) -> Optional[Dict]:
+        """
+        Scan the provided directory using the folder hashing algorithm.
+
+        Returns:
+            Optional[Dict]: The folder hash response from the gRPC client, or None if an error occurs.
+        """
+        hfh_request = {
+            'root': self.folder_hasher.hash_directory(self.scan_dir),
+            'threshold': self.threshold,
+            'best_match': self.best_match,
+        }
+
+        spinner = Spinner('Scanning folder...')
+        stop_spinner = False
+
+        def spin():
+            while not stop_spinner:
+                spinner.next()
+                time.sleep(0.1)
+
+        spinner_thread = threading.Thread(target=spin)
+        spinner_thread.start()
+
+        try:
+            response = self.client.folder_hash_scan(hfh_request)
+            if response:
+                self.scan_results = response
+        finally:
+            stop_spinner = True
+            spinner_thread.join()
+            spinner.finish()
+
+        return self.scan_results
+
+    def present(self, output_format: str = None, output_file: str = None):
+        """Present the results in the selected format"""
+        self.presenter.present(output_format=output_format, output_file=output_file)
+
+
+class ScannerHFHPresenter(AbstractPresenter):
+    """
+    ScannerHFH presenter class
+    Handles the presentation of the folder hashing scan results
+    """
+
+    def __init__(self, scanner: ScannerHFH, **kwargs):
+        super().__init__(**kwargs)
+        self.scanner = scanner
+
+    def _format_json_output(self) -> str:
+        """
+        Format the scan output data into a JSON object
+
+        Returns:
+            str: The formatted JSON string
+        """
+        return json.dumps(self.scanner.scan_results, indent=2)
+
+    def _format_plain_output(self) -> str:
+        """
+        Format the scan output data into a plain text string
+        """
+        return (
+            json.dumps(self.scanner.scan_results, indent=2)
+            if isinstance(self.scanner.scan_results, dict)
+            else str(self.scanner.scan_results)
+        )
+
+    def _format_cyclonedx_output(self) -> str:
+        raise NotImplementedError('CycloneDX output is not implemented')
+
+    def _format_spdxlite_output(self) -> str:
+        raise NotImplementedError('SPDXlite output is not implemented')
+
+    def _format_csv_output(self) -> str:
+        raise NotImplementedError('CSV output is not implemented')
+
+    def _format_raw_output(self) -> str:
+        raise NotImplementedError('Raw output is not implemented')
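
A hedged sketch of driving the new scanner end to end; the ScanossGrpc constructor arguments shown here are assumptions, since the client's signature is not part of this diff:

from scanoss.scanners.scanner_config import ScannerConfig
from scanoss.scanners.scanner_hfh import ScannerHFH
from scanoss.scanossgrpc import ScanossGrpc

config = ScannerConfig(debug=True, api_key='YOUR_API_KEY')
client = ScanossGrpc(url=config.grpc_url, api_key=config.api_key)  # assumed constructor arguments
scanner = ScannerHFH('/path/to/project', config, client=client)
results = scanner.scan()  # builds the folder-hash request and calls client.folder_hash_scan()
scanner.present(output_format='json', output_file='hfh-results.json')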
scanoss/scanoss_settings.py
CHANGED

@@ -24,13 +24,17 @@ SPDX-License-Identifier: MIT
 
 import json
 from pathlib import Path
-from typing import List, TypedDict
+from typing import List, Optional, TypedDict
 
 import importlib_resources
 from jsonschema import validate
 
 from .scanossbase import ScanossBase
-from .utils.file import
+from .utils.file import (
+    JSON_ERROR_FILE_EMPTY,
+    JSON_ERROR_FILE_NOT_FOUND,
+    validate_json_file,
+)
 
 DEFAULT_SCANOSS_JSON_FILE = Path('scanoss.json')
 
@@ -96,7 +100,7 @@ class ScanossSettings(ScanossBase):
         if filepath:
             self.load_json_file(filepath)
 
-    def load_json_file(self, filepath:
+    def load_json_file(self, filepath: Optional[str] = None, scan_root: Optional[str] = None) -> 'ScanossSettings':
         """
         Load the scan settings file. If no filepath is provided, scanoss.json will be used as default.
 
@@ -118,7 +122,7 @@ class ScanossSettings(ScanossBase):
 
         result = validate_json_file(json_file)
         if not result.is_valid:
-            if result.error_code
+            if result.error_code in (JSON_ERROR_FILE_NOT_FOUND, JSON_ERROR_FILE_EMPTY):
                 self.print_msg(
                     f'WARNING: The supplied settings file "{filepath}" was not found or is empty. Skipping...'
                 )
@@ -235,7 +239,7 @@ class ScanossSettings(ScanossBase):
         include_bom_entries = self._remove_duplicates(self.normalize_bom_entries(self.get_bom_include()))
         replace_bom_entries = self._remove_duplicates(self.normalize_bom_entries(self.get_bom_replace()))
         self.print_debug(
-            f"Scan type set to 'identify'. Adding {len(include_bom_entries) + len(replace_bom_entries)} components as context to the scan. \n"
+            f"Scan type set to 'identify'. Adding {len(include_bom_entries) + len(replace_bom_entries)} components as context to the scan. \n"  # noqa: E501
             f'From Include list: {[entry["purl"] for entry in include_bom_entries]} \n'
             f'From Replace list: {[entry["purl"] for entry in replace_bom_entries]} \n'
         )
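
A small, assumed usage sketch of the updated settings loader; the constructor arguments are not shown in this diff:

from scanoss.scanoss_settings import ScanossSettings

settings = ScanossSettings(debug=True)  # assumed ScanossBase-style constructor
# Both arguments are now optional; a missing or empty settings file is reported with a
# warning and skipped, keyed off the JSON_ERROR_FILE_* codes imported above.
settings.load_json_file('scanoss.json', '.')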
scanoss/scanossbase.py
CHANGED

@@ -80,20 +80,26 @@ class ScanossBase:
             **kwargs,
         )
 
-    def print_to_file_or_stdout(self,
+    def print_to_file_or_stdout(self, content: str, file: str = None):
         """
         Print message to file if provided or stdout
         """
+        if not content:
+            return
+
         if file:
             with open(file, 'w') as f:
-                f.write(
+                f.write(content)
         else:
-            self.print_stdout(
+            self.print_stdout(content)
 
     def print_to_file_or_stderr(self, msg: str, file: str = None):
         """
         Print message to file if provided or stderr
         """
+        if not msg:
+            return
+
         if file:
             with open(file, 'w') as f:
                 f.write(msg)