scanoss 1.18.1__py3-none-any.whl → 1.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scanoss/__init__.py +1 -1
- scanoss/cli.py +63 -79
- scanoss/data/build_date.txt +1 -1
- scanoss/data/scanoss-settings-schema.json +254 -0
- scanoss/file_filters.py +525 -0
- scanoss/scanner.py +163 -266
- scanoss/scanoss_settings.py +105 -40
- scanoss/scanpostprocessor.py +89 -81
- scanoss/utils/__init__.py +23 -0
- scanoss/utils/file.py +57 -0
- {scanoss-1.18.1.dist-info → scanoss-1.19.0.dist-info}/METADATA +6 -4
- {scanoss-1.18.1.dist-info → scanoss-1.19.0.dist-info}/RECORD +16 -12
- {scanoss-1.18.1.dist-info → scanoss-1.19.0.dist-info}/WHEEL +1 -1
- {scanoss-1.18.1.dist-info → scanoss-1.19.0.dist-info}/LICENSE +0 -0
- {scanoss-1.18.1.dist-info → scanoss-1.19.0.dist-info}/entry_points.txt +0 -0
- {scanoss-1.18.1.dist-info → scanoss-1.19.0.dist-info}/top_level.txt +0 -0
scanoss/scanner.py
CHANGED
|
@@ -23,6 +23,7 @@
|
|
|
23
23
|
"""
|
|
24
24
|
import json
|
|
25
25
|
import os
|
|
26
|
+
from pathlib import Path
|
|
26
27
|
import sys
|
|
27
28
|
import datetime
|
|
28
29
|
from typing import Any, Dict, List, Optional
|
|
@@ -32,6 +33,8 @@ from progress.bar import Bar
|
|
|
32
33
|
from progress.spinner import Spinner
|
|
33
34
|
from pypac.parser import PACFile
|
|
34
35
|
|
|
36
|
+
from scanoss.file_filters import FileFilters
|
|
37
|
+
|
|
35
38
|
from .scanossapi import ScanossApi
|
|
36
39
|
from .cyclonedx import CycloneDx
|
|
37
40
|
from .spdxlite import SpdxLite
|
|
@@ -48,44 +51,12 @@ from . import __version__
|
|
|
48
51
|
|
|
49
52
|
FAST_WINNOWING = False
|
|
50
53
|
try:
|
|
51
|
-
from .winnowing import Winnowing
|
|
52
|
-
|
|
54
|
+
from scanoss_winnowing.winnowing import Winnowing
|
|
53
55
|
FAST_WINNOWING = True
|
|
54
56
|
except ModuleNotFoundError or ImportError:
|
|
55
57
|
FAST_WINNOWING = False
|
|
56
58
|
from .winnowing import Winnowing
|
|
57
59
|
|
|
58
|
-
FILTERED_DIRS = { # Folders to skip
|
|
59
|
-
"nbproject", "nbbuild", "nbdist", "__pycache__", "venv", "_yardoc", "eggs", "wheels", "htmlcov", "__pypackages__"
|
|
60
|
-
}
|
|
61
|
-
FILTERED_DIR_EXT = { # Folder endings to skip
|
|
62
|
-
".egg-info"
|
|
63
|
-
}
|
|
64
|
-
FILTERED_EXT = [ # File extensions to skip
|
|
65
|
-
".1", ".2", ".3", ".4", ".5", ".6", ".7", ".8", ".9", ".ac", ".adoc", ".am",
|
|
66
|
-
".asciidoc", ".bmp", ".build", ".cfg", ".chm", ".class", ".cmake", ".cnf",
|
|
67
|
-
".conf", ".config", ".contributors", ".copying", ".crt", ".csproj", ".css",
|
|
68
|
-
".csv", ".dat", ".data", ".doc", ".docx", ".dtd", ".dts", ".iws", ".c9", ".c9revisions",
|
|
69
|
-
".dtsi", ".dump", ".eot", ".eps", ".geojson", ".gdoc", ".gif",
|
|
70
|
-
".glif", ".gmo", ".gradle", ".guess", ".hex", ".htm", ".html", ".ico", ".iml",
|
|
71
|
-
".in", ".inc", ".info", ".ini", ".ipynb", ".jpeg", ".jpg", ".json", ".jsonld", ".lock",
|
|
72
|
-
".log", ".m4", ".map", ".markdown", ".md", ".md5", ".meta", ".mk", ".mxml",
|
|
73
|
-
".o", ".otf", ".out", ".pbtxt", ".pdf", ".pem", ".phtml", ".plist", ".png",
|
|
74
|
-
".po", ".ppt", ".prefs", ".properties", ".pyc", ".qdoc", ".result", ".rgb",
|
|
75
|
-
".rst", ".scss", ".sha", ".sha1", ".sha2", ".sha256", ".sln", ".spec", ".sql",
|
|
76
|
-
".sub", ".svg", ".svn-base", ".tab", ".template", ".test", ".tex", ".tiff",
|
|
77
|
-
".toml", ".ttf", ".txt", ".utf-8", ".vim", ".wav", ".woff", ".woff2", ".xht",
|
|
78
|
-
".xhtml", ".xls", ".xlsx", ".xml", ".xpm", ".xsd", ".xul", ".yaml", ".yml", ".wfp",
|
|
79
|
-
".editorconfig", ".dotcover", ".pid", ".lcov", ".egg", ".manifest", ".cache", ".coverage", ".cover",
|
|
80
|
-
".gem", ".lst", ".pickle", ".pdb", ".gml", ".pot", ".plt",
|
|
81
|
-
# File endings
|
|
82
|
-
"-doc", "changelog", "config", "copying", "license", "authors", "news", "licenses", "notice",
|
|
83
|
-
"readme", "swiftdoc", "texidoc", "todo", "version", "ignore", "manifest", "sqlite", "sqlite3"
|
|
84
|
-
]
|
|
85
|
-
FILTERED_FILES = { # Files to skip
|
|
86
|
-
"gradlew", "gradlew.bat", "mvnw", "mvnw.cmd", "gradle-wrapper.jar", "maven-wrapper.jar",
|
|
87
|
-
"thumbs.db", "babel.config.js", "license.txt", "license.md", "copying.lib", "makefile"
|
|
88
|
-
}
|
|
89
60
|
WFP_FILE_START = "file="
|
|
90
61
|
MAX_POST_SIZE = 64 * 1024 # 64k Max post size
|
|
91
62
|
|
|
@@ -96,18 +67,44 @@ class Scanner(ScanossBase):
|
|
|
96
67
|
Handle the scanning of files, snippets and dependencies
|
|
97
68
|
"""
|
|
98
69
|
|
|
99
|
-
def __init__(
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
70
|
+
def __init__(
|
|
71
|
+
self,
|
|
72
|
+
wfp: str = None,
|
|
73
|
+
scan_output: str = None,
|
|
74
|
+
output_format: str = 'plain',
|
|
75
|
+
debug: bool = False,
|
|
76
|
+
trace: bool = False,
|
|
77
|
+
quiet: bool = False,
|
|
78
|
+
api_key: str = None,
|
|
79
|
+
url: str = None,
|
|
80
|
+
flags: str = None,
|
|
81
|
+
nb_threads: int = 5,
|
|
82
|
+
post_size: int = 32,
|
|
83
|
+
timeout: int = 180,
|
|
84
|
+
no_wfp_file: bool = False,
|
|
85
|
+
all_extensions: bool = False,
|
|
86
|
+
all_folders: bool = False,
|
|
87
|
+
hidden_files_folders: bool = False,
|
|
88
|
+
scan_options: int = 7,
|
|
89
|
+
sc_timeout: int = 600,
|
|
90
|
+
sc_command: str = None,
|
|
91
|
+
grpc_url: str = None,
|
|
92
|
+
obfuscate: bool = False,
|
|
93
|
+
ignore_cert_errors: bool = False,
|
|
94
|
+
proxy: str = None,
|
|
95
|
+
grpc_proxy: str = None,
|
|
96
|
+
ca_cert: str = None,
|
|
97
|
+
pac: PACFile = None,
|
|
98
|
+
retry: int = 5,
|
|
99
|
+
hpsm: bool = False,
|
|
100
|
+
skip_size: int = 0,
|
|
101
|
+
skip_extensions=None,
|
|
102
|
+
skip_folders=None,
|
|
103
|
+
strip_hpsm_ids=None,
|
|
104
|
+
strip_snippet_ids=None,
|
|
105
|
+
skip_md5_ids=None,
|
|
106
|
+
scan_settings: 'ScanossSettings | None' = None
|
|
107
|
+
):
|
|
111
108
|
"""
|
|
112
109
|
Initialise scanning class, including Winnowing, ScanossApi, ThreadedScanning
|
|
113
110
|
"""
|
|
@@ -129,6 +126,7 @@ class Scanner(ScanossBase):
|
|
|
129
126
|
self.hpsm = hpsm
|
|
130
127
|
self.skip_folders = skip_folders
|
|
131
128
|
self.skip_size = skip_size
|
|
129
|
+
self.skip_extensions = skip_extensions
|
|
132
130
|
ver_details = Scanner.version_details()
|
|
133
131
|
|
|
134
132
|
self.winnowing = Winnowing(debug=debug, quiet=quiet, skip_snippets=self._skip_snippets,
|
|
@@ -157,9 +155,6 @@ class Scanner(ScanossBase):
|
|
|
157
155
|
self.post_file_count = post_size if post_size > 0 else 32 # Max number of files for any given POST (default 32)
|
|
158
156
|
if self._skip_snippets:
|
|
159
157
|
self.max_post_size = 8 * 1024 # 8k Max post size if we're skipping snippets
|
|
160
|
-
self.skip_extensions = FILTERED_EXT
|
|
161
|
-
if skip_extensions: # Append extra file extensions to skip
|
|
162
|
-
self.skip_extensions.extend(skip_extensions)
|
|
163
158
|
|
|
164
159
|
self.scan_settings = scan_settings
|
|
165
160
|
self.post_processor = ScanPostProcessor(scan_settings, debug=debug, trace=trace, quiet=quiet) if scan_settings else None
|
|
@@ -172,73 +167,6 @@ class Scanner(ScanossBase):
|
|
|
172
167
|
if sbom:
|
|
173
168
|
self.scanoss_api.set_sbom(sbom)
|
|
174
169
|
|
|
175
|
-
def __filter_files(self, files: list) -> list:
|
|
176
|
-
"""
|
|
177
|
-
Filter which files should be considered for processing
|
|
178
|
-
:param files: list of files to filter
|
|
179
|
-
:return list of filtered files
|
|
180
|
-
"""
|
|
181
|
-
file_list = []
|
|
182
|
-
for f in files:
|
|
183
|
-
ignore = False
|
|
184
|
-
if f.startswith(".") and not self.hidden_files_folders: # Ignore all . files unless requested
|
|
185
|
-
ignore = True
|
|
186
|
-
if not ignore and not self.all_extensions: # Skip this check if we're allowing all extensions
|
|
187
|
-
f_lower = f.lower()
|
|
188
|
-
if f_lower in FILTERED_FILES: # Check for exact files to ignore
|
|
189
|
-
ignore = True
|
|
190
|
-
if not ignore:
|
|
191
|
-
for ending in self.skip_extensions: # Check for file endings to ignore (static and user supplied)
|
|
192
|
-
if ending and f_lower.endswith(ending):
|
|
193
|
-
ignore = True
|
|
194
|
-
break
|
|
195
|
-
if not ignore:
|
|
196
|
-
file_list.append(f)
|
|
197
|
-
return file_list
|
|
198
|
-
|
|
199
|
-
def __filter_dirs(self, dirs: list) -> list:
|
|
200
|
-
"""
|
|
201
|
-
Filter which folders should be considered for processing
|
|
202
|
-
:param dirs: list of directories to filter
|
|
203
|
-
:return: list of filtered directories
|
|
204
|
-
"""
|
|
205
|
-
dir_list = []
|
|
206
|
-
for d in dirs:
|
|
207
|
-
ignore = False
|
|
208
|
-
if d.startswith(".") and not self.hidden_files_folders: # Ignore all . folders unless requested
|
|
209
|
-
ignore = True
|
|
210
|
-
if not ignore and not self.all_folders: # Skip this check if we're allowing all folders
|
|
211
|
-
d_lower = d.lower()
|
|
212
|
-
if d_lower in FILTERED_DIRS: # Ignore specific folders (case insensitive)
|
|
213
|
-
ignore = True
|
|
214
|
-
elif self.skip_folders and d in self.skip_folders: # Ignore user-supplied folders (case sensitive)
|
|
215
|
-
ignore = True
|
|
216
|
-
if not ignore:
|
|
217
|
-
for de in FILTERED_DIR_EXT: # Ignore specific folder endings (case insensitive)
|
|
218
|
-
if d_lower.endswith(de):
|
|
219
|
-
ignore = True
|
|
220
|
-
break
|
|
221
|
-
if not ignore:
|
|
222
|
-
dir_list.append(d)
|
|
223
|
-
return dir_list
|
|
224
|
-
|
|
225
|
-
@staticmethod
|
|
226
|
-
def __strip_dir(scan_dir: str, length: int, path: str) -> str:
|
|
227
|
-
"""
|
|
228
|
-
Strip the leading string from the specified path
|
|
229
|
-
Parameters
|
|
230
|
-
----------
|
|
231
|
-
scan_dir: str
|
|
232
|
-
Root path
|
|
233
|
-
length: int
|
|
234
|
-
length of the root path string
|
|
235
|
-
path: str
|
|
236
|
-
Path to strip
|
|
237
|
-
"""
|
|
238
|
-
if length > 0 and path.startswith(scan_dir):
|
|
239
|
-
path = path[length:]
|
|
240
|
-
return path
|
|
241
|
-
|
|
242
170
|
@staticmethod
|
|
243
171
|
def __count_files_in_wfp_file(wfp_file: str):
|
|
244
172
|
"""
|
|
@@ -255,27 +183,7 @@ class Scanner(ScanossBase):
|
|
|
255
183
|
if WFP_FILE_START in line:
|
|
256
184
|
count += 1
|
|
257
185
|
return count
|
|
258
|
-
|
|
259
|
-
@staticmethod
|
|
260
|
-
def valid_json_file(json_file: str) -> bool:
|
|
261
|
-
"""
|
|
262
|
-
Validate if the specified file is indeed valid JSON
|
|
263
|
-
:param: str JSON file to load
|
|
264
|
-
:return bool True if valid, False otherwise
|
|
265
|
-
"""
|
|
266
|
-
if not json_file:
|
|
267
|
-
Scanner.print_stderr('ERROR: No JSON file provided to parse.')
|
|
268
|
-
return False
|
|
269
|
-
if not os.path.isfile(json_file):
|
|
270
|
-
Scanner.print_stderr(f'ERROR: JSON file does not exist or is not a file: {json_file}')
|
|
271
|
-
return False
|
|
272
|
-
try:
|
|
273
|
-
with open(json_file) as f:
|
|
274
|
-
json.load(f)
|
|
275
|
-
except Exception as e:
|
|
276
|
-
Scanner.print_stderr(f'Problem parsing JSON file "{json_file}": {e}')
|
|
277
|
-
return False
|
|
278
|
-
return True
|
|
186
|
+
|
|
279
187
|
|
|
280
188
|
@staticmethod
|
|
281
189
|
def version_details() -> str:
|
|
@@ -390,11 +298,20 @@ class Scanner(ScanossBase):
|
|
|
390
298
|
"""
|
|
391
299
|
success = True
|
|
392
300
|
if not scan_dir:
|
|
393
|
-
raise Exception(
|
|
301
|
+
raise Exception('ERROR: Please specify a folder to scan')
|
|
394
302
|
if not os.path.exists(scan_dir) or not os.path.isdir(scan_dir):
|
|
395
|
-
raise Exception(f
|
|
396
|
-
|
|
397
|
-
|
|
303
|
+
raise Exception(f'ERROR: Specified folder does not exist or is not a folder: {scan_dir}')
|
|
304
|
+
|
|
305
|
+
file_filters = FileFilters(debug=self.debug, trace=self.trace, quiet=self.quiet,
|
|
306
|
+
scanoss_settings=self.scan_settings,
|
|
307
|
+
all_extensions=self.all_extensions,
|
|
308
|
+
all_folders=self.all_folders,
|
|
309
|
+
hidden_files_folders=self.hidden_files_folders,
|
|
310
|
+
skip_size=self.skip_size,
|
|
311
|
+
skip_folders=self.skip_folders,
|
|
312
|
+
skip_extensions=self.skip_extensions,
|
|
313
|
+
operation_type='scanning'
|
|
314
|
+
)
|
|
398
315
|
self.print_msg(f'Searching {scan_dir} for files to fingerprint...')
|
|
399
316
|
spinner = None
|
|
400
317
|
if not self.quiet and self.isatty:
|
|
@@ -407,57 +324,45 @@ class Scanner(ScanossBase):
|
|
|
407
324
|
file_count = 0 # count all files fingerprinted
|
|
408
325
|
wfp_file_count = 0 # count number of files in each queue post
|
|
409
326
|
scan_started = False
|
|
410
|
-
|
|
411
|
-
|
|
327
|
+
|
|
328
|
+
to_scan_files = file_filters.get_filtered_files_from_folder(scan_dir)
|
|
329
|
+
for to_scan_file in to_scan_files:
|
|
412
330
|
if self.threaded_scan and self.threaded_scan.stop_scanning():
|
|
413
331
|
self.print_stderr('Warning: Aborting fingerprinting as the scanning service is not available.')
|
|
414
332
|
break
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
wfp_file_count += 1
|
|
449
|
-
# If the scan request block (group of WFPs) or larger than the POST size or we have reached the file limit, add it to the queue
|
|
450
|
-
if wfp_file_count > self.post_file_count or scan_size >= self.max_post_size:
|
|
451
|
-
self.threaded_scan.queue_add(scan_block)
|
|
452
|
-
queue_size += 1
|
|
453
|
-
scan_block = ''
|
|
454
|
-
wfp_file_count = 0
|
|
455
|
-
if not scan_started and queue_size > self.nb_threads: # Start scanning if we have something to do
|
|
456
|
-
scan_started = True
|
|
457
|
-
if not self.threaded_scan.run(wait=False):
|
|
458
|
-
self.print_stderr(
|
|
459
|
-
f'Warning: Some errors encounted while scanning. Results might be incomplete.')
|
|
460
|
-
success = False
|
|
333
|
+
self.print_debug(f'Fingerprinting {to_scan_file}...')
|
|
334
|
+
if spinner:
|
|
335
|
+
spinner.next()
|
|
336
|
+
abs_path = Path(scan_dir, to_scan_file).resolve()
|
|
337
|
+
wfp = self.winnowing.wfp_for_file(str(abs_path), to_scan_file)
|
|
338
|
+
if wfp is None or wfp == '':
|
|
339
|
+
self.print_debug(f'No WFP returned for {to_scan_file}. Skipping.')
|
|
340
|
+
continue
|
|
341
|
+
if save_wfps_for_print:
|
|
342
|
+
wfp_list.append(wfp)
|
|
343
|
+
file_count += 1
|
|
344
|
+
if self.threaded_scan:
|
|
345
|
+
wfp_size = len(wfp.encode("utf-8"))
|
|
346
|
+
# If the WFP is bigger than the max post size and we already have something stored in the scan block, add it to the queue
|
|
347
|
+
if scan_block != '' and (wfp_size + scan_size) >= self.max_post_size:
|
|
348
|
+
self.threaded_scan.queue_add(scan_block)
|
|
349
|
+
queue_size += 1
|
|
350
|
+
scan_block = ''
|
|
351
|
+
wfp_file_count = 0
|
|
352
|
+
scan_block += wfp
|
|
353
|
+
scan_size = len(scan_block.encode("utf-8"))
|
|
354
|
+
wfp_file_count += 1
|
|
355
|
+
# If the scan request block (group of WFPs) or larger than the POST size or we have reached the file limit, add it to the queue
|
|
356
|
+
if wfp_file_count > self.post_file_count or scan_size >= self.max_post_size:
|
|
357
|
+
self.threaded_scan.queue_add(scan_block)
|
|
358
|
+
queue_size += 1
|
|
359
|
+
scan_block = ''
|
|
360
|
+
wfp_file_count = 0
|
|
361
|
+
if not scan_started and queue_size > self.nb_threads: # Start scanning if we have something to do
|
|
362
|
+
scan_started = True
|
|
363
|
+
if not self.threaded_scan.run(wait=False):
|
|
364
|
+
self.print_stderr('Warning: Some errors encounted while scanning. Results might be incomplete.')
|
|
365
|
+
success = False
|
|
461
366
|
# End for loop
|
|
462
367
|
if self.threaded_scan and scan_block != '':
|
|
463
368
|
self.threaded_scan.queue_add(scan_block) # Make sure all files have been submitted
|
|
@@ -650,6 +555,17 @@ class Scanner(ScanossBase):
|
|
|
650
555
|
success = True
|
|
651
556
|
if not files:
|
|
652
557
|
raise Exception(f"ERROR: Please provide a non-empty list of filenames to scan")
|
|
558
|
+
|
|
559
|
+
file_filters = FileFilters(debug=self.debug, trace=self.trace, quiet=self.quiet,
|
|
560
|
+
scanoss_settings=self.scan_settings,
|
|
561
|
+
all_extensions=self.all_extensions,
|
|
562
|
+
all_folders=self.all_folders,
|
|
563
|
+
hidden_files_folders=self.hidden_files_folders,
|
|
564
|
+
skip_size=self.skip_size,
|
|
565
|
+
skip_folders=self.skip_folders,
|
|
566
|
+
skip_extensions=self.skip_extensions,
|
|
567
|
+
operation_type='scanning'
|
|
568
|
+
)
|
|
653
569
|
spinner = None
|
|
654
570
|
if not self.quiet and self.isatty:
|
|
655
571
|
spinner = Spinner('Fingerprinting ')
|
|
@@ -661,66 +577,47 @@ class Scanner(ScanossBase):
|
|
|
661
577
|
file_count = 0 # count all files fingerprinted
|
|
662
578
|
wfp_file_count = 0 # count number of files in each queue post
|
|
663
579
|
scan_started = False
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
for file in
|
|
667
|
-
filename = os.path.basename(file)
|
|
668
|
-
filtered_filenames = self.__filter_files([filename])
|
|
669
|
-
if not filtered_filenames or len(filtered_filenames) == 0:
|
|
670
|
-
self.print_debug(f'Skipping filtered file: {file}')
|
|
671
|
-
continue
|
|
672
|
-
paths = os.path.dirname(file).split(os.sep)
|
|
673
|
-
if len(self.__filter_dirs(paths)) == len(paths): # Nothing found to filter
|
|
674
|
-
filtered_files.append(file)
|
|
675
|
-
else:
|
|
676
|
-
self.print_debug(f'Skipping filtered (folder) file: {file}')
|
|
677
|
-
if len(filtered_files) > 0:
|
|
678
|
-
self.print_debug(f'Scanning {len(filtered_files)} files...')
|
|
679
|
-
# Process all the requested files
|
|
680
|
-
for file in filtered_files:
|
|
580
|
+
|
|
581
|
+
to_scan_files = file_filters.get_filtered_files_from_files(files)
|
|
582
|
+
for file in to_scan_files:
|
|
681
583
|
if self.threaded_scan and self.threaded_scan.stop_scanning():
|
|
682
584
|
self.print_stderr('Warning: Aborting fingerprinting as the scanning service is not available.')
|
|
683
585
|
break
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
scan_block
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
if
|
|
714
|
-
self.
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
scan_started = True
|
|
720
|
-
if not self.threaded_scan.run(wait=False):
|
|
721
|
-
self.print_stderr(
|
|
722
|
-
f'Warning: Some errors encounted while scanning. Results might be incomplete.')
|
|
723
|
-
success = False
|
|
586
|
+
self.print_debug(f'Fingerprinting {file}...')
|
|
587
|
+
if spinner:
|
|
588
|
+
spinner.next()
|
|
589
|
+
wfp = self.winnowing.wfp_for_file(file, file)
|
|
590
|
+
if wfp is None or wfp == '':
|
|
591
|
+
self.print_debug(f'No WFP returned for {file}. Skipping.')
|
|
592
|
+
continue
|
|
593
|
+
if save_wfps_for_print:
|
|
594
|
+
wfp_list.append(wfp)
|
|
595
|
+
file_count += 1
|
|
596
|
+
if self.threaded_scan:
|
|
597
|
+
wfp_size = len(wfp.encode('utf-8'))
|
|
598
|
+
# If the WFP is bigger than the max post size and we already have something stored in the scan block, add it to the queue
|
|
599
|
+
if scan_block != '' and (wfp_size + scan_size) >= self.max_post_size:
|
|
600
|
+
self.threaded_scan.queue_add(scan_block)
|
|
601
|
+
queue_size += 1
|
|
602
|
+
scan_block = ''
|
|
603
|
+
wfp_file_count = 0
|
|
604
|
+
scan_block += wfp
|
|
605
|
+
scan_size = len(scan_block.encode('utf-8'))
|
|
606
|
+
wfp_file_count += 1
|
|
607
|
+
# If the scan request block (group of WFPs) or larger than the POST size or we have reached the file limit, add it to the queue
|
|
608
|
+
if wfp_file_count > self.post_file_count or scan_size >= self.max_post_size:
|
|
609
|
+
self.threaded_scan.queue_add(scan_block)
|
|
610
|
+
queue_size += 1
|
|
611
|
+
scan_block = ''
|
|
612
|
+
wfp_file_count = 0
|
|
613
|
+
if not scan_started and queue_size > self.nb_threads: # Start scanning if we have something to do
|
|
614
|
+
scan_started = True
|
|
615
|
+
if not self.threaded_scan.run(wait=False):
|
|
616
|
+
self.print_stderr(
|
|
617
|
+
f'Warning: Some errors encounted while scanning. Results might be incomplete.'
|
|
618
|
+
)
|
|
619
|
+
success = False
|
|
620
|
+
|
|
724
621
|
# End for loop
|
|
725
622
|
if self.threaded_scan and scan_block != '':
|
|
726
623
|
self.threaded_scan.queue_add(scan_block) # Make sure all files have been submitted
|
|
@@ -737,7 +634,7 @@ class Scanner(ScanossBase):
|
|
|
737
634
|
if self.threaded_scan:
|
|
738
635
|
success = self.__run_scan_threaded(scan_started, file_count)
|
|
739
636
|
else:
|
|
740
|
-
Scanner.print_stderr(f'Warning: No files found to scan from: {
|
|
637
|
+
Scanner.print_stderr(f'Warning: No files found to scan from: {to_scan_files}')
|
|
741
638
|
return success
|
|
742
639
|
|
|
743
640
|
def scan_files_with_options(self, files: [], deps_file: str = None, file_map: dict = None) -> bool:
|
|
@@ -1065,32 +962,32 @@ class Scanner(ScanossBase):
|
|
|
1065
962
|
Fingerprint the specified folder producing fingerprints
|
|
1066
963
|
"""
|
|
1067
964
|
if not scan_dir:
|
|
1068
|
-
raise Exception(f
|
|
965
|
+
raise Exception(f'ERROR: Please specify a folder to fingerprint')
|
|
1069
966
|
if not os.path.exists(scan_dir) or not os.path.isdir(scan_dir):
|
|
1070
|
-
raise Exception(f
|
|
967
|
+
raise Exception(f'ERROR: Specified folder does not exist or is not a folder: {scan_dir}')
|
|
968
|
+
file_filters = FileFilters(debug=self.debug, trace=self.trace, quiet=self.quiet,
|
|
969
|
+
scanoss_settings=self.scan_settings,
|
|
970
|
+
all_extensions=self.all_extensions,
|
|
971
|
+
all_folders=self.all_folders,
|
|
972
|
+
hidden_files_folders=self.hidden_files_folders,
|
|
973
|
+
skip_size=self.skip_size,
|
|
974
|
+
skip_folders=self.skip_folders,
|
|
975
|
+
skip_extensions=self.skip_extensions,
|
|
976
|
+
operation_type='scanning'
|
|
977
|
+
)
|
|
1071
978
|
wfps = ''
|
|
1072
|
-
scan_dir_len = len(scan_dir) if scan_dir.endswith(os.path.sep) else len(scan_dir) + 1
|
|
1073
979
|
self.print_msg(f'Searching {scan_dir} for files to fingerprint...')
|
|
1074
980
|
spinner = None
|
|
1075
981
|
if not self.quiet and self.isatty:
|
|
1076
982
|
spinner = Spinner('Fingerprinting ')
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
f_size = os.stat(path).st_size
|
|
1086
|
-
except Exception as e:
|
|
1087
|
-
self.print_trace(
|
|
1088
|
-
f'Ignoring missing symlink file: {file} ({e})') # Can fail if there is a broken symlink
|
|
1089
|
-
if f_size > 0: # Ignore empty files
|
|
1090
|
-
self.print_debug(f'Fingerprinting {path}...')
|
|
1091
|
-
if spinner:
|
|
1092
|
-
spinner.next()
|
|
1093
|
-
wfps += self.winnowing.wfp_for_file(path, Scanner.__strip_dir(scan_dir, scan_dir_len, path))
|
|
983
|
+
|
|
984
|
+
to_fingerprint_files = file_filters.get_filtered_files_from_folder(scan_dir)
|
|
985
|
+
for file in to_fingerprint_files:
|
|
986
|
+
if spinner:
|
|
987
|
+
spinner.next()
|
|
988
|
+
abs_path = Path(scan_dir, file).resolve()
|
|
989
|
+
self.print_debug(f'Fingerprinting {file}...')
|
|
990
|
+
wfps += self.winnowing.wfp_for_file(str(abs_path), file)
|
|
1094
991
|
if spinner:
|
|
1095
992
|
spinner.finish()
|
|
1096
993
|
if wfps:
|