scanoss 1.18.0__py3-none-any.whl → 1.19.0__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
scanoss/scanner.py CHANGED
@@ -23,6 +23,7 @@
23
23
  """
24
24
  import json
25
25
  import os
26
+ from pathlib import Path
26
27
  import sys
27
28
  import datetime
28
29
  from typing import Any, Dict, List, Optional
@@ -32,6 +33,8 @@ from progress.bar import Bar
32
33
  from progress.spinner import Spinner
33
34
  from pypac.parser import PACFile
34
35
 
36
+ from scanoss.file_filters import FileFilters
37
+
35
38
  from .scanossapi import ScanossApi
36
39
  from .cyclonedx import CycloneDx
37
40
  from .spdxlite import SpdxLite
@@ -48,44 +51,12 @@ from . import __version__
48
51
 
49
52
  FAST_WINNOWING = False
50
53
  try:
51
- from .winnowing import Winnowing
52
-
54
+ from scanoss_winnowing.winnowing import Winnowing
53
55
  FAST_WINNOWING = True
54
56
  except ModuleNotFoundError or ImportError:
55
57
  FAST_WINNOWING = False
56
58
  from .winnowing import Winnowing
57
59
 
58
- FILTERED_DIRS = { # Folders to skip
59
- "nbproject", "nbbuild", "nbdist", "__pycache__", "venv", "_yardoc", "eggs", "wheels", "htmlcov", "__pypackages__"
60
- }
61
- FILTERED_DIR_EXT = { # Folder endings to skip
62
- ".egg-info"
63
- }
64
- FILTERED_EXT = [ # File extensions to skip
65
- ".1", ".2", ".3", ".4", ".5", ".6", ".7", ".8", ".9", ".ac", ".adoc", ".am",
66
- ".asciidoc", ".bmp", ".build", ".cfg", ".chm", ".class", ".cmake", ".cnf",
67
- ".conf", ".config", ".contributors", ".copying", ".crt", ".csproj", ".css",
68
- ".csv", ".dat", ".data", ".doc", ".docx", ".dtd", ".dts", ".iws", ".c9", ".c9revisions",
69
- ".dtsi", ".dump", ".eot", ".eps", ".geojson", ".gdoc", ".gif",
70
- ".glif", ".gmo", ".gradle", ".guess", ".hex", ".htm", ".html", ".ico", ".iml",
71
- ".in", ".inc", ".info", ".ini", ".ipynb", ".jpeg", ".jpg", ".json", ".jsonld", ".lock",
72
- ".log", ".m4", ".map", ".markdown", ".md", ".md5", ".meta", ".mk", ".mxml",
73
- ".o", ".otf", ".out", ".pbtxt", ".pdf", ".pem", ".phtml", ".plist", ".png",
74
- ".po", ".ppt", ".prefs", ".properties", ".pyc", ".qdoc", ".result", ".rgb",
75
- ".rst", ".scss", ".sha", ".sha1", ".sha2", ".sha256", ".sln", ".spec", ".sql",
76
- ".sub", ".svg", ".svn-base", ".tab", ".template", ".test", ".tex", ".tiff",
77
- ".toml", ".ttf", ".txt", ".utf-8", ".vim", ".wav", ".woff", ".woff2", ".xht",
78
- ".xhtml", ".xls", ".xlsx", ".xml", ".xpm", ".xsd", ".xul", ".yaml", ".yml", ".wfp",
79
- ".editorconfig", ".dotcover", ".pid", ".lcov", ".egg", ".manifest", ".cache", ".coverage", ".cover",
80
- ".gem", ".lst", ".pickle", ".pdb", ".gml", ".pot", ".plt",
81
- # File endings
82
- "-doc", "changelog", "config", "copying", "license", "authors", "news", "licenses", "notice",
83
- "readme", "swiftdoc", "texidoc", "todo", "version", "ignore", "manifest", "sqlite", "sqlite3"
84
- ]
85
- FILTERED_FILES = { # Files to skip
86
- "gradlew", "gradlew.bat", "mvnw", "mvnw.cmd", "gradle-wrapper.jar", "maven-wrapper.jar",
87
- "thumbs.db", "babel.config.js", "license.txt", "license.md", "copying.lib", "makefile"
88
- }
89
60
  WFP_FILE_START = "file="
90
61
  MAX_POST_SIZE = 64 * 1024 # 64k Max post size
91
62
 
@@ -96,18 +67,44 @@ class Scanner(ScanossBase):
96
67
  Handle the scanning of files, snippets and dependencies
97
68
  """
98
69
 
99
- def __init__(self, wfp: str = None, scan_output: str = None, output_format: str = 'plain',
100
- debug: bool = False, trace: bool = False, quiet: bool = False, api_key: str = None, url: str = None,
101
- flags: str = None, nb_threads: int = 5,
102
- post_size: int = 32, timeout: int = 180, no_wfp_file: bool = False,
103
- all_extensions: bool = False, all_folders: bool = False, hidden_files_folders: bool = False,
104
- scan_options: int = 7, sc_timeout: int = 600, sc_command: str = None, grpc_url: str = None,
105
- obfuscate: bool = False, ignore_cert_errors: bool = False, proxy: str = None, grpc_proxy: str = None,
106
- ca_cert: str = None, pac: PACFile = None, retry: int = 5, hpsm: bool = False,
107
- skip_size: int = 0, skip_extensions=None, skip_folders=None,
108
- strip_hpsm_ids=None, strip_snippet_ids=None, skip_md5_ids=None,
109
- scan_settings: ScanossSettings = None
110
- ):
70
+ def __init__(
71
+ self,
72
+ wfp: str = None,
73
+ scan_output: str = None,
74
+ output_format: str = 'plain',
75
+ debug: bool = False,
76
+ trace: bool = False,
77
+ quiet: bool = False,
78
+ api_key: str = None,
79
+ url: str = None,
80
+ flags: str = None,
81
+ nb_threads: int = 5,
82
+ post_size: int = 32,
83
+ timeout: int = 180,
84
+ no_wfp_file: bool = False,
85
+ all_extensions: bool = False,
86
+ all_folders: bool = False,
87
+ hidden_files_folders: bool = False,
88
+ scan_options: int = 7,
89
+ sc_timeout: int = 600,
90
+ sc_command: str = None,
91
+ grpc_url: str = None,
92
+ obfuscate: bool = False,
93
+ ignore_cert_errors: bool = False,
94
+ proxy: str = None,
95
+ grpc_proxy: str = None,
96
+ ca_cert: str = None,
97
+ pac: PACFile = None,
98
+ retry: int = 5,
99
+ hpsm: bool = False,
100
+ skip_size: int = 0,
101
+ skip_extensions=None,
102
+ skip_folders=None,
103
+ strip_hpsm_ids=None,
104
+ strip_snippet_ids=None,
105
+ skip_md5_ids=None,
106
+ scan_settings: 'ScanossSettings | None' = None
107
+ ):
111
108
  """
112
109
  Initialise scanning class, including Winnowing, ScanossApi, ThreadedScanning
113
110
  """
@@ -129,6 +126,7 @@ class Scanner(ScanossBase):
129
126
  self.hpsm = hpsm
130
127
  self.skip_folders = skip_folders
131
128
  self.skip_size = skip_size
129
+ self.skip_extensions = skip_extensions
132
130
  ver_details = Scanner.version_details()
133
131
 
134
132
  self.winnowing = Winnowing(debug=debug, quiet=quiet, skip_snippets=self._skip_snippets,
@@ -157,9 +155,6 @@ class Scanner(ScanossBase):
157
155
  self.post_file_count = post_size if post_size > 0 else 32 # Max number of files for any given POST (default 32)
158
156
  if self._skip_snippets:
159
157
  self.max_post_size = 8 * 1024 # 8k Max post size if we're skipping snippets
160
- self.skip_extensions = FILTERED_EXT
161
- if skip_extensions: # Append extra file extensions to skip
162
- self.skip_extensions.extend(skip_extensions)
163
158
 
164
159
  self.scan_settings = scan_settings
165
160
  self.post_processor = ScanPostProcessor(scan_settings, debug=debug, trace=trace, quiet=quiet) if scan_settings else None
@@ -172,73 +167,6 @@ class Scanner(ScanossBase):
172
167
  if sbom:
173
168
  self.scanoss_api.set_sbom(sbom)
174
169
 
175
- def __filter_files(self, files: list) -> list:
176
- """
177
- Filter which files should be considered for processing
178
- :param files: list of files to filter
179
- :return list of filtered files
180
- """
181
- file_list = []
182
- for f in files:
183
- ignore = False
184
- if f.startswith(".") and not self.hidden_files_folders: # Ignore all . files unless requested
185
- ignore = True
186
- if not ignore and not self.all_extensions: # Skip this check if we're allowing all extensions
187
- f_lower = f.lower()
188
- if f_lower in FILTERED_FILES: # Check for exact files to ignore
189
- ignore = True
190
- if not ignore:
191
- for ending in self.skip_extensions: # Check for file endings to ignore (static and user supplied)
192
- if ending and f_lower.endswith(ending):
193
- ignore = True
194
- break
195
- if not ignore:
196
- file_list.append(f)
197
- return file_list
198
-
199
- def __filter_dirs(self, dirs: list) -> list:
200
- """
201
- Filter which folders should be considered for processing
202
- :param dirs: list of directories to filter
203
- :return: list of filtered directories
204
- """
205
- dir_list = []
206
- for d in dirs:
207
- ignore = False
208
- if d.startswith(".") and not self.hidden_files_folders: # Ignore all . folders unless requested
209
- ignore = True
210
- if not ignore and not self.all_folders: # Skip this check if we're allowing all folders
211
- d_lower = d.lower()
212
- if d_lower in FILTERED_DIRS: # Ignore specific folders (case insensitive)
213
- ignore = True
214
- elif self.skip_folders and d in self.skip_folders: # Ignore user-supplied folders (case sensitive)
215
- ignore = True
216
- if not ignore:
217
- for de in FILTERED_DIR_EXT: # Ignore specific folder endings (case insensitive)
218
- if d_lower.endswith(de):
219
- ignore = True
220
- break
221
- if not ignore:
222
- dir_list.append(d)
223
- return dir_list
224
-
225
- @staticmethod
226
- def __strip_dir(scan_dir: str, length: int, path: str) -> str:
227
- """
228
- Strip the leading string from the specified path
229
- Parameters
230
- ----------
231
- scan_dir: str
232
- Root path
233
- length: int
234
- length of the root path string
235
- path: str
236
- Path to strip
237
- """
238
- if length > 0 and path.startswith(scan_dir):
239
- path = path[length:]
240
- return path
241
-
242
170
  @staticmethod
243
171
  def __count_files_in_wfp_file(wfp_file: str):
244
172
  """
@@ -255,27 +183,7 @@ class Scanner(ScanossBase):
255
183
  if WFP_FILE_START in line:
256
184
  count += 1
257
185
  return count
258
-
259
- @staticmethod
260
- def valid_json_file(json_file: str) -> bool:
261
- """
262
- Validate if the specified file is indeed valid JSON
263
- :param: str JSON file to load
264
- :return bool True if valid, False otherwise
265
- """
266
- if not json_file:
267
- Scanner.print_stderr('ERROR: No JSON file provided to parse.')
268
- return False
269
- if not os.path.isfile(json_file):
270
- Scanner.print_stderr(f'ERROR: JSON file does not exist or is not a file: {json_file}')
271
- return False
272
- try:
273
- with open(json_file) as f:
274
- json.load(f)
275
- except Exception as e:
276
- Scanner.print_stderr(f'Problem parsing JSON file "{json_file}": {e}')
277
- return False
278
- return True
186
+
279
187
 
280
188
  @staticmethod
281
189
  def version_details() -> str:
@@ -390,11 +298,20 @@ class Scanner(ScanossBase):
390
298
  """
391
299
  success = True
392
300
  if not scan_dir:
393
- raise Exception(f"ERROR: Please specify a folder to scan")
301
+ raise Exception('ERROR: Please specify a folder to scan')
394
302
  if not os.path.exists(scan_dir) or not os.path.isdir(scan_dir):
395
- raise Exception(f"ERROR: Specified folder does not exist or is not a folder: {scan_dir}")
396
-
397
- scan_dir_len = len(scan_dir) if scan_dir.endswith(os.path.sep) else len(scan_dir) + 1
303
+ raise Exception(f'ERROR: Specified folder does not exist or is not a folder: {scan_dir}')
304
+
305
+ file_filters = FileFilters(debug=self.debug, trace=self.trace, quiet=self.quiet,
306
+ scanoss_settings=self.scan_settings,
307
+ all_extensions=self.all_extensions,
308
+ all_folders=self.all_folders,
309
+ hidden_files_folders=self.hidden_files_folders,
310
+ skip_size=self.skip_size,
311
+ skip_folders=self.skip_folders,
312
+ skip_extensions=self.skip_extensions,
313
+ operation_type='scanning'
314
+ )
398
315
  self.print_msg(f'Searching {scan_dir} for files to fingerprint...')
399
316
  spinner = None
400
317
  if not self.quiet and self.isatty:
@@ -407,57 +324,45 @@ class Scanner(ScanossBase):
407
324
  file_count = 0 # count all files fingerprinted
408
325
  wfp_file_count = 0 # count number of files in each queue post
409
326
  scan_started = False
410
- for root, dirs, files in os.walk(scan_dir):
411
- self.print_trace(f'U Root: {root}, Dirs: {dirs}, Files {files}')
327
+
328
+ to_scan_files = file_filters.get_filtered_files_from_folder(scan_dir)
329
+ for to_scan_file in to_scan_files:
412
330
  if self.threaded_scan and self.threaded_scan.stop_scanning():
413
331
  self.print_stderr('Warning: Aborting fingerprinting as the scanning service is not available.')
414
332
  break
415
- dirs[:] = self.__filter_dirs(dirs) # Strip out unwanted directories
416
- filtered_files = self.__filter_files(files) # Strip out unwanted files
417
- self.print_debug(f'F Root: {root}, Dirs: {dirs}, Files {filtered_files}')
418
- for file in filtered_files: # Cycle through each filtered file
419
- path = os.path.join(root, file)
420
- f_size = 0
421
- try:
422
- f_size = os.stat(path).st_size
423
- except Exception as e:
424
- self.print_trace(
425
- f'Ignoring missing symlink file: {file} ({e})') # Can fail if there is a broken symlink
426
- # Ignore broken links and empty files or if a user-specified size limit is supplied
427
- if f_size > 0 and (self.skip_size <= 0 or f_size > self.skip_size):
428
- self.print_trace(f'Fingerprinting {path}...')
429
- if spinner:
430
- spinner.next()
431
- wfp = self.winnowing.wfp_for_file(path, Scanner.__strip_dir(scan_dir, scan_dir_len, path))
432
- if wfp is None or wfp == '':
433
- self.print_debug(f'No WFP returned for {path}. Skipping.')
434
- continue
435
- if save_wfps_for_print:
436
- wfp_list.append(wfp)
437
- file_count += 1
438
- if self.threaded_scan:
439
- wfp_size = len(wfp.encode("utf-8"))
440
- # If the WFP is bigger than the max post size and we already have something stored in the scan block, add it to the queue
441
- if scan_block != '' and (wfp_size + scan_size) >= self.max_post_size:
442
- self.threaded_scan.queue_add(scan_block)
443
- queue_size += 1
444
- scan_block = ''
445
- wfp_file_count = 0
446
- scan_block += wfp
447
- scan_size = len(scan_block.encode("utf-8"))
448
- wfp_file_count += 1
449
- # If the scan request block (group of WFPs) or larger than the POST size or we have reached the file limit, add it to the queue
450
- if wfp_file_count > self.post_file_count or scan_size >= self.max_post_size:
451
- self.threaded_scan.queue_add(scan_block)
452
- queue_size += 1
453
- scan_block = ''
454
- wfp_file_count = 0
455
- if not scan_started and queue_size > self.nb_threads: # Start scanning if we have something to do
456
- scan_started = True
457
- if not self.threaded_scan.run(wait=False):
458
- self.print_stderr(
459
- f'Warning: Some errors encounted while scanning. Results might be incomplete.')
460
- success = False
333
+ self.print_debug(f'Fingerprinting {to_scan_file}...')
334
+ if spinner:
335
+ spinner.next()
336
+ abs_path = Path(scan_dir, to_scan_file).resolve()
337
+ wfp = self.winnowing.wfp_for_file(str(abs_path), to_scan_file)
338
+ if wfp is None or wfp == '':
339
+ self.print_debug(f'No WFP returned for {to_scan_file}. Skipping.')
340
+ continue
341
+ if save_wfps_for_print:
342
+ wfp_list.append(wfp)
343
+ file_count += 1
344
+ if self.threaded_scan:
345
+ wfp_size = len(wfp.encode("utf-8"))
346
+ # If the WFP is bigger than the max post size and we already have something stored in the scan block, add it to the queue
347
+ if scan_block != '' and (wfp_size + scan_size) >= self.max_post_size:
348
+ self.threaded_scan.queue_add(scan_block)
349
+ queue_size += 1
350
+ scan_block = ''
351
+ wfp_file_count = 0
352
+ scan_block += wfp
353
+ scan_size = len(scan_block.encode("utf-8"))
354
+ wfp_file_count += 1
355
+ # If the scan request block (group of WFPs) is larger than the POST size or we have reached the file limit, add it to the queue
356
+ if wfp_file_count > self.post_file_count or scan_size >= self.max_post_size:
357
+ self.threaded_scan.queue_add(scan_block)
358
+ queue_size += 1
359
+ scan_block = ''
360
+ wfp_file_count = 0
361
+ if not scan_started and queue_size > self.nb_threads: # Start scanning if we have something to do
362
+ scan_started = True
363
+ if not self.threaded_scan.run(wait=False):
364
+ self.print_stderr('Warning: Some errors encounted while scanning. Results might be incomplete.')
365
+ success = False
461
366
  # End for loop
462
367
  if self.threaded_scan and scan_block != '':
463
368
  self.threaded_scan.queue_add(scan_block) # Make sure all files have been submitted
@@ -650,6 +555,17 @@ class Scanner(ScanossBase):
650
555
  success = True
651
556
  if not files:
652
557
  raise Exception(f"ERROR: Please provide a non-empty list of filenames to scan")
558
+
559
+ file_filters = FileFilters(debug=self.debug, trace=self.trace, quiet=self.quiet,
560
+ scanoss_settings=self.scan_settings,
561
+ all_extensions=self.all_extensions,
562
+ all_folders=self.all_folders,
563
+ hidden_files_folders=self.hidden_files_folders,
564
+ skip_size=self.skip_size,
565
+ skip_folders=self.skip_folders,
566
+ skip_extensions=self.skip_extensions,
567
+ operation_type='scanning'
568
+ )
653
569
  spinner = None
654
570
  if not self.quiet and self.isatty:
655
571
  spinner = Spinner('Fingerprinting ')
@@ -661,66 +577,47 @@ class Scanner(ScanossBase):
661
577
  file_count = 0 # count all files fingerprinted
662
578
  wfp_file_count = 0 # count number of files in each queue post
663
579
  scan_started = False
664
- filtered_files = []
665
- # Filter the files to remove anything we shouldn't scan
666
- for file in files:
667
- filename = os.path.basename(file)
668
- filtered_filenames = self.__filter_files([filename])
669
- if not filtered_filenames or len(filtered_filenames) == 0:
670
- self.print_debug(f'Skipping filtered file: {file}')
671
- continue
672
- paths = os.path.dirname(file).split(os.sep)
673
- if len(self.__filter_dirs(paths)) == len(paths): # Nothing found to filter
674
- filtered_files.append(file)
675
- else:
676
- self.print_debug(f'Skipping filtered (folder) file: {file}')
677
- if len(filtered_files) > 0:
678
- self.print_debug(f'Scanning {len(filtered_files)} files...')
679
- # Process all the requested files
680
- for file in filtered_files:
580
+
581
+ to_scan_files = file_filters.get_filtered_files_from_files(files)
582
+ for file in to_scan_files:
681
583
  if self.threaded_scan and self.threaded_scan.stop_scanning():
682
584
  self.print_stderr('Warning: Aborting fingerprinting as the scanning service is not available.')
683
585
  break
684
- f_size = 0
685
- try:
686
- f_size = os.stat(file).st_size
687
- except Exception as e:
688
- self.print_trace(
689
- f'Ignoring missing symlink file: {file} ({e})') # Can fail if there is a broken symlink
690
- if f_size > 0: # Ignore broken links and empty files
691
- self.print_trace(f'Fingerprinting {file}...')
692
- if spinner:
693
- spinner.next()
694
- wfp = self.winnowing.wfp_for_file(file, file)
695
- if wfp is None or wfp == '':
696
- self.print_debug(f'No WFP returned for {file}. Skipping.')
697
- continue
698
- if save_wfps_for_print:
699
- wfp_list.append(wfp)
700
- file_count += 1
701
- if self.threaded_scan:
702
- wfp_size = len(wfp.encode("utf-8"))
703
- # If the WFP is bigger than the max post size and we already have something stored in the scan block, add it to the queue
704
- if scan_block != '' and (wfp_size + scan_size) >= self.max_post_size:
705
- self.threaded_scan.queue_add(scan_block)
706
- queue_size += 1
707
- scan_block = ''
708
- wfp_file_count = 0
709
- scan_block += wfp
710
- scan_size = len(scan_block.encode("utf-8"))
711
- wfp_file_count += 1
712
- # If the scan request block (group of WFPs) or larger than the POST size or we have reached the file limit, add it to the queue
713
- if wfp_file_count > self.post_file_count or scan_size >= self.max_post_size:
714
- self.threaded_scan.queue_add(scan_block)
715
- queue_size += 1
716
- scan_block = ''
717
- wfp_file_count = 0
718
- if not scan_started and queue_size > self.nb_threads: # Start scanning if we have something to do
719
- scan_started = True
720
- if not self.threaded_scan.run(wait=False):
721
- self.print_stderr(
722
- f'Warning: Some errors encounted while scanning. Results might be incomplete.')
723
- success = False
586
+ self.print_debug(f'Fingerprinting {file}...')
587
+ if spinner:
588
+ spinner.next()
589
+ wfp = self.winnowing.wfp_for_file(file, file)
590
+ if wfp is None or wfp == '':
591
+ self.print_debug(f'No WFP returned for {file}. Skipping.')
592
+ continue
593
+ if save_wfps_for_print:
594
+ wfp_list.append(wfp)
595
+ file_count += 1
596
+ if self.threaded_scan:
597
+ wfp_size = len(wfp.encode('utf-8'))
598
+ # If the WFP is bigger than the max post size and we already have something stored in the scan block, add it to the queue
599
+ if scan_block != '' and (wfp_size + scan_size) >= self.max_post_size:
600
+ self.threaded_scan.queue_add(scan_block)
601
+ queue_size += 1
602
+ scan_block = ''
603
+ wfp_file_count = 0
604
+ scan_block += wfp
605
+ scan_size = len(scan_block.encode('utf-8'))
606
+ wfp_file_count += 1
607
+ # If the scan request block (group of WFPs) is larger than the POST size or we have reached the file limit, add it to the queue
608
+ if wfp_file_count > self.post_file_count or scan_size >= self.max_post_size:
609
+ self.threaded_scan.queue_add(scan_block)
610
+ queue_size += 1
611
+ scan_block = ''
612
+ wfp_file_count = 0
613
+ if not scan_started and queue_size > self.nb_threads: # Start scanning if we have something to do
614
+ scan_started = True
615
+ if not self.threaded_scan.run(wait=False):
616
+ self.print_stderr(
617
+ f'Warning: Some errors encounted while scanning. Results might be incomplete.'
618
+ )
619
+ success = False
620
+
724
621
  # End for loop
725
622
  if self.threaded_scan and scan_block != '':
726
623
  self.threaded_scan.queue_add(scan_block) # Make sure all files have been submitted
@@ -737,7 +634,7 @@ class Scanner(ScanossBase):
737
634
  if self.threaded_scan:
738
635
  success = self.__run_scan_threaded(scan_started, file_count)
739
636
  else:
740
- Scanner.print_stderr(f'Warning: No files found to scan from: {filtered_files}')
637
+ Scanner.print_stderr(f'Warning: No files found to scan from: {to_scan_files}')
741
638
  return success
742
639
 
743
640
  def scan_files_with_options(self, files: [], deps_file: str = None, file_map: dict = None) -> bool:
@@ -1065,32 +962,32 @@ class Scanner(ScanossBase):
1065
962
  Fingerprint the specified folder producing fingerprints
1066
963
  """
1067
964
  if not scan_dir:
1068
- raise Exception(f"ERROR: Please specify a folder to fingerprint")
965
+ raise Exception(f'ERROR: Please specify a folder to fingerprint')
1069
966
  if not os.path.exists(scan_dir) or not os.path.isdir(scan_dir):
1070
- raise Exception(f"ERROR: Specified folder does not exist or is not a folder: {scan_dir}")
967
+ raise Exception(f'ERROR: Specified folder does not exist or is not a folder: {scan_dir}')
968
+ file_filters = FileFilters(debug=self.debug, trace=self.trace, quiet=self.quiet,
969
+ scanoss_settings=self.scan_settings,
970
+ all_extensions=self.all_extensions,
971
+ all_folders=self.all_folders,
972
+ hidden_files_folders=self.hidden_files_folders,
973
+ skip_size=self.skip_size,
974
+ skip_folders=self.skip_folders,
975
+ skip_extensions=self.skip_extensions,
976
+ operation_type='scanning'
977
+ )
1071
978
  wfps = ''
1072
- scan_dir_len = len(scan_dir) if scan_dir.endswith(os.path.sep) else len(scan_dir) + 1
1073
979
  self.print_msg(f'Searching {scan_dir} for files to fingerprint...')
1074
980
  spinner = None
1075
981
  if not self.quiet and self.isatty:
1076
982
  spinner = Spinner('Fingerprinting ')
1077
- for root, dirs, files in os.walk(scan_dir):
1078
- dirs[:] = self.__filter_dirs(dirs) # Strip out unwanted directories
1079
- filtered_files = self.__filter_files(files) # Strip out unwanted files
1080
- self.print_trace(f'Root: {root}, Dirs: {dirs}, Files {filtered_files}')
1081
- for file in filtered_files:
1082
- path = os.path.join(root, file)
1083
- f_size = 0
1084
- try:
1085
- f_size = os.stat(path).st_size
1086
- except Exception as e:
1087
- self.print_trace(
1088
- f'Ignoring missing symlink file: {file} ({e})') # Can fail if there is a broken symlink
1089
- if f_size > 0: # Ignore empty files
1090
- self.print_debug(f'Fingerprinting {path}...')
1091
- if spinner:
1092
- spinner.next()
1093
- wfps += self.winnowing.wfp_for_file(path, Scanner.__strip_dir(scan_dir, scan_dir_len, path))
983
+
984
+ to_fingerprint_files = file_filters.get_filtered_files_from_folder(scan_dir)
985
+ for file in to_fingerprint_files:
986
+ if spinner:
987
+ spinner.next()
988
+ abs_path = Path(scan_dir, file).resolve()
989
+ self.print_debug(f'Fingerprinting {file}...')
990
+ wfps += self.winnowing.wfp_for_file(str(abs_path), file)
1094
991
  if spinner:
1095
992
  spinner.finish()
1096
993
  if wfps: