scanoss 1.40.0__py3-none-any.whl → 1.41.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scanoss/__init__.py +1 -1
- scanoss/cli.py +22 -9
- scanoss/constants.py +3 -0
- scanoss/data/build_date.txt +1 -1
- scanoss/data/osadl-copyleft.json +133 -0
- scanoss/filecount.py +37 -38
- scanoss/gitlabqualityreport.py +33 -4
- scanoss/inspection/policy_check/dependency_track/__init__.py +0 -0
- scanoss/inspection/{dependency_track → policy_check/dependency_track}/project_violation.py +24 -24
- scanoss/inspection/{policy_check.py → policy_check/policy_check.py} +22 -18
- scanoss/inspection/policy_check/scanoss/__init__.py +0 -0
- scanoss/inspection/{raw → policy_check/scanoss}/copyleft.py +42 -36
- scanoss/inspection/{raw → policy_check/scanoss}/undeclared_component.py +30 -29
- scanoss/inspection/summary/__init__.py +0 -0
- scanoss/inspection/{raw → summary}/component_summary.py +34 -9
- scanoss/inspection/{raw → summary}/license_summary.py +46 -44
- scanoss/inspection/{raw → summary}/match_summary.py +51 -0
- scanoss/inspection/utils/license_utils.py +57 -71
- scanoss/inspection/{raw/raw_base.py → utils/scan_result_processor.py} +47 -59
- scanoss/osadl.py +125 -0
- scanoss/scanner.py +191 -189
- scanoss/scanners/folder_hasher.py +24 -24
- scanoss/scanners/scanner_hfh.py +20 -15
- scanoss/threadedscanning.py +10 -0
- {scanoss-1.40.0.dist-info → scanoss-1.41.0.dist-info}/METADATA +1 -1
- {scanoss-1.40.0.dist-info → scanoss-1.41.0.dist-info}/RECORD +31 -26
- /scanoss/inspection/{raw → policy_check}/__init__.py +0 -0
- {scanoss-1.40.0.dist-info → scanoss-1.41.0.dist-info}/WHEEL +0 -0
- {scanoss-1.40.0.dist-info → scanoss-1.41.0.dist-info}/entry_points.txt +0 -0
- {scanoss-1.40.0.dist-info → scanoss-1.41.0.dist-info}/licenses/LICENSE +0 -0
- {scanoss-1.40.0.dist-info → scanoss-1.41.0.dist-info}/top_level.txt +0 -0
scanoss/scanner.py
CHANGED
@@ -26,6 +26,7 @@ import datetime
 import json
 import os
 import sys
+from contextlib import nullcontext
 from pathlib import Path
 from typing import Any, Dict, List, Optional

@@ -363,62 +364,64 @@ class Scanner(ScanossBase):
             operation_type='scanning',
         )
         self.print_msg(f'Searching {scan_dir} for files to fingerprint...')
-        … (old lines 366-421: previous fingerprinting loop, not expanded in this diff view)
+        spinner_ctx = Spinner('Fingerprinting ') if (not self.quiet and self.isatty) else nullcontext()
+
+        with spinner_ctx as spinner:
+            save_wfps_for_print = not self.no_wfp_file or not self.threaded_scan
+            wfp_list = []
+            scan_block = ''
+            scan_size = 0
+            queue_size = 0
+            file_count = 0  # count all files fingerprinted
+            wfp_file_count = 0  # count number of files in each queue post
+            scan_started = False
+
+            to_scan_files = file_filters.get_filtered_files_from_folder(scan_dir)
+            for to_scan_file in to_scan_files:
+                if self.threaded_scan and self.threaded_scan.stop_scanning():
+                    self.print_stderr('Warning: Aborting fingerprinting as the scanning service is not available.')
+                    break
+                self.print_debug(f'Fingerprinting {to_scan_file}...')
+                if spinner:
+                    spinner.next()
+                abs_path = Path(scan_dir, to_scan_file).resolve()
+                wfp = self.winnowing.wfp_for_file(str(abs_path), to_scan_file)
+                if wfp is None or wfp == '':
+                    self.print_debug(f'No WFP returned for {to_scan_file}. Skipping.')
+                    continue
+                if save_wfps_for_print:
+                    wfp_list.append(wfp)
+                file_count += 1
+                if self.threaded_scan:
+                    wfp_size = len(wfp.encode('utf-8'))
+                    # If the WFP is bigger than the max post size and we already have something
+                    # stored in the scan block, add it to the queue
+                    if scan_block != '' and (wfp_size + scan_size) >= self.max_post_size:
+                        self.threaded_scan.queue_add(scan_block)
+                        queue_size += 1
+                        scan_block = ''
+                        wfp_file_count = 0
+                    scan_block += wfp
+                    scan_size = len(scan_block.encode('utf-8'))
+                    wfp_file_count += 1
+                    # If the scan request block (group of WFPs) is larger than the POST size
+                    # or we have reached the file limit, add it to the queue
+                    if wfp_file_count > self.post_file_count or scan_size >= self.max_post_size:
+                        self.threaded_scan.queue_add(scan_block)
+                        queue_size += 1
+                        scan_block = ''
+                        wfp_file_count = 0
+                    if not scan_started and queue_size > self.nb_threads:  # Start scanning if we have something to do
+                        scan_started = True
+                        if not self.threaded_scan.run(wait=False):
+                            self.print_stderr(
+                                'Warning: Some errors encountered while scanning. '
+                                'Results might be incomplete.'
+                            )
+                            success = False
+            # End for loop
+            if self.threaded_scan and scan_block != '':
+                self.threaded_scan.queue_add(scan_block)  # Make sure all files have been submitted

         if file_count > 0:
             if save_wfps_for_print:  # Write a WFP file if no threading is requested
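The loop added above batches winnowing fingerprints (WFPs) before queueing them for the scanning threads: a block is flushed whenever adding another WFP would push it past the maximum POST size, or once the block holds more than the configured file count. The sketch below isolates just that batching rule; the function name and the default values for `max_post_size` and `post_file_count` are illustrative, not the package's API.

```python
# Minimal sketch of the WFP batching rule used in the fingerprinting loop.
def batch_wfps(wfps, max_post_size=32 * 1024, post_file_count=64):
    batches = []
    block = ''
    files_in_block = 0
    for wfp in wfps:
        wfp_size = len(wfp.encode('utf-8'))
        # Flush first if adding this WFP would overflow an already-started block.
        if block and wfp_size + len(block.encode('utf-8')) >= max_post_size:
            batches.append(block)
            block = ''
            files_in_block = 0
        block += wfp
        files_in_block += 1
        # Flush when the block grows past the POST size or the file limit.
        if files_in_block > post_file_count or len(block.encode('utf-8')) >= max_post_size:
            batches.append(block)
            block = ''
            files_in_block = 0
    if block:
        batches.append(block)  # make sure the tail is submitted too
    return batches


print(len(batch_wfps([f'file={i},...\n' for i in range(200)], max_post_size=512, post_file_count=16)))
```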
@@ -631,63 +634,64 @@ class Scanner(ScanossBase):
             skip_extensions=self.skip_extensions,
             operation_type='scanning',
         )
-        … (old lines 634-684: previous fingerprinting loop, not expanded in this diff view)
+        spinner_ctx = Spinner('Fingerprinting ') if (not self.quiet and self.isatty) else nullcontext()
+
+        with spinner_ctx as spinner:
+            save_wfps_for_print = not self.no_wfp_file or not self.threaded_scan
+            wfp_list = []
+            scan_block = ''
+            scan_size = 0
+            queue_size = 0
+            file_count = 0  # count all files fingerprinted
+            wfp_file_count = 0  # count number of files in each queue post
+            scan_started = False
+
+            to_scan_files = file_filters.get_filtered_files_from_files(files)
+            for file in to_scan_files:
+                if self.threaded_scan and self.threaded_scan.stop_scanning():
+                    self.print_stderr('Warning: Aborting fingerprinting as the scanning service is not available.')
+                    break
+                self.print_debug(f'Fingerprinting {file}...')
+                if spinner:
+                    spinner.next()
+                wfp = self.winnowing.wfp_for_file(file, file)
+                if wfp is None or wfp == '':
+                    self.print_debug(f'No WFP returned for {file}. Skipping.')
+                    continue
+                if save_wfps_for_print:
+                    wfp_list.append(wfp)
+                file_count += 1
+                if self.threaded_scan:
+                    wfp_size = len(wfp.encode('utf-8'))
+                    # If the WFP is bigger than the max post size and we already have something
+                    # stored in the scan block, add it to the queue
+                    if scan_block != '' and (wfp_size + scan_size) >= self.max_post_size:
+                        self.threaded_scan.queue_add(scan_block)
+                        queue_size += 1
+                        scan_block = ''
+                        wfp_file_count = 0
+                    scan_block += wfp
+                    scan_size = len(scan_block.encode('utf-8'))
+                    wfp_file_count += 1
+                    # If the scan request block (group of WFPs) is larger than the POST size
+                    # or we have reached the file limit, add it to the queue
+                    if wfp_file_count > self.post_file_count or scan_size >= self.max_post_size:
+                        self.threaded_scan.queue_add(scan_block)
+                        queue_size += 1
+                        scan_block = ''
+                        wfp_file_count = 0
+                    if not scan_started and queue_size > self.nb_threads:  # Start scanning if we have something to do
+                        scan_started = True
+                        if not self.threaded_scan.run(wait=False):
+                            self.print_stderr(
+                                'Warning: Some errors encountered while scanning. '
+                                'Results might be incomplete.'
+                            )
+                            success = False

-        … (old lines 686-688 collapsed in this view)
-        if spinner:
-            spinner.finish()
+            # End for loop
+            if self.threaded_scan and scan_block != '':
+                self.threaded_scan.queue_add(scan_block)  # Make sure all files have been submitted

         if file_count > 0:
             if save_wfps_for_print:  # Write a WFP file if no threading is requested
@@ -778,73 +782,74 @@ class Scanner(ScanossBase):
         self.print_debug(f'Found {file_count} files to process.')
         raw_output = '{\n'
         file_print = ''
-        … (old lines 781-783 collapsed in this view)
-            bar.next(0)
-        with open(wfp_file) as f:
-            for line in f:
-                if line.startswith(WFP_FILE_START):
-                    if file_print:
-                        wfp += file_print  # Store the WFP for the current file
-                        cur_size = len(wfp.encode('utf-8'))
-                    file_print = line  # Start storing the next file
-                    cur_files += 1
-                    batch_files += 1
-                else:
-                    file_print += line  # Store the rest of the WFP for this file
-                l_size = cur_size + len(file_print.encode('utf-8'))
-                # Hit the max post size, so sending the current batch and continue processing
-                if l_size >= self.max_post_size and wfp:
-                    self.print_debug(
-                        f'Sending {batch_files} ({cur_files}) of'
-                        f' {file_count} ({len(wfp.encode("utf-8"))} bytes) files to the ScanOSS API.'
-                    )
-                    if self.debug and cur_size > self.max_post_size:
-                        Scanner.print_stderr(f'Warning: Post size {cur_size} greater than limit {self.max_post_size}')
-                    scan_resp = self.scanoss_api.scan(wfp, max_component['name'])  # Scan current WFP and store
-                    if bar:
-                        bar.next(batch_files)
-                    if scan_resp is not None:
-                        for key, value in scan_resp.items():
-                            raw_output += ' "%s":%s,' % (key, json.dumps(value, indent=2))
-                            for v in value:
-                                if hasattr(v, 'get'):
-                                    if v.get('id') != 'none':
-                                        vcv = '%s:%s:%s' % (v.get('vendor'), v.get('component'), v.get('version'))
-                                        components[vcv] = components[vcv] + 1 if vcv in components else 1
-                                        if max_component['hits'] < components[vcv]:
-                                            max_component['name'] = v.get('component')
-                                            max_component['hits'] = components[vcv]
-                                else:
-                                    Scanner.print_stderr(f'Warning: Unknown value: {v}')
-                    else:
-                        success = False
-                    batch_files = 0
-                    wfp = ''
-        if file_print:
-            wfp += file_print  # Store the WFP for the current file
-        if wfp:
-            self.print_debug(
-                f'Sending {batch_files} ({cur_files}) of'
-                f' {file_count} ({len(wfp.encode("utf-8"))} bytes) files to the ScanOSS API.'
-            )
-            scan_resp = self.scanoss_api.scan(wfp, max_component['name'])  # Scan current WFP and store
-            if bar:
-                bar.next(
-            … (old lines 835-844 collapsed in this view)
+        bar_ctx = Bar('Scanning', max=file_count) if (not self.quiet and self.isatty) else nullcontext()
+
+        with bar_ctx as bar:
+            if bar:
+                bar.next(0)
+            with open(wfp_file) as f:
+                for line in f:
+                    if line.startswith(WFP_FILE_START):
+                        if file_print:
+                            wfp += file_print  # Store the WFP for the current file
+                            cur_size = len(wfp.encode('utf-8'))
+                        file_print = line  # Start storing the next file
+                        cur_files += 1
+                        batch_files += 1
+                    else:
+                        file_print += line  # Store the rest of the WFP for this file
+                    l_size = cur_size + len(file_print.encode('utf-8'))
+                    # Hit the max post size, so sending the current batch and continue processing
+                    if l_size >= self.max_post_size and wfp:
+                        self.print_debug(
+                            f'Sending {batch_files} ({cur_files}) of'
+                            f' {file_count} ({len(wfp.encode("utf-8"))} bytes) files to the ScanOSS API.'
+                        )
+                        if self.debug and cur_size > self.max_post_size:
+                            Scanner.print_stderr(
+                                f'Warning: Post size {cur_size} greater than limit {self.max_post_size}'
+                            )
+                        scan_resp = self.scanoss_api.scan(wfp, max_component['name'])  # Scan current WFP and store
+                        if bar:
+                            bar.next(batch_files)
+                        if scan_resp is not None:
+                            for key, value in scan_resp.items():
+                                raw_output += ' "%s":%s,' % (key, json.dumps(value, indent=2))
+                                for v in value:
+                                    if hasattr(v, 'get'):
+                                        if v.get('id') != 'none':
+                                            vcv = '%s:%s:%s' % (v.get('vendor'), v.get('component'), v.get('version'))
+                                            components[vcv] = components[vcv] + 1 if vcv in components else 1
+                                            if max_component['hits'] < components[vcv]:
+                                                max_component['name'] = v.get('component')
+                                                max_component['hits'] = components[vcv]
+                                    else:
+                                        Scanner.print_stderr(f'Warning: Unknown value: {v}')
+                        else:
+                            success = False
+                        batch_files = 0
+                        wfp = ''
+                if file_print:
+                    wfp += file_print  # Store the WFP for the current file
+                if wfp:
+                    self.print_debug(
+                        f'Sending {batch_files} ({cur_files}) of'
+                        f' {file_count} ({len(wfp.encode("utf-8"))} bytes) files to the ScanOSS API.'
+                    )
+                    scan_resp = self.scanoss_api.scan(wfp, max_component['name'])  # Scan current WFP and store
+                    if bar:
+                        bar.next(batch_files)
+                    first = True
+                    if scan_resp is not None:
+                        for key, value in scan_resp.items():
+                            if first:
+                                raw_output += ' "%s":%s' % (key, json.dumps(value, indent=2))
+                                first = False
+                            else:
+                                raw_output += ',\n "%s":%s' % (key, json.dumps(value, indent=2))
+                    else:
+                        success = False
         raw_output += '\n}'
-        if bar:
-            bar.finish()
         if self.output_format == 'plain':
             self.__log_result(raw_output)
         elif self.output_format == 'cyclonedx':
@@ -1052,19 +1057,16 @@ class Scanner(ScanossBase):
         )
         wfps = ''
         self.print_msg(f'Searching {scan_dir} for files to fingerprint...')
-        … (old lines 1055-1064 collapsed in this view)
-            wfps += self.winnowing.wfp_for_file(str(abs_path), file)
-        if spinner:
-            spinner.finish()
+        spinner_ctx = Spinner('Fingerprinting ') if (not self.quiet and self.isatty) else nullcontext()
+
+        with spinner_ctx as spinner:
+            to_fingerprint_files = file_filters.get_filtered_files_from_folder(scan_dir)
+            for file in to_fingerprint_files:
+                if spinner:
+                    spinner.next()
+                abs_path = Path(scan_dir, file).resolve()
+                self.print_debug(f'Fingerprinting {file}...')
+                wfps += self.winnowing.wfp_for_file(str(abs_path), file)
         if wfps:
             if wfp_file:
                 self.print_stderr(f'Writing fingerprints to {wfp_file}')
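A recurring change across scanner.py above is replacing manually started and finished progress spinners with a context manager, falling back to `contextlib.nullcontext()` when output is quiet or not attached to a TTY, so the same `with` block works in both interactive and non-interactive runs. Below is a minimal sketch of that pattern; the `Spinner` class here is a simplified stand-in for the third-party progress spinner the CLI actually uses, and `fingerprint` is an illustrative function, not the package's API.

```python
# Sketch of the "optional progress UI" pattern: yield a real spinner on a TTY,
# otherwise nullcontext() yields None and the loop silently skips spinner calls.
import sys
from contextlib import nullcontext


class Spinner:  # stand-in with the same shape as the real spinner (next(), context manager)
    def __init__(self, label):
        self.label = label
        self.ticks = 0

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        sys.stderr.write('\n')

    def next(self):
        self.ticks += 1
        sys.stderr.write(f'\r{self.label}{self.ticks}')


def fingerprint(files, quiet=False):
    wfps = []
    spinner_ctx = Spinner('Fingerprinting ') if (not quiet and sys.stderr.isatty()) else nullcontext()
    with spinner_ctx as spinner:
        for name in files:
            if spinner:  # None when nullcontext() was chosen
                spinner.next()
            wfps.append(f'file={name}')  # placeholder for the real winnowing work
    return wfps


if __name__ == '__main__':
    print(fingerprint(['a.c', 'b.c'], quiet=True))
```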
scanoss/scanners/folder_hasher.py
CHANGED

@@ -157,38 +157,38 @@ class FolderHasher:
         # Sort the files by name to ensure the hash is the same for the same folder
         filtered_files.sort()

-        full_file_path = ''
-        for file_path in filtered_files:
-            try:
-                file_path_obj = Path(file_path) if isinstance(file_path, str) else file_path
-                full_file_path = file_path_obj if file_path_obj.is_absolute() else root / file_path_obj
-                …
-                key = CRC64.get_hash_buff(file_bytes)
-                key_str = ''.join(f'{b:02x}' for b in key)
-                rel_path = str(full_file_path.relative_to(root))
-                …
-                for part in Path(rel_path).parent.parts:
-                    child_path = str(Path(current_node.path) / part)
-                    if child_path not in current_node.children:
-                        current_node.children[child_path] = DirectoryNode(child_path)
-                    current_node = current_node.children[child_path]
-                current_node.files.append(file_item)
-                …
-                self.base.print_debug(f'Skipping file {full_file_path}: {str(e)}')
-                …
+        bar_ctx = Bar('Hashing files...', max=len(filtered_files))
+
+        with bar_ctx as bar:
+            full_file_path = ''
+            for file_path in filtered_files:
+                try:
+                    file_path_obj = Path(file_path) if isinstance(file_path, str) else file_path
+                    full_file_path = file_path_obj if file_path_obj.is_absolute() else root / file_path_obj
+
+                    self.base.print_debug(f'\nHashing file {str(full_file_path)}')
+
+                    file_bytes = full_file_path.read_bytes()
+                    key = CRC64.get_hash_buff(file_bytes)
+                    key_str = ''.join(f'{b:02x}' for b in key)
+                    rel_path = str(full_file_path.relative_to(root))
+
+                    file_item = DirectoryFile(rel_path, key, key_str)
+
+                    current_node = root_node
+                    for part in Path(rel_path).parent.parts:
+                        child_path = str(Path(current_node.path) / part)
+                        if child_path not in current_node.children:
+                            current_node.children[child_path] = DirectoryNode(child_path)
+                        current_node = current_node.children[child_path]
+                    current_node.files.append(file_item)
+
+                    root_node.files.append(file_item)
+
+                except Exception as e:
+                    self.base.print_debug(f'Skipping file {full_file_path}: {str(e)}')
+
+                bar.next()
         return root_node

     def _hash_calc_from_node(self, node: DirectoryNode, current_depth: int = 1) -> dict:
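The folder_hasher.py hunk above hashes each file and files it into a tree of directory nodes keyed by relative path. The sketch below shows just that tree-building step; `DirectoryNode` and `DirectoryFile` here are simplified stand-ins (the real classes also carry the raw CRC64 key and other fields), and `add_file` is an illustrative helper, not the package's API.

```python
# Sketch of the directory-tree build: walk the parent parts of each relative path
# and create intermediate directory nodes on demand.
from pathlib import Path


class DirectoryFile:
    """Simplified stand-in: a file's relative path and its hash string."""

    def __init__(self, path, key_str):
        self.path = path
        self.key_str = key_str


class DirectoryNode:
    """Simplified stand-in: one node per directory, holding child nodes and files."""

    def __init__(self, path):
        self.path = path
        self.children = {}
        self.files = []


def add_file(root_node, rel_path, key_str):
    file_item = DirectoryFile(rel_path, key_str)
    current_node = root_node
    # Create (or reuse) one node per directory component of the relative path.
    for part in Path(rel_path).parent.parts:
        child_path = str(Path(current_node.path) / part)
        if child_path not in current_node.children:
            current_node.children[child_path] = DirectoryNode(child_path)
        current_node = current_node.children[child_path]
    current_node.files.append(file_item)  # attach the file to its directory node
    root_node.files.append(file_item)     # and keep a flat record at the root, as in the diff


root = DirectoryNode('.')
add_file(root, 'src/app/main.py', '0011aabb')
print(sorted(root.children), len(root.files))
```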
scanoss/scanners/scanner_hfh.py
CHANGED
@@ -110,6 +110,19 @@ class ScannerHFH:
         self.min_accepted_score = min_accepted_score
         self.use_grpc = use_grpc

+    def _execute_grpc_scan(self, hfh_request: Dict) -> None:
+        """
+        Execute folder hash scan.
+
+        Args:
+            hfh_request: Request dictionary for the gRPC call
+        """
+        try:
+            self.scan_results = self.client.folder_hash_scan(hfh_request, self.use_grpc)
+        except Exception as e:
+            self.base.print_stderr(f'Error during folder hash scan: {e}')
+            self.scan_results = None
+
     def scan(self) -> Optional[Dict]:
         """
         Scan the provided directory using the folder hashing algorithm.
@@ -124,25 +137,17 @@ class ScannerHFH:
             'min_accepted_score': self.min_accepted_score,
         }

-        … (old lines 127-128 collapsed in this view)
+        spinner_ctx = Spinner('Scanning folder...')
+
+        with spinner_ctx as spinner:
+            grpc_thread = threading.Thread(target=self._execute_grpc_scan, args=(hfh_request,))
+            grpc_thread.start()

-            …
-            while not stop_spinner:
+            while grpc_thread.is_alive():
                 spinner.next()
                 time.sleep(0.1)

-            …
-        spinner_thread.start()
-        …
-        try:
-            response = self.client.folder_hash_scan(hfh_request, self.use_grpc)
-            if response:
-                self.scan_results = response
-        finally:
-            stop_spinner = True
-            spinner_thread.join()
-            spinner.finish()
+            grpc_thread.join()

         return self.scan_results

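scanner_hfh.py now moves the blocking folder-hash request into a worker thread and drives the spinner from the main thread until the worker finishes, instead of spinning in a background thread around the network call. A self-contained sketch of that pattern follows; `Worker.long_running_call` and the dot-printing loop are illustrative stand-ins for the gRPC client call and the spinner.

```python
# Sketch: run the blocking call in a worker thread, animate progress on the main
# thread, and stop as soon as the worker exits.
import threading
import time


class Worker:
    def __init__(self):
        self.result = None

    def long_running_call(self, request):
        try:
            time.sleep(0.5)  # stands in for the real gRPC round trip
            self.result = {'echo': request}
        except Exception as e:
            print(f'Error during scan: {e}')
            self.result = None


def scan_with_spinner(request):
    worker = Worker()
    thread = threading.Thread(target=worker.long_running_call, args=(request,))
    thread.start()
    while thread.is_alive():            # keep the UI responsive while waiting
        print('.', end='', flush=True)  # stands in for spinner.next()
        time.sleep(0.1)
    thread.join()
    print()
    return worker.result


if __name__ == '__main__':
    print(scan_with_spinner({'root': '.'}))
```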
scanoss/threadedscanning.py
CHANGED
@@ -22,6 +22,7 @@ SPDX-License-Identifier: MIT
 THE SOFTWARE.
 """

+import atexit
 import os
 import queue
 import sys
@@ -77,6 +78,8 @@ class ThreadedScanning(ScanossBase):
         if nb_threads > MAX_ALLOWED_THREADS:
             self.print_msg(f'Warning: Requested threads too large: {nb_threads}. Reducing to {MAX_ALLOWED_THREADS}')
             self.nb_threads = MAX_ALLOWED_THREADS
+        # Register cleanup to ensure progress bar is finished on exit
+        atexit.register(self.complete_bar)

     @staticmethod
     def __count_files_in_wfp(wfp: str):
@@ -101,6 +104,13 @@ class ThreadedScanning(ScanossBase):
         if self.bar:
             self.bar.finish()

+    def __del__(self):
+        """Ensure progress bar is cleaned up when object is destroyed"""
+        try:
+            self.complete_bar()
+        except Exception:
+            pass  # Ignore errors during cleanup
+
     def set_bar(self, bar: Bar) -> None:
         """
         Set the Progress Bar to display progress while scanning