scanoss 1.40.1__py3-none-any.whl → 1.41.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scanoss/__init__.py +1 -1
- scanoss/cli.py +14 -0
- scanoss/constants.py +3 -0
- scanoss/data/build_date.txt +1 -1
- scanoss/data/osadl-copyleft.json +133 -0
- scanoss/filecount.py +37 -38
- scanoss/inspection/policy_check/scanoss/copyleft.py +7 -1
- scanoss/inspection/utils/license_utils.py +57 -71
- scanoss/inspection/utils/scan_result_processor.py +22 -11
- scanoss/osadl.py +125 -0
- scanoss/scanner.py +191 -189
- scanoss/scanners/folder_hasher.py +24 -24
- scanoss/scanners/scanner_hfh.py +20 -15
- scanoss/threadedscanning.py +10 -0
- {scanoss-1.40.1.dist-info → scanoss-1.41.0.dist-info}/METADATA +1 -1
- {scanoss-1.40.1.dist-info → scanoss-1.41.0.dist-info}/RECORD +20 -18
- {scanoss-1.40.1.dist-info → scanoss-1.41.0.dist-info}/WHEEL +0 -0
- {scanoss-1.40.1.dist-info → scanoss-1.41.0.dist-info}/entry_points.txt +0 -0
- {scanoss-1.40.1.dist-info → scanoss-1.41.0.dist-info}/licenses/LICENSE +0 -0
- {scanoss-1.40.1.dist-info → scanoss-1.41.0.dist-info}/top_level.txt +0 -0
scanoss/osadl.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
Copyright (c) 2025, SCANOSS
|
|
5
|
+
|
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
in the Software without restriction, including without limitation the rights
|
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
furnished to do so, subject to the following conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be included in
|
|
14
|
+
all copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
22
|
+
THE SOFTWARE.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
import json
|
|
26
|
+
import sys
|
|
27
|
+
|
|
28
|
+
import importlib_resources
|
|
29
|
+
|
|
30
|
+
from scanoss.scanossbase import ScanossBase
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class Osadl(ScanossBase):
    """
    OSADL data accessor class.

    Provides access to OSADL (Open Source Automation Development Lab) authoritative
    checklist data for license analysis.

    Data is loaded once at class level and shared across all instances for efficiency.

    Data source: https://www.osadl.org/fileadmin/checklists/copyleft.json
    License: CC-BY-4.0
    """

    # Lower-cased license ID -> OSADL copyleft status (e.g. "Yes", "Yes (restricted)").
    # Shared by every instance; only ever mutated in place by _load_copyleft_data().
    _shared_copyleft_data = {}
    # Flipped to True only after the shared table has been completely populated.
    _data_loaded = False

    def __init__(self, debug: bool = False, trace: bool = True, quiet: bool = False):
        """
        Initialize the Osadl class.

        Data is loaded once at class level and shared across all instances.

        :param debug: forwarded to ScanossBase
        :param trace: forwarded to ScanossBase
        :param quiet: forwarded to ScanossBase
        """
        # NOTE(review): 'trace' defaults to True while 'debug' and 'quiet' default to False.
        # Kept as-is for backward compatibility - confirm this is intended.
        super().__init__(debug, trace, quiet)
        # Best effort: a failed load is reported on stderr by the loader itself and leaves the
        # shared table empty, in which case is_copyleft() answers False for every license.
        self._load_copyleft_data()

    def _load_copyleft_data(self) -> bool:
        """
        Load the embedded OSADL copyleft JSON file into class-level shared data.

        Data is loaded only once and shared across all instances. The shared table is
        only updated after the file has been read and validated, so a failed load never
        leaves partially populated data behind.

        :return: True if successful (or already loaded), False otherwise
        """
        if Osadl._data_loaded:
            return True

        # OSADL copyleft license checklist from: https://www.osadl.org/Checklists
        # Data source: https://www.osadl.org/fileadmin/checklists/copyleft.json
        # License: CC-BY-4.0 (Creative Commons Attribution 4.0 International)
        # Copyright: (C) 2017 - 2024 Open Source Automation Development Lab (OSADL) eG
        try:
            f_name = importlib_resources.files(__name__) / 'data/osadl-copyleft.json'
            with importlib_resources.as_file(f_name) as f:
                with open(f, 'r', encoding='utf-8') as file:
                    data = json.load(file)
        except Exception as e:
            # Deliberately broad: a missing resource, an unreadable file and malformed JSON
            # are all reported the same way instead of aborting the caller.
            self.print_stderr(f'ERROR: Problem loading OSADL copyleft data: {e}')
            return False

        # Validate the document shape before touching it: a root (or 'copyleft' value) that is
        # not a JSON object would otherwise raise an uncaught AttributeError from the constructor.
        copyleft = data.get('copyleft') if isinstance(data, dict) else None
        if not isinstance(copyleft, dict) or not copyleft:
            self.print_stderr('ERROR: No copyleft data found in OSADL JSON')
            return False

        # Normalize license IDs (lowercase) for consistent lookup. Build the table locally first
        # and publish it with a single in-place update of the class-level shared dictionary.
        normalized = {lic_id.lower(): status for lic_id, status in copyleft.items()}
        Osadl._shared_copyleft_data.update(normalized)

        Osadl._data_loaded = True
        self.print_debug(f'Loaded {len(Osadl._shared_copyleft_data)} OSADL copyleft entries')
        return True

    def is_copyleft(self, spdx_id: str) -> bool:
        """
        Check if a license is copyleft according to OSADL data.

        Returns True for both strong copyleft ("Yes") and weak/restricted copyleft ("Yes (restricted)").

        :param spdx_id: SPDX license identifier (matched case-insensitively)
        :return: True if copyleft, False otherwise (including unknown or empty license IDs)
        """
        if not spdx_id:
            self.print_debug('No license ID provided for copyleft check')
            return False

        # Keys in the class-level shared data are stored lower-cased, so normalize the lookup too
        status = Osadl._shared_copyleft_data.get(spdx_id.lower())

        # Treat a missing, empty or non-string status as "no data" rather than failing on .lower()
        if not status or not isinstance(status, str):
            self.print_debug(f'No OSADL copyleft data for license: {spdx_id}')
            return False

        # Consider both "Yes" and "Yes (restricted)" as copyleft (case-insensitive)
        return status.lower().startswith('yes')
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
#
|
|
124
|
+
# End of Osadl Class
|
|
125
|
+
#
|
scanoss/scanner.py
CHANGED
|
@@ -26,6 +26,7 @@ import datetime
|
|
|
26
26
|
import json
|
|
27
27
|
import os
|
|
28
28
|
import sys
|
|
29
|
+
from contextlib import nullcontext
|
|
29
30
|
from pathlib import Path
|
|
30
31
|
from typing import Any, Dict, List, Optional
|
|
31
32
|
|
|
@@ -363,62 +364,64 @@ class Scanner(ScanossBase):
|
|
|
363
364
|
operation_type='scanning',
|
|
364
365
|
)
|
|
365
366
|
self.print_msg(f'Searching {scan_dir} for files to fingerprint...')
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
self.
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
scan_started
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
367
|
+
spinner_ctx = Spinner('Fingerprinting ') if (not self.quiet and self.isatty) else nullcontext()
|
|
368
|
+
|
|
369
|
+
with spinner_ctx as spinner:
|
|
370
|
+
save_wfps_for_print = not self.no_wfp_file or not self.threaded_scan
|
|
371
|
+
wfp_list = []
|
|
372
|
+
scan_block = ''
|
|
373
|
+
scan_size = 0
|
|
374
|
+
queue_size = 0
|
|
375
|
+
file_count = 0 # count all files fingerprinted
|
|
376
|
+
wfp_file_count = 0 # count number of files in each queue post
|
|
377
|
+
scan_started = False
|
|
378
|
+
|
|
379
|
+
to_scan_files = file_filters.get_filtered_files_from_folder(scan_dir)
|
|
380
|
+
for to_scan_file in to_scan_files:
|
|
381
|
+
if self.threaded_scan and self.threaded_scan.stop_scanning():
|
|
382
|
+
self.print_stderr('Warning: Aborting fingerprinting as the scanning service is not available.')
|
|
383
|
+
break
|
|
384
|
+
self.print_debug(f'Fingerprinting {to_scan_file}...')
|
|
385
|
+
if spinner:
|
|
386
|
+
spinner.next()
|
|
387
|
+
abs_path = Path(scan_dir, to_scan_file).resolve()
|
|
388
|
+
wfp = self.winnowing.wfp_for_file(str(abs_path), to_scan_file)
|
|
389
|
+
if wfp is None or wfp == '':
|
|
390
|
+
self.print_debug(f'No WFP returned for {to_scan_file}. Skipping.')
|
|
391
|
+
continue
|
|
392
|
+
if save_wfps_for_print:
|
|
393
|
+
wfp_list.append(wfp)
|
|
394
|
+
file_count += 1
|
|
395
|
+
if self.threaded_scan:
|
|
396
|
+
wfp_size = len(wfp.encode('utf-8'))
|
|
397
|
+
# If the WFP is bigger than the max post size and we already have something
|
|
398
|
+
# stored in the scan block, add it to the queue
|
|
399
|
+
if scan_block != '' and (wfp_size + scan_size) >= self.max_post_size:
|
|
400
|
+
self.threaded_scan.queue_add(scan_block)
|
|
401
|
+
queue_size += 1
|
|
402
|
+
scan_block = ''
|
|
403
|
+
wfp_file_count = 0
|
|
404
|
+
scan_block += wfp
|
|
405
|
+
scan_size = len(scan_block.encode('utf-8'))
|
|
406
|
+
wfp_file_count += 1
|
|
407
|
+
# If the scan request block (group of WFPs) is larger than the POST size
|
|
408
|
+
# or we have reached the file limit, add it to the queue
|
|
409
|
+
if wfp_file_count > self.post_file_count or scan_size >= self.max_post_size:
|
|
410
|
+
self.threaded_scan.queue_add(scan_block)
|
|
411
|
+
queue_size += 1
|
|
412
|
+
scan_block = ''
|
|
413
|
+
wfp_file_count = 0
|
|
414
|
+
if not scan_started and queue_size > self.nb_threads: # Start scanning if we have something to do
|
|
415
|
+
scan_started = True
|
|
416
|
+
if not self.threaded_scan.run(wait=False):
|
|
417
|
+
self.print_stderr(
|
|
418
|
+
'Warning: Some errors encountered while scanning. '
|
|
419
|
+
'Results might be incomplete.'
|
|
420
|
+
)
|
|
421
|
+
success = False
|
|
422
|
+
# End for loop
|
|
423
|
+
if self.threaded_scan and scan_block != '':
|
|
424
|
+
self.threaded_scan.queue_add(scan_block) # Make sure all files have been submitted
|
|
422
425
|
|
|
423
426
|
if file_count > 0:
|
|
424
427
|
if save_wfps_for_print: # Write a WFP file if no threading is requested
|
|
@@ -631,63 +634,64 @@ class Scanner(ScanossBase):
|
|
|
631
634
|
skip_extensions=self.skip_extensions,
|
|
632
635
|
operation_type='scanning',
|
|
633
636
|
)
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
self.
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
if not self.
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
637
|
+
spinner_ctx = Spinner('Fingerprinting ') if (not self.quiet and self.isatty) else nullcontext()
|
|
638
|
+
|
|
639
|
+
with spinner_ctx as spinner:
|
|
640
|
+
save_wfps_for_print = not self.no_wfp_file or not self.threaded_scan
|
|
641
|
+
wfp_list = []
|
|
642
|
+
scan_block = ''
|
|
643
|
+
scan_size = 0
|
|
644
|
+
queue_size = 0
|
|
645
|
+
file_count = 0 # count all files fingerprinted
|
|
646
|
+
wfp_file_count = 0 # count number of files in each queue post
|
|
647
|
+
scan_started = False
|
|
648
|
+
|
|
649
|
+
to_scan_files = file_filters.get_filtered_files_from_files(files)
|
|
650
|
+
for file in to_scan_files:
|
|
651
|
+
if self.threaded_scan and self.threaded_scan.stop_scanning():
|
|
652
|
+
self.print_stderr('Warning: Aborting fingerprinting as the scanning service is not available.')
|
|
653
|
+
break
|
|
654
|
+
self.print_debug(f'Fingerprinting {file}...')
|
|
655
|
+
if spinner:
|
|
656
|
+
spinner.next()
|
|
657
|
+
wfp = self.winnowing.wfp_for_file(file, file)
|
|
658
|
+
if wfp is None or wfp == '':
|
|
659
|
+
self.print_debug(f'No WFP returned for {file}. Skipping.')
|
|
660
|
+
continue
|
|
661
|
+
if save_wfps_for_print:
|
|
662
|
+
wfp_list.append(wfp)
|
|
663
|
+
file_count += 1
|
|
664
|
+
if self.threaded_scan:
|
|
665
|
+
wfp_size = len(wfp.encode('utf-8'))
|
|
666
|
+
# If the WFP is bigger than the max post size and we already have something
|
|
667
|
+
# stored in the scan block, add it to the queue
|
|
668
|
+
if scan_block != '' and (wfp_size + scan_size) >= self.max_post_size:
|
|
669
|
+
self.threaded_scan.queue_add(scan_block)
|
|
670
|
+
queue_size += 1
|
|
671
|
+
scan_block = ''
|
|
672
|
+
wfp_file_count = 0
|
|
673
|
+
scan_block += wfp
|
|
674
|
+
scan_size = len(scan_block.encode('utf-8'))
|
|
675
|
+
wfp_file_count += 1
|
|
676
|
+
# If the scan request block (group of WFPs) is larger than the POST size
|
|
677
|
+
# or we have reached the file limit, add it to the queue
|
|
678
|
+
if wfp_file_count > self.post_file_count or scan_size >= self.max_post_size:
|
|
679
|
+
self.threaded_scan.queue_add(scan_block)
|
|
680
|
+
queue_size += 1
|
|
681
|
+
scan_block = ''
|
|
682
|
+
wfp_file_count = 0
|
|
683
|
+
if not scan_started and queue_size > self.nb_threads: # Start scanning if we have something to do
|
|
684
|
+
scan_started = True
|
|
685
|
+
if not self.threaded_scan.run(wait=False):
|
|
686
|
+
self.print_stderr(
|
|
687
|
+
'Warning: Some errors encountered while scanning. '
|
|
688
|
+
'Results might be incomplete.'
|
|
689
|
+
)
|
|
690
|
+
success = False
|
|
685
691
|
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
if spinner:
|
|
690
|
-
spinner.finish()
|
|
692
|
+
# End for loop
|
|
693
|
+
if self.threaded_scan and scan_block != '':
|
|
694
|
+
self.threaded_scan.queue_add(scan_block) # Make sure all files have been submitted
|
|
691
695
|
|
|
692
696
|
if file_count > 0:
|
|
693
697
|
if save_wfps_for_print: # Write a WFP file if no threading is requested
|
|
@@ -778,73 +782,74 @@ class Scanner(ScanossBase):
|
|
|
778
782
|
self.print_debug(f'Found {file_count} files to process.')
|
|
779
783
|
raw_output = '{\n'
|
|
780
784
|
file_print = ''
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
bar.next(0)
|
|
785
|
-
with open(wfp_file) as f:
|
|
786
|
-
for line in f:
|
|
787
|
-
if line.startswith(WFP_FILE_START):
|
|
788
|
-
if file_print:
|
|
789
|
-
wfp += file_print # Store the WFP for the current file
|
|
790
|
-
cur_size = len(wfp.encode('utf-8'))
|
|
791
|
-
file_print = line # Start storing the next file
|
|
792
|
-
cur_files += 1
|
|
793
|
-
batch_files += 1
|
|
794
|
-
else:
|
|
795
|
-
file_print += line # Store the rest of the WFP for this file
|
|
796
|
-
l_size = cur_size + len(file_print.encode('utf-8'))
|
|
797
|
-
# Hit the max post size, so sending the current batch and continue processing
|
|
798
|
-
if l_size >= self.max_post_size and wfp:
|
|
799
|
-
self.print_debug(
|
|
800
|
-
f'Sending {batch_files} ({cur_files}) of'
|
|
801
|
-
f' {file_count} ({len(wfp.encode("utf-8"))} bytes) files to the ScanOSS API.'
|
|
802
|
-
)
|
|
803
|
-
if self.debug and cur_size > self.max_post_size:
|
|
804
|
-
Scanner.print_stderr(f'Warning: Post size {cur_size} greater than limit {self.max_post_size}')
|
|
805
|
-
scan_resp = self.scanoss_api.scan(wfp, max_component['name']) # Scan current WFP and store
|
|
806
|
-
if bar:
|
|
807
|
-
bar.next(batch_files)
|
|
808
|
-
if scan_resp is not None:
|
|
809
|
-
for key, value in scan_resp.items():
|
|
810
|
-
raw_output += ' "%s":%s,' % (key, json.dumps(value, indent=2))
|
|
811
|
-
for v in value:
|
|
812
|
-
if hasattr(v, 'get'):
|
|
813
|
-
if v.get('id') != 'none':
|
|
814
|
-
vcv = '%s:%s:%s' % (v.get('vendor'), v.get('component'), v.get('version'))
|
|
815
|
-
components[vcv] = components[vcv] + 1 if vcv in components else 1
|
|
816
|
-
if max_component['hits'] < components[vcv]:
|
|
817
|
-
max_component['name'] = v.get('component')
|
|
818
|
-
max_component['hits'] = components[vcv]
|
|
819
|
-
else:
|
|
820
|
-
Scanner.print_stderr(f'Warning: Unknown value: {v}')
|
|
821
|
-
else:
|
|
822
|
-
success = False
|
|
823
|
-
batch_files = 0
|
|
824
|
-
wfp = ''
|
|
825
|
-
if file_print:
|
|
826
|
-
wfp += file_print # Store the WFP for the current file
|
|
827
|
-
if wfp:
|
|
828
|
-
self.print_debug(
|
|
829
|
-
f'Sending {batch_files} ({cur_files}) of'
|
|
830
|
-
f' {file_count} ({len(wfp.encode("utf-8"))} bytes) files to the ScanOSS API.'
|
|
831
|
-
)
|
|
832
|
-
scan_resp = self.scanoss_api.scan(wfp, max_component['name']) # Scan current WFP and store
|
|
785
|
+
bar_ctx = Bar('Scanning', max=file_count) if (not self.quiet and self.isatty) else nullcontext()
|
|
786
|
+
|
|
787
|
+
with bar_ctx as bar:
|
|
833
788
|
if bar:
|
|
834
|
-
bar.next(
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
789
|
+
bar.next(0)
|
|
790
|
+
with open(wfp_file) as f:
|
|
791
|
+
for line in f:
|
|
792
|
+
if line.startswith(WFP_FILE_START):
|
|
793
|
+
if file_print:
|
|
794
|
+
wfp += file_print # Store the WFP for the current file
|
|
795
|
+
cur_size = len(wfp.encode('utf-8'))
|
|
796
|
+
file_print = line # Start storing the next file
|
|
797
|
+
cur_files += 1
|
|
798
|
+
batch_files += 1
|
|
841
799
|
else:
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
800
|
+
file_print += line # Store the rest of the WFP for this file
|
|
801
|
+
l_size = cur_size + len(file_print.encode('utf-8'))
|
|
802
|
+
# Hit the max post size, so sending the current batch and continue processing
|
|
803
|
+
if l_size >= self.max_post_size and wfp:
|
|
804
|
+
self.print_debug(
|
|
805
|
+
f'Sending {batch_files} ({cur_files}) of'
|
|
806
|
+
f' {file_count} ({len(wfp.encode("utf-8"))} bytes) files to the ScanOSS API.'
|
|
807
|
+
)
|
|
808
|
+
if self.debug and cur_size > self.max_post_size:
|
|
809
|
+
Scanner.print_stderr(
|
|
810
|
+
f'Warning: Post size {cur_size} greater than limit {self.max_post_size}'
|
|
811
|
+
)
|
|
812
|
+
scan_resp = self.scanoss_api.scan(wfp, max_component['name']) # Scan current WFP and store
|
|
813
|
+
if bar:
|
|
814
|
+
bar.next(batch_files)
|
|
815
|
+
if scan_resp is not None:
|
|
816
|
+
for key, value in scan_resp.items():
|
|
817
|
+
raw_output += ' "%s":%s,' % (key, json.dumps(value, indent=2))
|
|
818
|
+
for v in value:
|
|
819
|
+
if hasattr(v, 'get'):
|
|
820
|
+
if v.get('id') != 'none':
|
|
821
|
+
vcv = '%s:%s:%s' % (v.get('vendor'), v.get('component'), v.get('version'))
|
|
822
|
+
components[vcv] = components[vcv] + 1 if vcv in components else 1
|
|
823
|
+
if max_component['hits'] < components[vcv]:
|
|
824
|
+
max_component['name'] = v.get('component')
|
|
825
|
+
max_component['hits'] = components[vcv]
|
|
826
|
+
else:
|
|
827
|
+
Scanner.print_stderr(f'Warning: Unknown value: {v}')
|
|
828
|
+
else:
|
|
829
|
+
success = False
|
|
830
|
+
batch_files = 0
|
|
831
|
+
wfp = ''
|
|
832
|
+
if file_print:
|
|
833
|
+
wfp += file_print # Store the WFP for the current file
|
|
834
|
+
if wfp:
|
|
835
|
+
self.print_debug(
|
|
836
|
+
f'Sending {batch_files} ({cur_files}) of'
|
|
837
|
+
f' {file_count} ({len(wfp.encode("utf-8"))} bytes) files to the ScanOSS API.'
|
|
838
|
+
)
|
|
839
|
+
scan_resp = self.scanoss_api.scan(wfp, max_component['name']) # Scan current WFP and store
|
|
840
|
+
if bar:
|
|
841
|
+
bar.next(batch_files)
|
|
842
|
+
first = True
|
|
843
|
+
if scan_resp is not None:
|
|
844
|
+
for key, value in scan_resp.items():
|
|
845
|
+
if first:
|
|
846
|
+
raw_output += ' "%s":%s' % (key, json.dumps(value, indent=2))
|
|
847
|
+
first = False
|
|
848
|
+
else:
|
|
849
|
+
raw_output += ',\n "%s":%s' % (key, json.dumps(value, indent=2))
|
|
850
|
+
else:
|
|
851
|
+
success = False
|
|
845
852
|
raw_output += '\n}'
|
|
846
|
-
if bar:
|
|
847
|
-
bar.finish()
|
|
848
853
|
if self.output_format == 'plain':
|
|
849
854
|
self.__log_result(raw_output)
|
|
850
855
|
elif self.output_format == 'cyclonedx':
|
|
@@ -1052,19 +1057,16 @@ class Scanner(ScanossBase):
|
|
|
1052
1057
|
)
|
|
1053
1058
|
wfps = ''
|
|
1054
1059
|
self.print_msg(f'Searching {scan_dir} for files to fingerprint...')
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
wfps += self.winnowing.wfp_for_file(str(abs_path), file)
|
|
1066
|
-
if spinner:
|
|
1067
|
-
spinner.finish()
|
|
1060
|
+
spinner_ctx = Spinner('Fingerprinting ') if (not self.quiet and self.isatty) else nullcontext()
|
|
1061
|
+
|
|
1062
|
+
with spinner_ctx as spinner:
|
|
1063
|
+
to_fingerprint_files = file_filters.get_filtered_files_from_folder(scan_dir)
|
|
1064
|
+
for file in to_fingerprint_files:
|
|
1065
|
+
if spinner:
|
|
1066
|
+
spinner.next()
|
|
1067
|
+
abs_path = Path(scan_dir, file).resolve()
|
|
1068
|
+
self.print_debug(f'Fingerprinting {file}...')
|
|
1069
|
+
wfps += self.winnowing.wfp_for_file(str(abs_path), file)
|
|
1068
1070
|
if wfps:
|
|
1069
1071
|
if wfp_file:
|
|
1070
1072
|
self.print_stderr(f'Writing fingerprints to {wfp_file}')
|
|
@@ -157,38 +157,38 @@ class FolderHasher:
|
|
|
157
157
|
# Sort the files by name to ensure the hash is the same for the same folder
|
|
158
158
|
filtered_files.sort()
|
|
159
159
|
|
|
160
|
-
|
|
161
|
-
full_file_path = ''
|
|
162
|
-
for file_path in filtered_files:
|
|
163
|
-
try:
|
|
164
|
-
file_path_obj = Path(file_path) if isinstance(file_path, str) else file_path
|
|
165
|
-
full_file_path = file_path_obj if file_path_obj.is_absolute() else root / file_path_obj
|
|
160
|
+
bar_ctx = Bar('Hashing files...', max=len(filtered_files))
|
|
166
161
|
|
|
167
|
-
|
|
162
|
+
with bar_ctx as bar:
|
|
163
|
+
full_file_path = ''
|
|
164
|
+
for file_path in filtered_files:
|
|
165
|
+
try:
|
|
166
|
+
file_path_obj = Path(file_path) if isinstance(file_path, str) else file_path
|
|
167
|
+
full_file_path = file_path_obj if file_path_obj.is_absolute() else root / file_path_obj
|
|
168
168
|
|
|
169
|
-
|
|
170
|
-
key = CRC64.get_hash_buff(file_bytes)
|
|
171
|
-
key_str = ''.join(f'{b:02x}' for b in key)
|
|
172
|
-
rel_path = str(full_file_path.relative_to(root))
|
|
169
|
+
self.base.print_debug(f'\nHashing file {str(full_file_path)}')
|
|
173
170
|
|
|
174
|
-
|
|
171
|
+
file_bytes = full_file_path.read_bytes()
|
|
172
|
+
key = CRC64.get_hash_buff(file_bytes)
|
|
173
|
+
key_str = ''.join(f'{b:02x}' for b in key)
|
|
174
|
+
rel_path = str(full_file_path.relative_to(root))
|
|
175
175
|
|
|
176
|
-
|
|
177
|
-
for part in Path(rel_path).parent.parts:
|
|
178
|
-
child_path = str(Path(current_node.path) / part)
|
|
179
|
-
if child_path not in current_node.children:
|
|
180
|
-
current_node.children[child_path] = DirectoryNode(child_path)
|
|
181
|
-
current_node = current_node.children[child_path]
|
|
182
|
-
current_node.files.append(file_item)
|
|
176
|
+
file_item = DirectoryFile(rel_path, key, key_str)
|
|
183
177
|
|
|
184
|
-
|
|
178
|
+
current_node = root_node
|
|
179
|
+
for part in Path(rel_path).parent.parts:
|
|
180
|
+
child_path = str(Path(current_node.path) / part)
|
|
181
|
+
if child_path not in current_node.children:
|
|
182
|
+
current_node.children[child_path] = DirectoryNode(child_path)
|
|
183
|
+
current_node = current_node.children[child_path]
|
|
184
|
+
current_node.files.append(file_item)
|
|
185
185
|
|
|
186
|
-
|
|
187
|
-
self.base.print_debug(f'Skipping file {full_file_path}: {str(e)}')
|
|
186
|
+
root_node.files.append(file_item)
|
|
188
187
|
|
|
189
|
-
|
|
188
|
+
except Exception as e:
|
|
189
|
+
self.base.print_debug(f'Skipping file {full_file_path}: {str(e)}')
|
|
190
190
|
|
|
191
|
-
|
|
191
|
+
bar.next()
|
|
192
192
|
return root_node
|
|
193
193
|
|
|
194
194
|
def _hash_calc_from_node(self, node: DirectoryNode, current_depth: int = 1) -> dict:
|