scanoss 1.26.2__py3-none-any.whl → 1.27.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scanoss/__init__.py +1 -1
- scanoss/api/scanning/v2/scanoss_scanning_pb2.py +29 -23
- scanoss/cli.py +16 -25
- scanoss/constants.py +2 -0
- scanoss/data/build_date.txt +1 -1
- scanoss/file_filters.py +7 -2
- scanoss/inspection/copyleft.py +1 -1
- scanoss/inspection/inspect_base.py +6 -4
- scanoss/inspection/license_summary.py +1 -1
- scanoss/inspection/policy_check.py +1 -1
- scanoss/inspection/undeclared_component.py +1 -1
- scanoss/scanners/folder_hasher.py +61 -20
- scanoss/scanners/scanner_hfh.py +50 -5
- {scanoss-1.26.2.dist-info → scanoss-1.27.0.dist-info}/METADATA +1 -1
- {scanoss-1.26.2.dist-info → scanoss-1.27.0.dist-info}/RECORD +19 -19
- {scanoss-1.26.2.dist-info → scanoss-1.27.0.dist-info}/WHEEL +0 -0
- {scanoss-1.26.2.dist-info → scanoss-1.27.0.dist-info}/entry_points.txt +0 -0
- {scanoss-1.26.2.dist-info → scanoss-1.27.0.dist-info}/licenses/LICENSE +0 -0
- {scanoss-1.26.2.dist-info → scanoss-1.27.0.dist-info}/top_level.txt +0 -0
scanoss/__init__.py
CHANGED
|
@@ -16,28 +16,34 @@ from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2
|
|
|
16
16
|
from protoc_gen_swagger.options import annotations_pb2 as protoc__gen__swagger_dot_options_dot_annotations__pb2
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n.scanoss/api/scanning/v2/scanoss-scanning.proto\x12\x17scanoss.api.scanning.v2\x1a*scanoss/api/common/v2/scanoss-common.proto\x1a\x1cgoogle/api/annotations.proto\x1a,protoc-gen-swagger/options/annotations.proto\"\
|
|
19
|
+
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n.scanoss/api/scanning/v2/scanoss-scanning.proto\x12\x17scanoss.api.scanning.v2\x1a*scanoss/api/common/v2/scanoss-common.proto\x1a\x1cgoogle/api/annotations.proto\x1a,protoc-gen-swagger/options/annotations.proto\"\xc5\x03\n\nHFHRequest\x12:\n\x04root\x18\x01 \x01(\x0b\x32,.scanoss.api.scanning.v2.HFHRequest.Children\x12\x16\n\x0erank_threshold\x18\x02 \x01(\x05\x12\x10\n\x08\x63\x61tegory\x18\x03 \x01(\t\x12\x13\n\x0bquery_limit\x18\x04 \x01(\x05\x1a\xbb\x02\n\x08\x43hildren\x12\x0f\n\x07path_id\x18\x01 \x01(\t\x12\x16\n\x0esim_hash_names\x18\x02 \x01(\t\x12\x18\n\x10sim_hash_content\x18\x03 \x01(\t\x12>\n\x08\x63hildren\x18\x04 \x03(\x0b\x32,.scanoss.api.scanning.v2.HFHRequest.Children\x12\x1a\n\x12sim_hash_dir_names\x18\x05 \x01(\t\x12Y\n\x0flang_extensions\x18\x06 \x03(\x0b\x32@.scanoss.api.scanning.v2.HFHRequest.Children.LangExtensionsEntry\x1a\x35\n\x13LangExtensionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x05:\x02\x38\x01\"\xa3\x03\n\x0bHFHResponse\x12<\n\x07results\x18\x01 \x03(\x0b\x32+.scanoss.api.scanning.v2.HFHResponse.Result\x12\x35\n\x06status\x18\x02 \x01(\x0b\x32%.scanoss.api.common.v2.StatusResponse\x1a)\n\x07Version\x12\x0f\n\x07version\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x1a\x94\x01\n\tComponent\x12\x0c\n\x04purl\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06vendor\x18\x03 \x01(\t\x12>\n\x08versions\x18\x04 \x03(\x0b\x32,.scanoss.api.scanning.v2.HFHResponse.Version\x12\x0c\n\x04rank\x18\x05 \x01(\x05\x12\r\n\x05order\x18\x06 \x01(\x05\x1a]\n\x06Result\x12\x0f\n\x07path_id\x18\x01 \x01(\t\x12\x42\n\ncomponents\x18\x02 \x03(\x0b\x32..scanoss.api.scanning.v2.HFHResponse.Component2\x81\x02\n\x08Scanning\x12q\n\x04\x45\x63ho\x12\".scanoss.api.common.v2.EchoRequest\x1a#.scanoss.api.common.v2.EchoResponse\" 
\x82\xd3\xe4\x93\x02\x1a\"\x15/api/v2/scanning/echo:\x01*\x12\x81\x01\n\x0e\x46olderHashScan\x12#.scanoss.api.scanning.v2.HFHRequest\x1a$.scanoss.api.scanning.v2.HFHResponse\"$\x82\xd3\xe4\x93\x02\x1e\"\x19/api/v2/scanning/hfh/scan:\x01*B\x8a\x02Z1github.com/scanoss/papi/api/scanningv2;scanningv2\x92\x41\xd3\x01\x12m\n\x18SCANOSS Scanning Service\"L\n\x10scanoss-scanning\x12#https://github.com/scanoss/scanning\x1a\x13support@scanoss.com2\x03\x32.0*\x01\x01\x32\x10\x61pplication/json:\x10\x61pplication/jsonR;\n\x03\x34\x30\x34\x12\x34\n*Returned when the resource does not exist.\x12\x06\n\x04\x9a\x02\x01\x07\x62\x06proto3')
|
|
20
20
|
|
|
21
|
-
|
|
22
|
-
_builder.
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
DESCRIPTOR.
|
|
26
|
-
DESCRIPTOR._serialized_options = b'Z1github.com/scanoss/papi/api/scanningv2;scanningv2\222A\323\001\022m\n\030SCANOSS Scanning Service\"L\n\020scanoss-scanning\022#https://github.com/scanoss/scanning\032\023support@scanoss.com2\0032.0*\001\0012\020application/json:\020application/jsonR;\n\003404\0224\n*Returned when the resource does not exist.\022\006\n\004\232\002\001\007'
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
_SCANNING.methods_by_name['
|
|
30
|
-
_SCANNING.methods_by_name['
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
21
|
+
_globals = globals()
|
|
22
|
+
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
|
23
|
+
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'scanoss.api.scanning.v2.scanoss_scanning_pb2', _globals)
|
|
24
|
+
if not _descriptor._USE_C_DESCRIPTORS:
|
|
25
|
+
_globals['DESCRIPTOR']._loaded_options = None
|
|
26
|
+
_globals['DESCRIPTOR']._serialized_options = b'Z1github.com/scanoss/papi/api/scanningv2;scanningv2\222A\323\001\022m\n\030SCANOSS Scanning Service\"L\n\020scanoss-scanning\022#https://github.com/scanoss/scanning\032\023support@scanoss.com2\0032.0*\001\0012\020application/json:\020application/jsonR;\n\003404\0224\n*Returned when the resource does not exist.\022\006\n\004\232\002\001\007'
|
|
27
|
+
_globals['_HFHREQUEST_CHILDREN_LANGEXTENSIONSENTRY']._loaded_options = None
|
|
28
|
+
_globals['_HFHREQUEST_CHILDREN_LANGEXTENSIONSENTRY']._serialized_options = b'8\001'
|
|
29
|
+
_globals['_SCANNING'].methods_by_name['Echo']._loaded_options = None
|
|
30
|
+
_globals['_SCANNING'].methods_by_name['Echo']._serialized_options = b'\202\323\344\223\002\032\"\025/api/v2/scanning/echo:\001*'
|
|
31
|
+
_globals['_SCANNING'].methods_by_name['FolderHashScan']._loaded_options = None
|
|
32
|
+
_globals['_SCANNING'].methods_by_name['FolderHashScan']._serialized_options = b'\202\323\344\223\002\036\"\031/api/v2/scanning/hfh/scan:\001*'
|
|
33
|
+
_globals['_HFHREQUEST']._serialized_start=196
|
|
34
|
+
_globals['_HFHREQUEST']._serialized_end=649
|
|
35
|
+
_globals['_HFHREQUEST_CHILDREN']._serialized_start=334
|
|
36
|
+
_globals['_HFHREQUEST_CHILDREN']._serialized_end=649
|
|
37
|
+
_globals['_HFHREQUEST_CHILDREN_LANGEXTENSIONSENTRY']._serialized_start=596
|
|
38
|
+
_globals['_HFHREQUEST_CHILDREN_LANGEXTENSIONSENTRY']._serialized_end=649
|
|
39
|
+
_globals['_HFHRESPONSE']._serialized_start=652
|
|
40
|
+
_globals['_HFHRESPONSE']._serialized_end=1071
|
|
41
|
+
_globals['_HFHRESPONSE_VERSION']._serialized_start=784
|
|
42
|
+
_globals['_HFHRESPONSE_VERSION']._serialized_end=825
|
|
43
|
+
_globals['_HFHRESPONSE_COMPONENT']._serialized_start=828
|
|
44
|
+
_globals['_HFHRESPONSE_COMPONENT']._serialized_end=976
|
|
45
|
+
_globals['_HFHRESPONSE_RESULT']._serialized_start=978
|
|
46
|
+
_globals['_HFHRESPONSE_RESULT']._serialized_end=1071
|
|
47
|
+
_globals['_SCANNING']._serialized_start=1074
|
|
48
|
+
_globals['_SCANNING']._serialized_end=1331
|
|
43
49
|
# @@protoc_insertion_point(module_scope)
|
scanoss/cli.py
CHANGED
|
@@ -54,6 +54,7 @@ from . import __version__
|
|
|
54
54
|
from .components import Components
|
|
55
55
|
from .constants import (
|
|
56
56
|
DEFAULT_API_TIMEOUT,
|
|
57
|
+
DEFAULT_HFH_RANK_THRESHOLD,
|
|
57
58
|
DEFAULT_POST_SIZE,
|
|
58
59
|
DEFAULT_RETRY,
|
|
59
60
|
DEFAULT_TIMEOUT,
|
|
@@ -623,24 +624,16 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
|
|
|
623
624
|
'--format',
|
|
624
625
|
'-f',
|
|
625
626
|
type=str,
|
|
626
|
-
choices=['json'],
|
|
627
|
+
choices=['json', 'cyclonedx'],
|
|
627
628
|
default='json',
|
|
628
629
|
help='Result output format (optional - default: json)',
|
|
629
630
|
)
|
|
630
631
|
p_folder_scan.add_argument(
|
|
631
|
-
'--
|
|
632
|
-
'-bm',
|
|
633
|
-
action='store_true',
|
|
634
|
-
default=False,
|
|
635
|
-
help='Enable best match mode (optional - default: False)',
|
|
636
|
-
)
|
|
637
|
-
p_folder_scan.add_argument(
|
|
638
|
-
'--threshold',
|
|
632
|
+
'--rank-threshold',
|
|
639
633
|
type=int,
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
help='Threshold for result matching (optional - default: 100)',
|
|
634
|
+
default=DEFAULT_HFH_RANK_THRESHOLD,
|
|
635
|
+
help='Filter results to only show those with rank value at or below this threshold (e.g., --rank-threshold 3 '
|
|
636
|
+
'returns results with rank 1, 2, or 3). Lower rank values indicate higher quality matches.',
|
|
644
637
|
)
|
|
645
638
|
p_folder_scan.set_defaults(func=folder_hashing_scan)
|
|
646
639
|
|
|
@@ -1455,7 +1448,7 @@ def utils_certloc(*_):
|
|
|
1455
1448
|
Run the "utils certloc" sub-command
|
|
1456
1449
|
:param _: ignored/unused
|
|
1457
1450
|
"""
|
|
1458
|
-
import certifi
|
|
1451
|
+
import certifi # noqa: PLC0415,I001
|
|
1459
1452
|
|
|
1460
1453
|
print(f'CA Cert File: {certifi.where()}')
|
|
1461
1454
|
|
|
@@ -1466,11 +1459,11 @@ def utils_cert_download(_, args): # pylint: disable=PLR0912 # noqa: PLR0912
|
|
|
1466
1459
|
:param _: ignore/unused
|
|
1467
1460
|
:param args: Parsed arguments
|
|
1468
1461
|
"""
|
|
1469
|
-
import socket
|
|
1470
|
-
import traceback
|
|
1471
|
-
from urllib.parse import urlparse
|
|
1462
|
+
import socket # noqa: PLC0415,I001
|
|
1463
|
+
import traceback # noqa: PLC0415,I001
|
|
1464
|
+
from urllib.parse import urlparse # noqa: PLC0415,I001
|
|
1472
1465
|
|
|
1473
|
-
from OpenSSL import SSL, crypto
|
|
1466
|
+
from OpenSSL import SSL, crypto # noqa: PLC0415,I001
|
|
1474
1467
|
|
|
1475
1468
|
file = sys.stdout
|
|
1476
1469
|
if args.output:
|
|
@@ -1518,7 +1511,7 @@ def utils_pac_proxy(_, args):
|
|
|
1518
1511
|
:param _: ignore/unused
|
|
1519
1512
|
:param args: Parsed arguments
|
|
1520
1513
|
"""
|
|
1521
|
-
from pypac.resolver import ProxyResolver
|
|
1514
|
+
from pypac.resolver import ProxyResolver # noqa: PLC0415,I001
|
|
1522
1515
|
|
|
1523
1516
|
if not args.pac:
|
|
1524
1517
|
print_stderr('Error: No pac file option specified.')
|
|
@@ -1592,7 +1585,7 @@ def crypto_algorithms(parser, args):
|
|
|
1592
1585
|
sys.exit(1)
|
|
1593
1586
|
except Exception as e:
|
|
1594
1587
|
if args.debug:
|
|
1595
|
-
import traceback
|
|
1588
|
+
import traceback # noqa: PLC0415,I001
|
|
1596
1589
|
|
|
1597
1590
|
traceback.print_exc()
|
|
1598
1591
|
print_stderr(f'ERROR: {e}')
|
|
@@ -1634,7 +1627,7 @@ def crypto_hints(parser, args):
|
|
|
1634
1627
|
sys.exit(1)
|
|
1635
1628
|
except Exception as e:
|
|
1636
1629
|
if args.debug:
|
|
1637
|
-
import traceback
|
|
1630
|
+
import traceback # noqa: PLC0415,I001
|
|
1638
1631
|
|
|
1639
1632
|
traceback.print_exc()
|
|
1640
1633
|
print_stderr(f'ERROR: {e}')
|
|
@@ -1676,7 +1669,7 @@ def crypto_versions_in_range(parser, args):
|
|
|
1676
1669
|
sys.exit(1)
|
|
1677
1670
|
except Exception as e:
|
|
1678
1671
|
if args.debug:
|
|
1679
|
-
import traceback
|
|
1672
|
+
import traceback # noqa: PLC0415,I001
|
|
1680
1673
|
|
|
1681
1674
|
traceback.print_exc()
|
|
1682
1675
|
print_stderr(f'ERROR: {e}')
|
|
@@ -1965,11 +1958,9 @@ def folder_hashing_scan(parser, args):
|
|
|
1965
1958
|
config=scanner_config,
|
|
1966
1959
|
client=client,
|
|
1967
1960
|
scanoss_settings=scanoss_settings,
|
|
1961
|
+
rank_threshold=args.rank_threshold,
|
|
1968
1962
|
)
|
|
1969
1963
|
|
|
1970
|
-
scanner.best_match = args.best_match
|
|
1971
|
-
scanner.threshold = args.threshold
|
|
1972
|
-
|
|
1973
1964
|
if scanner.scan():
|
|
1974
1965
|
scanner.present(output_file=args.output, output_format=args.format)
|
|
1975
1966
|
except ScanossGrpcError as e:
|
scanoss/constants.py
CHANGED
scanoss/data/build_date.txt
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
date:
|
|
1
|
+
date: 20250708100043, utime: 1751968843
|
scanoss/file_filters.py
CHANGED
|
@@ -25,7 +25,7 @@ SPDX-License-Identifier: MIT
|
|
|
25
25
|
import os
|
|
26
26
|
import sys
|
|
27
27
|
from pathlib import Path
|
|
28
|
-
from typing import List
|
|
28
|
+
from typing import List, Optional
|
|
29
29
|
|
|
30
30
|
from pathspec import GitIgnoreSpec
|
|
31
31
|
|
|
@@ -511,7 +511,7 @@ class FileFilters(ScanossBase):
|
|
|
511
511
|
# Now filter the files and return the reduced list
|
|
512
512
|
return self.get_filtered_files_from_files(all_files, str(root_path))
|
|
513
513
|
|
|
514
|
-
def get_filtered_files_from_files(self, files: List[str], scan_root: str = None) -> List[str]:
|
|
514
|
+
def get_filtered_files_from_files(self, files: List[str], scan_root: Optional[str] = None) -> List[str]:
|
|
515
515
|
"""
|
|
516
516
|
Retrieve a list of files to scan or fingerprint from a given list of files based on filter settings.
|
|
517
517
|
|
|
@@ -615,8 +615,13 @@ class FileFilters(ScanossBase):
|
|
|
615
615
|
# Default patterns for skipping directories
|
|
616
616
|
if not self.all_folders:
|
|
617
617
|
DEFAULT_SKIPPED_DIR_LIST = DEFAULT_SKIPPED_DIRS_HFH if self.is_folder_hashing_scan else DEFAULT_SKIPPED_DIRS
|
|
618
|
+
DEFAULT_SKIPPED_DIR_EXT_LIST = (
|
|
619
|
+
DEFAULT_SKIPPED_DIR_EXT_HFH if self.is_folder_hashing_scan else DEFAULT_SKIPPED_DIR_EXT
|
|
620
|
+
)
|
|
618
621
|
for dir_name in DEFAULT_SKIPPED_DIR_LIST:
|
|
619
622
|
patterns.append(f'{dir_name}/')
|
|
623
|
+
for dir_extension in DEFAULT_SKIPPED_DIR_EXT_LIST:
|
|
624
|
+
patterns.append(f'*{dir_extension}/')
|
|
620
625
|
|
|
621
626
|
# Custom patterns added in SCANOSS settings file
|
|
622
627
|
if self.scanoss_settings:
|
scanoss/inspection/copyleft.py
CHANGED
|
@@ -66,7 +66,7 @@ class InspectBase(ScanossBase):
|
|
|
66
66
|
def __init__( # noqa: PLR0913
|
|
67
67
|
self,
|
|
68
68
|
debug: bool = False,
|
|
69
|
-
trace: bool =
|
|
69
|
+
trace: bool = False,
|
|
70
70
|
quiet: bool = False,
|
|
71
71
|
filepath: str = None,
|
|
72
72
|
output: str = None,
|
|
@@ -152,9 +152,6 @@ class InspectBase(ScanossBase):
|
|
|
152
152
|
'declared': 1 if status == 'identified' else 0,
|
|
153
153
|
'undeclared': 1 if status == 'pending' else 0
|
|
154
154
|
}
|
|
155
|
-
if not new_component.get('licenses'):
|
|
156
|
-
self.print_debug(f'WARNING: Results missing licenses. Skipping: {new_component}')
|
|
157
|
-
return components
|
|
158
155
|
|
|
159
156
|
## Append license to component
|
|
160
157
|
self._append_license_to_component(components, new_component, component_key)
|
|
@@ -179,6 +176,11 @@ class InspectBase(ScanossBase):
|
|
|
179
176
|
new_component: Component whose licenses need to be processed
|
|
180
177
|
component_key: purl + version of the component to be updated
|
|
181
178
|
"""
|
|
179
|
+
# If no licenses are present, there is nothing to append
|
|
180
|
+
if not new_component.get('licenses'):
|
|
181
|
+
self.print_debug(f'WARNING: Results missing licenses. Skipping: {new_component}')
|
|
182
|
+
return
|
|
183
|
+
|
|
182
184
|
licenses_order_by_source_priority = self._get_licenses_order_by_source_priority(new_component['licenses'])
|
|
183
185
|
# Process licenses for this component
|
|
184
186
|
for license_item in licenses_order_by_source_priority:
|
|
@@ -15,7 +15,7 @@ from scanoss.utils.simhash import WordFeatureSet, fingerprint, simhash, vectoriz
|
|
|
15
15
|
|
|
16
16
|
MINIMUM_FILE_COUNT = 8
|
|
17
17
|
MINIMUM_CONCATENATED_NAME_LENGTH = 32
|
|
18
|
-
|
|
18
|
+
MAXIMUM_FILE_NAME_LENGTH = 32
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class DirectoryNode:
|
|
@@ -35,7 +35,7 @@ class DirectoryFile:
|
|
|
35
35
|
Represents a file in the directory tree for folder hashing.
|
|
36
36
|
"""
|
|
37
37
|
|
|
38
|
-
def __init__(self, path: str, key: bytes, key_str: str):
|
|
38
|
+
def __init__(self, path: str, key: List[bytes], key_str: str):
|
|
39
39
|
self.path = path
|
|
40
40
|
self.key = key
|
|
41
41
|
self.key_str = key_str
|
|
@@ -77,7 +77,7 @@ class FolderHasher:
|
|
|
77
77
|
def __init__(
|
|
78
78
|
self,
|
|
79
79
|
scan_dir: str,
|
|
80
|
-
config:
|
|
80
|
+
config: FolderHasherConfig,
|
|
81
81
|
scanoss_settings: Optional[ScanossSettings] = None,
|
|
82
82
|
):
|
|
83
83
|
self.base = ScanossBase(
|
|
@@ -140,7 +140,7 @@ class FolderHasher:
|
|
|
140
140
|
root_node = DirectoryNode(str(root))
|
|
141
141
|
|
|
142
142
|
all_files = [
|
|
143
|
-
f for f in root.rglob('*') if f.is_file() and len(f.name.encode('utf-8')) <=
|
|
143
|
+
f for f in root.rglob('*') if f.is_file() and len(f.name.encode('utf-8')) <= MAXIMUM_FILE_NAME_LENGTH
|
|
144
144
|
]
|
|
145
145
|
filtered_files = self.file_filters.get_filtered_files_from_files(all_files, str(root))
|
|
146
146
|
|
|
@@ -185,7 +185,7 @@ class FolderHasher:
|
|
|
185
185
|
Recursively compute folder hash data for a directory node.
|
|
186
186
|
|
|
187
187
|
The hash data includes the path identifier, simhash for file names,
|
|
188
|
-
simhash for file content, and children node hash information.
|
|
188
|
+
simhash for file content, directory hash, language extensions, and children node hash information.
|
|
189
189
|
|
|
190
190
|
Args:
|
|
191
191
|
node (DirectoryNode): The directory node to compute the hash for.
|
|
@@ -194,11 +194,22 @@ class FolderHasher:
|
|
|
194
194
|
dict: The computed hash data for the node.
|
|
195
195
|
"""
|
|
196
196
|
hash_data = self._hash_calc(node)
|
|
197
|
+
|
|
198
|
+
# Safely calculate relative path
|
|
199
|
+
try:
|
|
200
|
+
node_path = Path(node.path).resolve()
|
|
201
|
+
scan_dir_path = Path(self.scan_dir).resolve()
|
|
202
|
+
rel_path = node_path.relative_to(scan_dir_path)
|
|
203
|
+
except ValueError:
|
|
204
|
+
# If relative_to fails, fall back to the last component of the node path (or '.' when the path is empty)
|
|
205
|
+
rel_path = Path(node.path).name if node.path else Path('.')
|
|
197
206
|
|
|
198
207
|
return {
|
|
199
|
-
'path_id':
|
|
208
|
+
'path_id': str(rel_path),
|
|
200
209
|
'sim_hash_names': f'{hash_data["name_hash"]:02x}' if hash_data['name_hash'] is not None else None,
|
|
201
210
|
'sim_hash_content': f'{hash_data["content_hash"]:02x}' if hash_data['content_hash'] is not None else None,
|
|
211
|
+
'sim_hash_dir_names': f'{hash_data["dir_hash"]:02x}' if hash_data['dir_hash'] is not None else None,
|
|
212
|
+
'lang_extensions': hash_data['lang_extensions'],
|
|
202
213
|
'children': [self._hash_calc_from_node(child) for child in node.children.values()],
|
|
203
214
|
}
|
|
204
215
|
|
|
@@ -215,9 +226,12 @@ class FolderHasher:
|
|
|
215
226
|
node (DirectoryNode): The directory node containing file items.
|
|
216
227
|
|
|
217
228
|
Returns:
|
|
218
|
-
dict: A dictionary with 'name_hash' and '
|
|
229
|
+
dict: A dictionary with 'name_hash', 'content_hash', 'dir_hash', and 'lang_extensions' keys.
|
|
219
230
|
"""
|
|
220
231
|
processed_hashes = set()
|
|
232
|
+
unique_file_names = set()
|
|
233
|
+
unique_directories = set()
|
|
234
|
+
extension_map = {}
|
|
221
235
|
file_hashes = []
|
|
222
236
|
selected_names = []
|
|
223
237
|
|
|
@@ -225,37 +239,64 @@ class FolderHasher:
|
|
|
225
239
|
key_str = file.key_str
|
|
226
240
|
if key_str in processed_hashes:
|
|
227
241
|
continue
|
|
228
|
-
processed_hashes.add(key_str)
|
|
229
242
|
|
|
230
|
-
|
|
243
|
+
file_name = os.path.basename(file.path)
|
|
244
|
+
|
|
245
|
+
file_name_without_extension, extension = os.path.splitext(file_name)
|
|
246
|
+
current_directory = os.path.dirname(file.path)
|
|
247
|
+
|
|
248
|
+
if extension and len(extension) > 1:
|
|
249
|
+
ext_without_dot = extension[1:]
|
|
250
|
+
extension_map[ext_without_dot] = extension_map.get(ext_without_dot, 0) + 1
|
|
251
|
+
|
|
252
|
+
current_directory.replace(self.scan_dir, '', 1).lstrip(os.path.sep)
|
|
253
|
+
parts = current_directory.split(os.path.sep)
|
|
254
|
+
for d in parts:
|
|
255
|
+
if d in {'', '.', '..'}:
|
|
256
|
+
continue
|
|
257
|
+
unique_directories.add(d)
|
|
231
258
|
|
|
232
|
-
|
|
233
|
-
|
|
259
|
+
processed_hashes.add(key_str)
|
|
260
|
+
unique_file_names.add(file_name_without_extension)
|
|
261
|
+
selected_names.append(file_name)
|
|
262
|
+
file_hashes.append(file.key)
|
|
234
263
|
|
|
235
264
|
if len(selected_names) < MINIMUM_FILE_COUNT:
|
|
236
|
-
return {
|
|
237
|
-
'name_hash': None,
|
|
238
|
-
'content_hash': None,
|
|
239
|
-
}
|
|
265
|
+
return {'name_hash': None, 'content_hash': None, 'dir_hash': None, 'lang_extensions': None}
|
|
240
266
|
|
|
241
267
|
selected_names.sort()
|
|
242
268
|
concatenated_names = ''.join(selected_names)
|
|
243
269
|
|
|
244
270
|
if len(concatenated_names.encode('utf-8')) < MINIMUM_CONCATENATED_NAME_LENGTH:
|
|
245
|
-
return {
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
271
|
+
return {'name_hash': None, 'content_hash': None, 'dir_hash': None, 'lang_extensions': None}
|
|
272
|
+
|
|
273
|
+
# Sort the unique file names (without extensions) alphabetically and join them with single spaces
|
|
274
|
+
unique_file_names_list = list(unique_file_names)
|
|
275
|
+
unique_file_names_list.sort()
|
|
276
|
+
concatenated_names = ' '.join(unique_file_names_list)
|
|
277
|
+
|
|
278
|
+
# Sort the unique directory names alphabetically and join them with single spaces
|
|
279
|
+
unique_directories_list = list(unique_directories)
|
|
280
|
+
unique_directories_list.sort()
|
|
281
|
+
concatenated_directories = ' '.join(unique_directories_list)
|
|
249
282
|
|
|
250
283
|
names_simhash = simhash(WordFeatureSet(concatenated_names.encode('utf-8')))
|
|
284
|
+
dir_simhash = simhash(WordFeatureSet(concatenated_directories.encode('utf-8')))
|
|
251
285
|
content_simhash = fingerprint(vectorize_bytes(file_hashes))
|
|
252
286
|
|
|
287
|
+
# Debug logging similar to Go implementation
|
|
288
|
+
self.base.print_debug(f'Unique file names: {unique_file_names_list}')
|
|
289
|
+
self.base.print_debug(f'Unique directories: {unique_directories_list}')
|
|
290
|
+
self.base.print_debug(f'{dir_simhash:x}/{names_simhash:x} - {content_simhash:x} - {extension_map}')
|
|
291
|
+
|
|
253
292
|
return {
|
|
254
293
|
'name_hash': names_simhash,
|
|
255
294
|
'content_hash': content_simhash,
|
|
295
|
+
'dir_hash': dir_simhash,
|
|
296
|
+
'lang_extensions': extension_map,
|
|
256
297
|
}
|
|
257
298
|
|
|
258
|
-
def present(self, output_format: str = None, output_file: str = None):
|
|
299
|
+
def present(self, output_format: Optional[str] = None, output_file: Optional[str] = None):
|
|
259
300
|
"""Present the hashed tree in the selected format"""
|
|
260
301
|
self.presenter.present(output_format=output_format, output_file=output_file)
|
|
261
302
|
|
scanoss/scanners/scanner_hfh.py
CHANGED
|
@@ -29,6 +29,8 @@ from typing import Dict, Optional
|
|
|
29
29
|
|
|
30
30
|
from progress.spinner import Spinner
|
|
31
31
|
|
|
32
|
+
from scanoss.constants import DEFAULT_HFH_RANK_THRESHOLD
|
|
33
|
+
from scanoss.cyclonedx import CycloneDx
|
|
32
34
|
from scanoss.file_filters import FileFilters
|
|
33
35
|
from scanoss.scanners.folder_hasher import FolderHasher
|
|
34
36
|
from scanoss.scanners.scanner_config import ScannerConfig
|
|
@@ -52,6 +54,7 @@ class ScannerHFH:
|
|
|
52
54
|
config: ScannerConfig,
|
|
53
55
|
client: Optional[ScanossGrpc] = None,
|
|
54
56
|
scanoss_settings: Optional[ScanossSettings] = None,
|
|
57
|
+
rank_threshold: int = DEFAULT_HFH_RANK_THRESHOLD,
|
|
55
58
|
):
|
|
56
59
|
"""
|
|
57
60
|
Initialize the ScannerHFH.
|
|
@@ -61,6 +64,7 @@ class ScannerHFH:
|
|
|
61
64
|
config (ScannerConfig): Configuration parameters for the scanner.
|
|
62
65
|
client (ScanossGrpc): gRPC client for communicating with the scanning service.
|
|
63
66
|
scanoss_settings (Optional[ScanossSettings]): Optional settings for Scanoss.
|
|
67
|
+
rank_threshold (int): Get results with rank at or below this threshold (default: 5).
|
|
64
68
|
"""
|
|
65
69
|
self.base = ScanossBase(
|
|
66
70
|
debug=config.debug,
|
|
@@ -88,8 +92,7 @@ class ScannerHFH:
|
|
|
88
92
|
self.scan_dir = scan_dir
|
|
89
93
|
self.client = client
|
|
90
94
|
self.scan_results = None
|
|
91
|
-
self.
|
|
92
|
-
self.threshold = 100
|
|
95
|
+
self.rank_threshold = rank_threshold
|
|
93
96
|
|
|
94
97
|
def scan(self) -> Optional[Dict]:
|
|
95
98
|
"""
|
|
@@ -100,8 +103,7 @@ class ScannerHFH:
|
|
|
100
103
|
"""
|
|
101
104
|
hfh_request = {
|
|
102
105
|
'root': self.folder_hasher.hash_directory(self.scan_dir),
|
|
103
|
-
'
|
|
104
|
-
'best_match': self.best_match,
|
|
106
|
+
'rank_threshold': self.rank_threshold,
|
|
105
107
|
}
|
|
106
108
|
|
|
107
109
|
spinner = Spinner('Scanning folder...')
|
|
@@ -161,7 +163,50 @@ class ScannerHFHPresenter(AbstractPresenter):
|
|
|
161
163
|
)
|
|
162
164
|
|
|
163
165
|
def _format_cyclonedx_output(self) -> str:
|
|
164
|
-
|
|
166
|
+
if not self.scanner.scan_results:
|
|
167
|
+
return ''
|
|
168
|
+
try:
|
|
169
|
+
if 'results' not in self.scanner.scan_results or not self.scanner.scan_results['results']:
|
|
170
|
+
self.base.print_stderr('ERROR: No scan results found')
|
|
171
|
+
return ''
|
|
172
|
+
|
|
173
|
+
first_result = self.scanner.scan_results['results'][0]
|
|
174
|
+
|
|
175
|
+
best_match_components = [c for c in first_result.get('components', []) if c.get('order') == 1]
|
|
176
|
+
if not best_match_components:
|
|
177
|
+
self.base.print_stderr('ERROR: No best match component found')
|
|
178
|
+
return ''
|
|
179
|
+
|
|
180
|
+
best_match_component = best_match_components[0]
|
|
181
|
+
if not best_match_component.get('versions'):
|
|
182
|
+
self.base.print_stderr('ERROR: No versions found for best match component')
|
|
183
|
+
return ''
|
|
184
|
+
|
|
185
|
+
best_match_version = best_match_component['versions'][0]
|
|
186
|
+
purl = best_match_component['purl']
|
|
187
|
+
|
|
188
|
+
get_dependencies_json_request = {
|
|
189
|
+
'files': [
|
|
190
|
+
{
|
|
191
|
+
'file': f'{best_match_component["name"]}:{best_match_version["version"]}',
|
|
192
|
+
'purls': [{'purl': purl, 'requirement': best_match_version['version']}],
|
|
193
|
+
}
|
|
194
|
+
]
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
decorated_scan_results = self.scanner.client.get_dependencies(get_dependencies_json_request)
|
|
198
|
+
|
|
199
|
+
cdx = CycloneDx(self.base.debug, self.output_file)
|
|
200
|
+
scan_results = {}
|
|
201
|
+
for f in decorated_scan_results['files']:
|
|
202
|
+
scan_results[f['file']] = [f]
|
|
203
|
+
if not cdx.produce_from_json(scan_results, self.output_file):
|
|
204
|
+
error_msg = 'ERROR: Failed to produce CycloneDX output'
|
|
205
|
+
self.base.print_stderr(error_msg)
|
|
206
|
+
raise ValueError(error_msg)
|
|
207
|
+
except Exception as e:
|
|
208
|
+
self.base.print_stderr(f'ERROR: Failed to get license information: {e}')
|
|
209
|
+
return None
|
|
165
210
|
|
|
166
211
|
def _format_spdxlite_output(self) -> str:
|
|
167
212
|
raise NotImplementedError('SPDXlite output is not implemented')
|
|
@@ -4,14 +4,14 @@ protoc_gen_swagger/options/annotations_pb2.py,sha256=b25EDD6gssUWnFby9gxgcpLIROT
|
|
|
4
4
|
protoc_gen_swagger/options/annotations_pb2_grpc.py,sha256=1oboBPFxaTEXt9Aw7EAj8gXHDCNMhZD2VXqocC9l_gk,159
|
|
5
5
|
protoc_gen_swagger/options/openapiv2_pb2.py,sha256=vYElGp8E1vGHszvWqX97zNG9GFJ7u2QcdK9ouq0XdyI,14939
|
|
6
6
|
protoc_gen_swagger/options/openapiv2_pb2_grpc.py,sha256=1oboBPFxaTEXt9Aw7EAj8gXHDCNMhZD2VXqocC9l_gk,159
|
|
7
|
-
scanoss/__init__.py,sha256=
|
|
8
|
-
scanoss/cli.py,sha256=
|
|
7
|
+
scanoss/__init__.py,sha256=YH4I-lAz5Zn3nEU1mwGqNZPPhcS1o4Lu6itgmXKlV0c,1146
|
|
8
|
+
scanoss/cli.py,sha256=9ELIAJy06g4KyvnALzPSQ_Rh1ypALbyQGGKrjb4sCOk,72615
|
|
9
9
|
scanoss/components.py,sha256=b0R9DdKuXqyQiw5nZZwjQ6NJXBr1U9gyx1RI2FP9ozA,14511
|
|
10
|
-
scanoss/constants.py,sha256=
|
|
10
|
+
scanoss/constants.py,sha256=On8mQ-8ardVMHSJ7WOJqeTvGXIOWPLCgUanjE7Wk-wE,351
|
|
11
11
|
scanoss/cryptography.py,sha256=Q39MOCscP-OFvrnPXaPOMFFkc8OKnf3mC3SgZYEtCog,9407
|
|
12
12
|
scanoss/csvoutput.py,sha256=qNKRwcChSkgIwLm00kZiVX6iHVQUF4Apl-sMbzJ5Taw,10192
|
|
13
13
|
scanoss/cyclonedx.py,sha256=UktDuqZUbXSggdt864Pg8ziTD7sdEQtLxfYL7vd_ZCE,12756
|
|
14
|
-
scanoss/file_filters.py,sha256=
|
|
14
|
+
scanoss/file_filters.py,sha256=2DzyvSVR7We7U36UurtJj3cdQturUjDl8j3OIqmv4Pg,20638
|
|
15
15
|
scanoss/filecount.py,sha256=RZjKQ6M5P_RQg0_PMD2tsRe5Z8f98ke0sxYVjPDN8iQ,6538
|
|
16
16
|
scanoss/results.py,sha256=47ZXXuU2sDjYa5vhtbWTmikit9jHhA0rsYKwkvZFI5w,9252
|
|
17
17
|
scanoss/scancodedeps.py,sha256=JbpoGW1POtPMmowzfwa4oh8sSBeeQCqaW9onvc4UFYM,11517
|
|
@@ -47,7 +47,7 @@ scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2.py,sha256=Z4k9qvU2klesnPR
|
|
|
47
47
|
scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2_grpc.py,sha256=B6sv2Taagt05CMWaw7T6silg7PW8E6xQVbqn_4-v14U,6854
|
|
48
48
|
scanoss/api/scanning/__init__.py,sha256=hx-P78xbDsh6WQIigewkJ7Y7y1fqc_eYnyHC5IZTKmo,1122
|
|
49
49
|
scanoss/api/scanning/v2/__init__.py,sha256=hx-P78xbDsh6WQIigewkJ7Y7y1fqc_eYnyHC5IZTKmo,1122
|
|
50
|
-
scanoss/api/scanning/v2/scanoss_scanning_pb2.py,sha256
|
|
50
|
+
scanoss/api/scanning/v2/scanoss_scanning_pb2.py,sha256=-TLfUUVCytOSc1-PtGl2g2-IlOrYuktQ2lRxFq5vM6A,5493
|
|
51
51
|
scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py,sha256=kyP1JRjyHlUR9vc0MXSJDvEGBiROEu5WvHvt737g27Q,4670
|
|
52
52
|
scanoss/api/semgrep/__init__.py,sha256=UAhvL2dFNZsG4g3I8HCauwQK6e0QoEFhMGqZ_9GgGhI,1122
|
|
53
53
|
scanoss/api/semgrep/v2/__init__.py,sha256=UAhvL2dFNZsG4g3I8HCauwQK6e0QoEFhMGqZ_9GgGhI,1122
|
|
@@ -57,31 +57,31 @@ scanoss/api/vulnerabilities/__init__.py,sha256=IFrDk_DTJgKSZmmU-nuLXuq_s8sQZlrSC
|
|
|
57
57
|
scanoss/api/vulnerabilities/v2/__init__.py,sha256=IFrDk_DTJgKSZmmU-nuLXuq_s8sQZlrSCHhIDMJT4r0,1122
|
|
58
58
|
scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py,sha256=CFhF80av8tenGvn9AIsGEtRJPuV2dC_syA5JLZb2lDw,5464
|
|
59
59
|
scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py,sha256=HlS4k4Zmx6RIAqaO9I96jD-eyF5yU6Xx04pVm7pdqOg,6864
|
|
60
|
-
scanoss/data/build_date.txt,sha256=
|
|
60
|
+
scanoss/data/build_date.txt,sha256=MYvdFBxu-jVdZOEyHhyOpGsXpUjQK19aUZUWJivaIgU,40
|
|
61
61
|
scanoss/data/scanoss-settings-schema.json,sha256=ClkRYAkjAN0Sk704G8BE_Ok006oQ6YnIGmX84CF8h9w,8798
|
|
62
62
|
scanoss/data/spdx-exceptions.json,sha256=s7UTYxC7jqQXr11YBlIWYCNwN6lRDFTR33Y8rpN_dA4,17953
|
|
63
63
|
scanoss/data/spdx-licenses.json,sha256=A6Z0q82gaTLtnopBfzeIVZjJFxkdRW1g2TuumQc-lII,228794
|
|
64
64
|
scanoss/inspection/__init__.py,sha256=D4C0lWLuNp8k_BjQZEc07WZcUgAvriVwQWOk063b0ZU,1122
|
|
65
65
|
scanoss/inspection/component_summary.py,sha256=h1l3rF6NnoK0wMkS4ib6rDfcza2aqunyoMDbN2lw2G4,4049
|
|
66
|
-
scanoss/inspection/copyleft.py,sha256=
|
|
67
|
-
scanoss/inspection/inspect_base.py,sha256=
|
|
68
|
-
scanoss/inspection/license_summary.py,sha256=
|
|
69
|
-
scanoss/inspection/policy_check.py,sha256=
|
|
70
|
-
scanoss/inspection/undeclared_component.py,sha256=
|
|
66
|
+
scanoss/inspection/copyleft.py,sha256=ZSA97Vc3o06e66r4SCxwLKjGZOjv6lo92sWbvpzKHvo,9237
|
|
67
|
+
scanoss/inspection/inspect_base.py,sha256=buvJ9l3VJatzX5tNj7GOC5kSFQiFMpcYm8e1Iqolho0,18199
|
|
68
|
+
scanoss/inspection/license_summary.py,sha256=1iSVkjNa3oj-XEA-tNNqpwBOLb-i_jkXRTuu9Fcr0q4,5764
|
|
69
|
+
scanoss/inspection/policy_check.py,sha256=R9-7PxDHGzXCDVF8sWE3KcORgICDuZbx1-xvSot_C-g,8330
|
|
70
|
+
scanoss/inspection/undeclared_component.py,sha256=HGto8-ZBccrtczIARughG298Cwqb4k1BLCihkbmiFnk,11496
|
|
71
71
|
scanoss/inspection/utils/license_utils.py,sha256=Zb6QLmVJb86lKCwZyBsmwakyAtY1SXa54kUyyKmWMqA,5093
|
|
72
72
|
scanoss/scanners/__init__.py,sha256=D4C0lWLuNp8k_BjQZEc07WZcUgAvriVwQWOk063b0ZU,1122
|
|
73
73
|
scanoss/scanners/container_scanner.py,sha256=leP4roes6B9B95F49mJ0P_F8WcKCQkvJgk9azWyJrjg,16294
|
|
74
|
-
scanoss/scanners/folder_hasher.py,sha256
|
|
74
|
+
scanoss/scanners/folder_hasher.py,sha256=-qvTtMC0iPj7zS8nMSZZJyt9d62MeQIK0LcrNDkt7yc,12267
|
|
75
75
|
scanoss/scanners/scanner_config.py,sha256=egG7cw3S2akU-D9M1aLE5jLrfz_c8e7_DIotMnnpM84,2601
|
|
76
|
-
scanoss/scanners/scanner_hfh.py,sha256=
|
|
76
|
+
scanoss/scanners/scanner_hfh.py,sha256=CGTRzg9Epyyi7DCvQXVY91A8P0GGl8bzfr0zRCaM3XA,7906
|
|
77
77
|
scanoss/utils/__init__.py,sha256=0hjb5ktavp7utJzFhGMPImPaZiHWgilM2HwvTp5lXJE,1122
|
|
78
78
|
scanoss/utils/abstract_presenter.py,sha256=teiDTxBj5jBMCk2T8i4l1BJPf_u4zBLWrtCTFHSSECM,3148
|
|
79
79
|
scanoss/utils/crc64.py,sha256=TMrwQimSdE6imhFOUL7oAG6Kxu-8qMpGWMuMg8QpSVs,3169
|
|
80
80
|
scanoss/utils/file.py,sha256=62cA9a17TU9ZvfA3FY5HY4-QOajJeSrc8S6xLA_f-3M,2980
|
|
81
81
|
scanoss/utils/simhash.py,sha256=6iu8DOcecPAY36SZjCOzrrLMT9oIE7-gI6QuYwUQ7B0,5793
|
|
82
|
-
scanoss-1.
|
|
83
|
-
scanoss-1.
|
|
84
|
-
scanoss-1.
|
|
85
|
-
scanoss-1.
|
|
86
|
-
scanoss-1.
|
|
87
|
-
scanoss-1.
|
|
82
|
+
scanoss-1.27.0.dist-info/licenses/LICENSE,sha256=LLUaXoiyOroIbr5ubAyrxBOwSRLTm35ETO2FmLpy8QQ,1074
|
|
83
|
+
scanoss-1.27.0.dist-info/METADATA,sha256=TB02dYgadlHHeQhCDWJiSRDJxQ52lT10TuWFTdE6W1E,6060
|
|
84
|
+
scanoss-1.27.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
85
|
+
scanoss-1.27.0.dist-info/entry_points.txt,sha256=Uy28xnaDL5KQ7V77sZD5VLDXPNxYYzSr5tsqtiXVzAs,48
|
|
86
|
+
scanoss-1.27.0.dist-info/top_level.txt,sha256=V11PrQ6Pnrc-nDF9xnisnJ8e6-i7HqSIKVNqduRWcL8,27
|
|
87
|
+
scanoss-1.27.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|