scanoss 1.26.3__tar.gz → 1.27.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scanoss-1.26.3/src/scanoss.egg-info → scanoss-1.27.1}/PKG-INFO +1 -1
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/__init__.py +1 -1
- scanoss-1.27.1/src/scanoss/api/scanning/v2/scanoss_scanning_pb2.py +49 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/cli.py +16 -25
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/constants.py +2 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/cyclonedx.py +15 -11
- scanoss-1.27.1/src/scanoss/data/build_date.txt +1 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/file_filters.py +7 -2
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/scanner.py +23 -23
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/scanners/container_scanner.py +4 -2
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/scanners/folder_hasher.py +61 -20
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/scanners/scanner_hfh.py +53 -6
- {scanoss-1.26.3 → scanoss-1.27.1/src/scanoss.egg-info}/PKG-INFO +1 -1
- scanoss-1.26.3/src/scanoss/api/scanning/v2/scanoss_scanning_pb2.py +0 -43
- scanoss-1.26.3/src/scanoss/data/build_date.txt +0 -1
- {scanoss-1.26.3 → scanoss-1.27.1}/LICENSE +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/PACKAGE.md +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/README.md +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/pyproject.toml +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/setup.cfg +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/protoc_gen_swagger/__init__.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/protoc_gen_swagger/options/__init__.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/protoc_gen_swagger/options/annotations_pb2.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/protoc_gen_swagger/options/annotations_pb2_grpc.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/protoc_gen_swagger/options/openapiv2_pb2.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/protoc_gen_swagger/options/openapiv2_pb2_grpc.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/__init__.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/common/__init__.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/common/v2/__init__.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/common/v2/scanoss_common_pb2.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/common/v2/scanoss_common_pb2_grpc.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/components/__init__.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/components/v2/__init__.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/components/v2/scanoss_components_pb2.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/components/v2/scanoss_components_pb2_grpc.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/dependencies/__init__.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/dependencies/v2/__init__.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/geoprovenance/__init__.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/geoprovenance/v2/__init__.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2_grpc.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/scanning/__init__.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/scanning/v2/__init__.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/semgrep/__init__.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/semgrep/v2/__init__.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/semgrep/v2/scanoss_semgrep_pb2.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/semgrep/v2/scanoss_semgrep_pb2_grpc.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/vulnerabilities/__init__.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/vulnerabilities/v2/__init__.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/components.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/cryptography.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/csvoutput.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/data/scanoss-settings-schema.json +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/data/spdx-exceptions.json +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/data/spdx-licenses.json +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/filecount.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/inspection/__init__.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/inspection/component_summary.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/inspection/copyleft.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/inspection/inspect_base.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/inspection/license_summary.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/inspection/policy_check.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/inspection/undeclared_component.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/inspection/utils/license_utils.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/results.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/scancodedeps.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/scanners/__init__.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/scanners/scanner_config.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/scanoss_settings.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/scanossapi.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/scanossbase.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/scanossgrpc.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/scanpostprocessor.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/scantype.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/spdxlite.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/threadeddependencies.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/threadedscanning.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/utils/__init__.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/utils/abstract_presenter.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/utils/crc64.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/utils/file.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/utils/simhash.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/winnowing.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss.egg-info/SOURCES.txt +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss.egg-info/dependency_links.txt +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss.egg-info/entry_points.txt +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss.egg-info/requires.txt +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss.egg-info/top_level.txt +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/tests/test_csv_output.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/tests/test_file_filters.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/tests/test_policy_inspect.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/tests/test_scan_post_processor.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/tests/test_spdxlite.py +0 -0
- {scanoss-1.26.3 → scanoss-1.27.1}/tests/test_winnowing.py +0 -0
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
|
3
|
+
# source: scanoss/api/scanning/v2/scanoss-scanning.proto
|
|
4
|
+
"""Generated protocol buffer code."""
|
|
5
|
+
from google.protobuf.internal import builder as _builder
|
|
6
|
+
from google.protobuf import descriptor as _descriptor
|
|
7
|
+
from google.protobuf import descriptor_pool as _descriptor_pool
|
|
8
|
+
from google.protobuf import symbol_database as _symbol_database
|
|
9
|
+
# @@protoc_insertion_point(imports)
|
|
10
|
+
|
|
11
|
+
_sym_db = _symbol_database.Default()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
from scanoss.api.common.v2 import scanoss_common_pb2 as scanoss_dot_api_dot_common_dot_v2_dot_scanoss__common__pb2
|
|
15
|
+
from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2
|
|
16
|
+
from protoc_gen_swagger.options import annotations_pb2 as protoc__gen__swagger_dot_options_dot_annotations__pb2
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n.scanoss/api/scanning/v2/scanoss-scanning.proto\x12\x17scanoss.api.scanning.v2\x1a*scanoss/api/common/v2/scanoss-common.proto\x1a\x1cgoogle/api/annotations.proto\x1a,protoc-gen-swagger/options/annotations.proto\"\xc5\x03\n\nHFHRequest\x12:\n\x04root\x18\x01 \x01(\x0b\x32,.scanoss.api.scanning.v2.HFHRequest.Children\x12\x16\n\x0erank_threshold\x18\x02 \x01(\x05\x12\x10\n\x08\x63\x61tegory\x18\x03 \x01(\t\x12\x13\n\x0bquery_limit\x18\x04 \x01(\x05\x1a\xbb\x02\n\x08\x43hildren\x12\x0f\n\x07path_id\x18\x01 \x01(\t\x12\x16\n\x0esim_hash_names\x18\x02 \x01(\t\x12\x18\n\x10sim_hash_content\x18\x03 \x01(\t\x12>\n\x08\x63hildren\x18\x04 \x03(\x0b\x32,.scanoss.api.scanning.v2.HFHRequest.Children\x12\x1a\n\x12sim_hash_dir_names\x18\x05 \x01(\t\x12Y\n\x0flang_extensions\x18\x06 \x03(\x0b\x32@.scanoss.api.scanning.v2.HFHRequest.Children.LangExtensionsEntry\x1a\x35\n\x13LangExtensionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x05:\x02\x38\x01\"\xa3\x03\n\x0bHFHResponse\x12<\n\x07results\x18\x01 \x03(\x0b\x32+.scanoss.api.scanning.v2.HFHResponse.Result\x12\x35\n\x06status\x18\x02 \x01(\x0b\x32%.scanoss.api.common.v2.StatusResponse\x1a)\n\x07Version\x12\x0f\n\x07version\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x1a\x94\x01\n\tComponent\x12\x0c\n\x04purl\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06vendor\x18\x03 \x01(\t\x12>\n\x08versions\x18\x04 \x03(\x0b\x32,.scanoss.api.scanning.v2.HFHResponse.Version\x12\x0c\n\x04rank\x18\x05 \x01(\x05\x12\r\n\x05order\x18\x06 \x01(\x05\x1a]\n\x06Result\x12\x0f\n\x07path_id\x18\x01 \x01(\t\x12\x42\n\ncomponents\x18\x02 \x03(\x0b\x32..scanoss.api.scanning.v2.HFHResponse.Component2\x81\x02\n\x08Scanning\x12q\n\x04\x45\x63ho\x12\".scanoss.api.common.v2.EchoRequest\x1a#.scanoss.api.common.v2.EchoResponse\" 
\x82\xd3\xe4\x93\x02\x1a\"\x15/api/v2/scanning/echo:\x01*\x12\x81\x01\n\x0e\x46olderHashScan\x12#.scanoss.api.scanning.v2.HFHRequest\x1a$.scanoss.api.scanning.v2.HFHResponse\"$\x82\xd3\xe4\x93\x02\x1e\"\x19/api/v2/scanning/hfh/scan:\x01*B\x8a\x02Z1github.com/scanoss/papi/api/scanningv2;scanningv2\x92\x41\xd3\x01\x12m\n\x18SCANOSS Scanning Service\"L\n\x10scanoss-scanning\x12#https://github.com/scanoss/scanning\x1a\x13support@scanoss.com2\x03\x32.0*\x01\x01\x32\x10\x61pplication/json:\x10\x61pplication/jsonR;\n\x03\x34\x30\x34\x12\x34\n*Returned when the resource does not exist.\x12\x06\n\x04\x9a\x02\x01\x07\x62\x06proto3')
|
|
20
|
+
|
|
21
|
+
_globals = globals()
|
|
22
|
+
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
|
23
|
+
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'scanoss.api.scanning.v2.scanoss_scanning_pb2', _globals)
|
|
24
|
+
if not _descriptor._USE_C_DESCRIPTORS:
|
|
25
|
+
_globals['DESCRIPTOR']._loaded_options = None
|
|
26
|
+
_globals['DESCRIPTOR']._serialized_options = b'Z1github.com/scanoss/papi/api/scanningv2;scanningv2\222A\323\001\022m\n\030SCANOSS Scanning Service\"L\n\020scanoss-scanning\022#https://github.com/scanoss/scanning\032\023support@scanoss.com2\0032.0*\001\0012\020application/json:\020application/jsonR;\n\003404\0224\n*Returned when the resource does not exist.\022\006\n\004\232\002\001\007'
|
|
27
|
+
_globals['_HFHREQUEST_CHILDREN_LANGEXTENSIONSENTRY']._loaded_options = None
|
|
28
|
+
_globals['_HFHREQUEST_CHILDREN_LANGEXTENSIONSENTRY']._serialized_options = b'8\001'
|
|
29
|
+
_globals['_SCANNING'].methods_by_name['Echo']._loaded_options = None
|
|
30
|
+
_globals['_SCANNING'].methods_by_name['Echo']._serialized_options = b'\202\323\344\223\002\032\"\025/api/v2/scanning/echo:\001*'
|
|
31
|
+
_globals['_SCANNING'].methods_by_name['FolderHashScan']._loaded_options = None
|
|
32
|
+
_globals['_SCANNING'].methods_by_name['FolderHashScan']._serialized_options = b'\202\323\344\223\002\036\"\031/api/v2/scanning/hfh/scan:\001*'
|
|
33
|
+
_globals['_HFHREQUEST']._serialized_start=196
|
|
34
|
+
_globals['_HFHREQUEST']._serialized_end=649
|
|
35
|
+
_globals['_HFHREQUEST_CHILDREN']._serialized_start=334
|
|
36
|
+
_globals['_HFHREQUEST_CHILDREN']._serialized_end=649
|
|
37
|
+
_globals['_HFHREQUEST_CHILDREN_LANGEXTENSIONSENTRY']._serialized_start=596
|
|
38
|
+
_globals['_HFHREQUEST_CHILDREN_LANGEXTENSIONSENTRY']._serialized_end=649
|
|
39
|
+
_globals['_HFHRESPONSE']._serialized_start=652
|
|
40
|
+
_globals['_HFHRESPONSE']._serialized_end=1071
|
|
41
|
+
_globals['_HFHRESPONSE_VERSION']._serialized_start=784
|
|
42
|
+
_globals['_HFHRESPONSE_VERSION']._serialized_end=825
|
|
43
|
+
_globals['_HFHRESPONSE_COMPONENT']._serialized_start=828
|
|
44
|
+
_globals['_HFHRESPONSE_COMPONENT']._serialized_end=976
|
|
45
|
+
_globals['_HFHRESPONSE_RESULT']._serialized_start=978
|
|
46
|
+
_globals['_HFHRESPONSE_RESULT']._serialized_end=1071
|
|
47
|
+
_globals['_SCANNING']._serialized_start=1074
|
|
48
|
+
_globals['_SCANNING']._serialized_end=1331
|
|
49
|
+
# @@protoc_insertion_point(module_scope)
|
|
@@ -54,6 +54,7 @@ from . import __version__
|
|
|
54
54
|
from .components import Components
|
|
55
55
|
from .constants import (
|
|
56
56
|
DEFAULT_API_TIMEOUT,
|
|
57
|
+
DEFAULT_HFH_RANK_THRESHOLD,
|
|
57
58
|
DEFAULT_POST_SIZE,
|
|
58
59
|
DEFAULT_RETRY,
|
|
59
60
|
DEFAULT_TIMEOUT,
|
|
@@ -623,24 +624,16 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
|
|
|
623
624
|
'--format',
|
|
624
625
|
'-f',
|
|
625
626
|
type=str,
|
|
626
|
-
choices=['json'],
|
|
627
|
+
choices=['json', 'cyclonedx'],
|
|
627
628
|
default='json',
|
|
628
629
|
help='Result output format (optional - default: json)',
|
|
629
630
|
)
|
|
630
631
|
p_folder_scan.add_argument(
|
|
631
|
-
'--best-match',
|
|
632
|
-
'-bm',
|
|
633
|
-
action='store_true',
|
|
634
|
-
default=False,
|
|
635
|
-
help='Enable best match mode (optional - default: False)',
|
|
636
|
-
)
|
|
637
|
-
p_folder_scan.add_argument(
|
|
638
|
-
'--threshold',
|
|
632
|
+
'--rank-threshold',
|
|
639
633
|
type=int,
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
help='Threshold for result matching (optional - default: 100)',
|
|
634
|
+
default=DEFAULT_HFH_RANK_THRESHOLD,
|
|
635
|
+
help='Filter results to only show those with rank value at or below this threshold (e.g., --rank-threshold 3 '
|
|
636
|
+
'returns results with rank 1, 2, or 3). Lower rank values indicate higher quality matches.',
|
|
644
637
|
)
|
|
645
638
|
p_folder_scan.set_defaults(func=folder_hashing_scan)
|
|
646
639
|
|
|
@@ -1455,7 +1448,7 @@ def utils_certloc(*_):
|
|
|
1455
1448
|
Run the "utils certloc" sub-command
|
|
1456
1449
|
:param _: ignored/unused
|
|
1457
1450
|
"""
|
|
1458
|
-
import certifi
|
|
1451
|
+
import certifi # noqa: PLC0415,I001
|
|
1459
1452
|
|
|
1460
1453
|
print(f'CA Cert File: {certifi.where()}')
|
|
1461
1454
|
|
|
@@ -1466,11 +1459,11 @@ def utils_cert_download(_, args): # pylint: disable=PLR0912 # noqa: PLR0912
|
|
|
1466
1459
|
:param _: ignore/unused
|
|
1467
1460
|
:param args: Parsed arguments
|
|
1468
1461
|
"""
|
|
1469
|
-
import socket
|
|
1470
|
-
import traceback
|
|
1471
|
-
from urllib.parse import urlparse
|
|
1462
|
+
import socket # noqa: PLC0415,I001
|
|
1463
|
+
import traceback # noqa: PLC0415,I001
|
|
1464
|
+
from urllib.parse import urlparse # noqa: PLC0415,I001
|
|
1472
1465
|
|
|
1473
|
-
from OpenSSL import SSL, crypto
|
|
1466
|
+
from OpenSSL import SSL, crypto # noqa: PLC0415,I001
|
|
1474
1467
|
|
|
1475
1468
|
file = sys.stdout
|
|
1476
1469
|
if args.output:
|
|
@@ -1518,7 +1511,7 @@ def utils_pac_proxy(_, args):
|
|
|
1518
1511
|
:param _: ignore/unused
|
|
1519
1512
|
:param args: Parsed arguments
|
|
1520
1513
|
"""
|
|
1521
|
-
from pypac.resolver import ProxyResolver
|
|
1514
|
+
from pypac.resolver import ProxyResolver # noqa: PLC0415,I001
|
|
1522
1515
|
|
|
1523
1516
|
if not args.pac:
|
|
1524
1517
|
print_stderr('Error: No pac file option specified.')
|
|
@@ -1592,7 +1585,7 @@ def crypto_algorithms(parser, args):
|
|
|
1592
1585
|
sys.exit(1)
|
|
1593
1586
|
except Exception as e:
|
|
1594
1587
|
if args.debug:
|
|
1595
|
-
import traceback
|
|
1588
|
+
import traceback # noqa: PLC0415,I001
|
|
1596
1589
|
|
|
1597
1590
|
traceback.print_exc()
|
|
1598
1591
|
print_stderr(f'ERROR: {e}')
|
|
@@ -1634,7 +1627,7 @@ def crypto_hints(parser, args):
|
|
|
1634
1627
|
sys.exit(1)
|
|
1635
1628
|
except Exception as e:
|
|
1636
1629
|
if args.debug:
|
|
1637
|
-
import traceback
|
|
1630
|
+
import traceback # noqa: PLC0415,I001
|
|
1638
1631
|
|
|
1639
1632
|
traceback.print_exc()
|
|
1640
1633
|
print_stderr(f'ERROR: {e}')
|
|
@@ -1676,7 +1669,7 @@ def crypto_versions_in_range(parser, args):
|
|
|
1676
1669
|
sys.exit(1)
|
|
1677
1670
|
except Exception as e:
|
|
1678
1671
|
if args.debug:
|
|
1679
|
-
import traceback
|
|
1672
|
+
import traceback # noqa: PLC0415,I001
|
|
1680
1673
|
|
|
1681
1674
|
traceback.print_exc()
|
|
1682
1675
|
print_stderr(f'ERROR: {e}')
|
|
@@ -1965,11 +1958,9 @@ def folder_hashing_scan(parser, args):
|
|
|
1965
1958
|
config=scanner_config,
|
|
1966
1959
|
client=client,
|
|
1967
1960
|
scanoss_settings=scanoss_settings,
|
|
1961
|
+
rank_threshold=args.rank_threshold,
|
|
1968
1962
|
)
|
|
1969
1963
|
|
|
1970
|
-
scanner.best_match = args.best_match
|
|
1971
|
-
scanner.threshold = args.threshold
|
|
1972
|
-
|
|
1973
1964
|
if scanner.scan():
|
|
1974
1965
|
scanner.present(output_file=args.output, output_format=args.format)
|
|
1975
1966
|
except ScanossGrpcError as e:
|
|
@@ -22,14 +22,13 @@ SPDX-License-Identifier: MIT
|
|
|
22
22
|
THE SOFTWARE.
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
|
+
import datetime
|
|
25
26
|
import json
|
|
26
27
|
import os.path
|
|
27
28
|
import sys
|
|
28
29
|
import uuid
|
|
29
|
-
import datetime
|
|
30
30
|
|
|
31
31
|
from . import __version__
|
|
32
|
-
|
|
33
32
|
from .scanossbase import ScanossBase
|
|
34
33
|
from .spdxlite import SpdxLite
|
|
35
34
|
|
|
@@ -49,7 +48,7 @@ class CycloneDx(ScanossBase):
|
|
|
49
48
|
self.debug = debug
|
|
50
49
|
self._spdx = SpdxLite(debug=debug)
|
|
51
50
|
|
|
52
|
-
def parse(self, data: json):
|
|
51
|
+
def parse(self, data: json): # noqa: PLR0912, PLR0915
|
|
53
52
|
"""
|
|
54
53
|
Parse the given input (raw/plain) JSON string and return CycloneDX summary
|
|
55
54
|
:param data: json - JSON object
|
|
@@ -58,7 +57,7 @@ class CycloneDx(ScanossBase):
|
|
|
58
57
|
if not data:
|
|
59
58
|
self.print_stderr('ERROR: No JSON data provided to parse.')
|
|
60
59
|
return None, None
|
|
61
|
-
self.print_debug(
|
|
60
|
+
self.print_debug('Processing raw results into CycloneDX format...')
|
|
62
61
|
cdx = {}
|
|
63
62
|
vdx = {}
|
|
64
63
|
for f in data:
|
|
@@ -171,17 +170,22 @@ class CycloneDx(ScanossBase):
|
|
|
171
170
|
success = self.produce_from_str(f.read(), output_file)
|
|
172
171
|
return success
|
|
173
172
|
|
|
174
|
-
def produce_from_json(self, data: json, output_file: str = None) -> bool:
|
|
173
|
+
def produce_from_json(self, data: json, output_file: str = None) -> tuple[bool, json]: # noqa: PLR0912
|
|
175
174
|
"""
|
|
176
|
-
Produce the CycloneDX output from the input data
|
|
177
|
-
|
|
178
|
-
:
|
|
179
|
-
|
|
175
|
+
Produce the CycloneDX output from the raw scan results input data
|
|
176
|
+
|
|
177
|
+
Args:
|
|
178
|
+
data (json): JSON object
|
|
179
|
+
output_file (str, optional): Output file (optional). Defaults to None.
|
|
180
|
+
|
|
181
|
+
Returns:
|
|
182
|
+
bool: True if successful, False otherwise
|
|
183
|
+
json: The CycloneDX output
|
|
180
184
|
"""
|
|
181
185
|
cdx, vdx = self.parse(data)
|
|
182
186
|
if not cdx:
|
|
183
187
|
self.print_stderr('ERROR: No CycloneDX data returned for the JSON string provided.')
|
|
184
|
-
return False
|
|
188
|
+
return False, None
|
|
185
189
|
self._spdx.load_license_data() # Load SPDX license name data for later reference
|
|
186
190
|
#
|
|
187
191
|
# Using CDX version 1.4: https://cyclonedx.org/docs/1.4/json/
|
|
@@ -264,7 +268,7 @@ class CycloneDx(ScanossBase):
|
|
|
264
268
|
if output_file:
|
|
265
269
|
file.close()
|
|
266
270
|
|
|
267
|
-
return True
|
|
271
|
+
return True, data
|
|
268
272
|
|
|
269
273
|
def produce_from_str(self, json_str: str, output_file: str = None) -> bool:
|
|
270
274
|
"""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
date: 20250709092546, utime: 1752053146
|
|
@@ -25,7 +25,7 @@ SPDX-License-Identifier: MIT
|
|
|
25
25
|
import os
|
|
26
26
|
import sys
|
|
27
27
|
from pathlib import Path
|
|
28
|
-
from typing import List
|
|
28
|
+
from typing import List, Optional
|
|
29
29
|
|
|
30
30
|
from pathspec import GitIgnoreSpec
|
|
31
31
|
|
|
@@ -511,7 +511,7 @@ class FileFilters(ScanossBase):
|
|
|
511
511
|
# Now filter the files and return the reduced list
|
|
512
512
|
return self.get_filtered_files_from_files(all_files, str(root_path))
|
|
513
513
|
|
|
514
|
-
def get_filtered_files_from_files(self, files: List[str], scan_root: str = None) -> List[str]:
|
|
514
|
+
def get_filtered_files_from_files(self, files: List[str], scan_root: Optional[str] = None) -> List[str]:
|
|
515
515
|
"""
|
|
516
516
|
Retrieve a list of files to scan or fingerprint from a given list of files based on filter settings.
|
|
517
517
|
|
|
@@ -615,8 +615,13 @@ class FileFilters(ScanossBase):
|
|
|
615
615
|
# Default patterns for skipping directories
|
|
616
616
|
if not self.all_folders:
|
|
617
617
|
DEFAULT_SKIPPED_DIR_LIST = DEFAULT_SKIPPED_DIRS_HFH if self.is_folder_hashing_scan else DEFAULT_SKIPPED_DIRS
|
|
618
|
+
DEFAULT_SKIPPED_DIR_EXT_LIST = (
|
|
619
|
+
DEFAULT_SKIPPED_DIR_EXT_HFH if self.is_folder_hashing_scan else DEFAULT_SKIPPED_DIR_EXT
|
|
620
|
+
)
|
|
618
621
|
for dir_name in DEFAULT_SKIPPED_DIR_LIST:
|
|
619
622
|
patterns.append(f'{dir_name}/')
|
|
623
|
+
for dir_extension in DEFAULT_SKIPPED_DIR_EXT_LIST:
|
|
624
|
+
patterns.append(f'*{dir_extension}/')
|
|
620
625
|
|
|
621
626
|
# Custom patterns added in SCANOSS settings file
|
|
622
627
|
if self.scanoss_settings:
|
|
@@ -22,40 +22,40 @@ SPDX-License-Identifier: MIT
|
|
|
22
22
|
THE SOFTWARE.
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
|
+
import datetime
|
|
25
26
|
import json
|
|
26
27
|
import os
|
|
27
|
-
from pathlib import Path
|
|
28
28
|
import sys
|
|
29
|
-
import datetime
|
|
29
|
+
from pathlib import Path
|
|
30
30
|
from typing import Any, Dict, List, Optional
|
|
31
|
-
import importlib_resources
|
|
32
31
|
|
|
32
|
+
import importlib_resources
|
|
33
33
|
from progress.bar import Bar
|
|
34
34
|
from progress.spinner import Spinner
|
|
35
35
|
from pypac.parser import PACFile
|
|
36
36
|
|
|
37
37
|
from scanoss.file_filters import FileFilters
|
|
38
38
|
|
|
39
|
-
from .
|
|
40
|
-
from .cyclonedx import CycloneDx
|
|
41
|
-
from .spdxlite import SpdxLite
|
|
39
|
+
from . import __version__
|
|
42
40
|
from .csvoutput import CsvOutput
|
|
43
|
-
from .
|
|
41
|
+
from .cyclonedx import CycloneDx
|
|
44
42
|
from .scancodedeps import ScancodeDeps
|
|
45
|
-
from .threadeddependencies import ThreadedDependencies, SCOPE
|
|
46
|
-
from .scanossgrpc import ScanossGrpc
|
|
47
|
-
from .scantype import ScanType
|
|
48
|
-
from .scanossbase import ScanossBase
|
|
49
43
|
from .scanoss_settings import ScanossSettings
|
|
44
|
+
from .scanossapi import ScanossApi
|
|
45
|
+
from .scanossbase import ScanossBase
|
|
46
|
+
from .scanossgrpc import ScanossGrpc
|
|
50
47
|
from .scanpostprocessor import ScanPostProcessor
|
|
51
|
-
from . import __version__
|
|
48
|
+
from .scantype import ScanType
|
|
49
|
+
from .spdxlite import SpdxLite
|
|
50
|
+
from .threadeddependencies import SCOPE, ThreadedDependencies
|
|
51
|
+
from .threadedscanning import ThreadedScanning
|
|
52
52
|
|
|
53
53
|
FAST_WINNOWING = False
|
|
54
54
|
try:
|
|
55
55
|
from scanoss_winnowing.winnowing import Winnowing
|
|
56
56
|
|
|
57
57
|
FAST_WINNOWING = True
|
|
58
|
-
except ModuleNotFoundError:
|
|
58
|
+
except (ModuleNotFoundError, ImportError):
|
|
59
59
|
FAST_WINNOWING = False
|
|
60
60
|
from .winnowing import Winnowing
|
|
61
61
|
|
|
@@ -284,7 +284,7 @@ class Scanner(ScanossBase):
|
|
|
284
284
|
return True
|
|
285
285
|
return False
|
|
286
286
|
|
|
287
|
-
def scan_folder_with_options(
|
|
287
|
+
def scan_folder_with_options( # noqa: PLR0913
|
|
288
288
|
self,
|
|
289
289
|
scan_dir: str,
|
|
290
290
|
deps_file: str = None,
|
|
@@ -332,7 +332,7 @@ class Scanner(ScanossBase):
|
|
|
332
332
|
success = False
|
|
333
333
|
return success
|
|
334
334
|
|
|
335
|
-
def scan_folder(self, scan_dir: str) -> bool:
|
|
335
|
+
def scan_folder(self, scan_dir: str) -> bool: # noqa: PLR0912, PLR0915
|
|
336
336
|
"""
|
|
337
337
|
Scan the specified folder producing fingerprints, send to the SCANOSS API and return results
|
|
338
338
|
|
|
@@ -400,7 +400,7 @@ class Scanner(ScanossBase):
|
|
|
400
400
|
scan_block += wfp
|
|
401
401
|
scan_size = len(scan_block.encode('utf-8'))
|
|
402
402
|
wfp_file_count += 1
|
|
403
|
-
# If the scan request block (group of WFPs) or larger than the POST size or we have reached the file limit, add it to the queue
|
|
403
|
+
# If the scan request block (group of WFPs) or larger than the POST size or we have reached the file limit, add it to the queue # noqa: E501
|
|
404
404
|
if wfp_file_count > self.post_file_count or scan_size >= self.max_post_size:
|
|
405
405
|
self.threaded_scan.queue_add(scan_block)
|
|
406
406
|
queue_size += 1
|
|
@@ -484,7 +484,7 @@ class Scanner(ScanossBase):
|
|
|
484
484
|
self.__log_result(json.dumps(results, indent=2, sort_keys=True))
|
|
485
485
|
elif self.output_format == 'cyclonedx':
|
|
486
486
|
cdx = CycloneDx(self.debug, self.scan_output)
|
|
487
|
-
success = cdx.produce_from_json(results)
|
|
487
|
+
success, _ = cdx.produce_from_json(results)
|
|
488
488
|
elif self.output_format == 'spdxlite':
|
|
489
489
|
spdxlite = SpdxLite(self.debug, self.scan_output)
|
|
490
490
|
success = spdxlite.produce_from_json(results)
|
|
@@ -509,7 +509,7 @@ class Scanner(ScanossBase):
|
|
|
509
509
|
for response in scan_responses:
|
|
510
510
|
if response is not None:
|
|
511
511
|
if file_map:
|
|
512
|
-
response = self._deobfuscate_filenames(response, file_map)
|
|
512
|
+
response = self._deobfuscate_filenames(response, file_map) # noqa: PLW2901
|
|
513
513
|
results.update(response)
|
|
514
514
|
|
|
515
515
|
dep_files = dep_responses.get('files', None) if dep_responses else None
|
|
@@ -532,7 +532,7 @@ class Scanner(ScanossBase):
|
|
|
532
532
|
deobfuscated[key] = value
|
|
533
533
|
return deobfuscated
|
|
534
534
|
|
|
535
|
-
def scan_file_with_options(
|
|
535
|
+
def scan_file_with_options( # noqa: PLR0913
|
|
536
536
|
self,
|
|
537
537
|
file: str,
|
|
538
538
|
deps_file: str = None,
|
|
@@ -603,7 +603,7 @@ class Scanner(ScanossBase):
|
|
|
603
603
|
success = False
|
|
604
604
|
return success
|
|
605
605
|
|
|
606
|
-
def scan_files(self, files: []) -> bool:
|
|
606
|
+
def scan_files(self, files: []) -> bool: # noqa: PLR0912, PLR0915
|
|
607
607
|
"""
|
|
608
608
|
Scan the specified list of files, producing fingerprints, send to the SCANOSS API and return results
|
|
609
609
|
Please note that by providing an explicit list you bypass any exclusions that may be defined on the scanner
|
|
@@ -657,7 +657,7 @@ class Scanner(ScanossBase):
|
|
|
657
657
|
file_count += 1
|
|
658
658
|
if self.threaded_scan:
|
|
659
659
|
wfp_size = len(wfp.encode('utf-8'))
|
|
660
|
-
# If the WFP is bigger than the max post size and we already have something stored in the scan block, add it to the queue
|
|
660
|
+
# If the WFP is bigger than the max post size and we already have something stored in the scan block, add it to the queue # noqa: E501
|
|
661
661
|
if scan_block != '' and (wfp_size + scan_size) >= self.max_post_size:
|
|
662
662
|
self.threaded_scan.queue_add(scan_block)
|
|
663
663
|
queue_size += 1
|
|
@@ -666,7 +666,7 @@ class Scanner(ScanossBase):
|
|
|
666
666
|
scan_block += wfp
|
|
667
667
|
scan_size = len(scan_block.encode('utf-8'))
|
|
668
668
|
wfp_file_count += 1
|
|
669
|
-
# If the scan request block (group of WFPs) or larger than the POST size or we have reached the file limit, add it to the queue
|
|
669
|
+
# If the scan request block (group of WFPs) or larger than the POST size or we have reached the file limit, add it to the queue # noqa: E501
|
|
670
670
|
if wfp_file_count > self.post_file_count or scan_size >= self.max_post_size:
|
|
671
671
|
self.threaded_scan.queue_add(scan_block)
|
|
672
672
|
queue_size += 1
|
|
@@ -755,7 +755,7 @@ class Scanner(ScanossBase):
|
|
|
755
755
|
success = False
|
|
756
756
|
return success
|
|
757
757
|
|
|
758
|
-
def scan_wfp_file(self, file: str = None) -> bool:
|
|
758
|
+
def scan_wfp_file(self, file: str = None) -> bool: # noqa: PLR0912, PLR0915
|
|
759
759
|
"""
|
|
760
760
|
Scan the contents of the specified WFP file (in the current process)
|
|
761
761
|
:param file: Scan the contents of the specified WFP file (in the current process)
|
|
@@ -436,10 +436,12 @@ class ContainerScannerPresenter(AbstractPresenter):
|
|
|
436
436
|
scan_results = {}
|
|
437
437
|
for f in self.scanner.decorated_scan_results['files']:
|
|
438
438
|
scan_results[f['file']] = [f]
|
|
439
|
-
|
|
439
|
+
success, cdx_output = cdx.produce_from_json(scan_results)
|
|
440
|
+
if not success:
|
|
440
441
|
error_msg = 'Failed to produce CycloneDX output'
|
|
441
442
|
self.base.print_stderr(error_msg)
|
|
442
|
-
|
|
443
|
+
return None
|
|
444
|
+
return json.dumps(cdx_output, indent=2)
|
|
443
445
|
|
|
444
446
|
def _format_spdxlite_output(self) -> str:
|
|
445
447
|
"""
|
|
@@ -15,7 +15,7 @@ from scanoss.utils.simhash import WordFeatureSet, fingerprint, simhash, vectoriz
|
|
|
15
15
|
|
|
16
16
|
MINIMUM_FILE_COUNT = 8
|
|
17
17
|
MINIMUM_CONCATENATED_NAME_LENGTH = 32
|
|
18
|
-
|
|
18
|
+
MAXIMUM_FILE_NAME_LENGTH = 32
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class DirectoryNode:
|
|
@@ -35,7 +35,7 @@ class DirectoryFile:
|
|
|
35
35
|
Represents a file in the directory tree for folder hashing.
|
|
36
36
|
"""
|
|
37
37
|
|
|
38
|
-
def __init__(self, path: str, key: bytes, key_str: str):
|
|
38
|
+
def __init__(self, path: str, key: List[bytes], key_str: str):
|
|
39
39
|
self.path = path
|
|
40
40
|
self.key = key
|
|
41
41
|
self.key_str = key_str
|
|
@@ -77,7 +77,7 @@ class FolderHasher:
|
|
|
77
77
|
def __init__(
|
|
78
78
|
self,
|
|
79
79
|
scan_dir: str,
|
|
80
|
-
config:
|
|
80
|
+
config: FolderHasherConfig,
|
|
81
81
|
scanoss_settings: Optional[ScanossSettings] = None,
|
|
82
82
|
):
|
|
83
83
|
self.base = ScanossBase(
|
|
@@ -140,7 +140,7 @@ class FolderHasher:
|
|
|
140
140
|
root_node = DirectoryNode(str(root))
|
|
141
141
|
|
|
142
142
|
all_files = [
|
|
143
|
-
f for f in root.rglob('*') if f.is_file() and len(f.name.encode('utf-8')) <=
|
|
143
|
+
f for f in root.rglob('*') if f.is_file() and len(f.name.encode('utf-8')) <= MAXIMUM_FILE_NAME_LENGTH
|
|
144
144
|
]
|
|
145
145
|
filtered_files = self.file_filters.get_filtered_files_from_files(all_files, str(root))
|
|
146
146
|
|
|
@@ -185,7 +185,7 @@ class FolderHasher:
|
|
|
185
185
|
Recursively compute folder hash data for a directory node.
|
|
186
186
|
|
|
187
187
|
The hash data includes the path identifier, simhash for file names,
|
|
188
|
-
simhash for file content, and children node hash information.
|
|
188
|
+
simhash for file content, directory hash, language extensions, and children node hash information.
|
|
189
189
|
|
|
190
190
|
Args:
|
|
191
191
|
node (DirectoryNode): The directory node to compute the hash for.
|
|
@@ -194,11 +194,22 @@ class FolderHasher:
|
|
|
194
194
|
dict: The computed hash data for the node.
|
|
195
195
|
"""
|
|
196
196
|
hash_data = self._hash_calc(node)
|
|
197
|
+
|
|
198
|
+
# Safely calculate relative path
|
|
199
|
+
try:
|
|
200
|
+
node_path = Path(node.path).resolve()
|
|
201
|
+
scan_dir_path = Path(self.scan_dir).resolve()
|
|
202
|
+
rel_path = node_path.relative_to(scan_dir_path)
|
|
203
|
+
except ValueError:
|
|
204
|
+
# If relative_to fails, use the node path as is or a fallback
|
|
205
|
+
rel_path = Path(node.path).name if node.path else Path('.')
|
|
197
206
|
|
|
198
207
|
return {
|
|
199
|
-
'path_id':
|
|
208
|
+
'path_id': str(rel_path),
|
|
200
209
|
'sim_hash_names': f'{hash_data["name_hash"]:02x}' if hash_data['name_hash'] is not None else None,
|
|
201
210
|
'sim_hash_content': f'{hash_data["content_hash"]:02x}' if hash_data['content_hash'] is not None else None,
|
|
211
|
+
'sim_hash_dir_names': f'{hash_data["dir_hash"]:02x}' if hash_data['dir_hash'] is not None else None,
|
|
212
|
+
'lang_extensions': hash_data['lang_extensions'],
|
|
202
213
|
'children': [self._hash_calc_from_node(child) for child in node.children.values()],
|
|
203
214
|
}
|
|
204
215
|
|
|
@@ -215,9 +226,12 @@ class FolderHasher:
|
|
|
215
226
|
node (DirectoryNode): The directory node containing file items.
|
|
216
227
|
|
|
217
228
|
Returns:
|
|
218
|
-
dict: A dictionary with 'name_hash' and '
|
|
229
|
+
dict: A dictionary with 'name_hash', 'content_hash', 'dir_hash', and 'lang_extensions' keys.
|
|
219
230
|
"""
|
|
220
231
|
processed_hashes = set()
|
|
232
|
+
unique_file_names = set()
|
|
233
|
+
unique_directories = set()
|
|
234
|
+
extension_map = {}
|
|
221
235
|
file_hashes = []
|
|
222
236
|
selected_names = []
|
|
223
237
|
|
|
@@ -225,37 +239,64 @@ class FolderHasher:
|
|
|
225
239
|
key_str = file.key_str
|
|
226
240
|
if key_str in processed_hashes:
|
|
227
241
|
continue
|
|
228
|
-
processed_hashes.add(key_str)
|
|
229
242
|
|
|
230
|
-
|
|
243
|
+
file_name = os.path.basename(file.path)
|
|
244
|
+
|
|
245
|
+
file_name_without_extension, extension = os.path.splitext(file_name)
|
|
246
|
+
current_directory = os.path.dirname(file.path)
|
|
247
|
+
|
|
248
|
+
if extension and len(extension) > 1:
|
|
249
|
+
ext_without_dot = extension[1:]
|
|
250
|
+
extension_map[ext_without_dot] = extension_map.get(ext_without_dot, 0) + 1
|
|
251
|
+
|
|
252
|
+
current_directory.replace(self.scan_dir, '', 1).lstrip(os.path.sep)
|
|
253
|
+
parts = current_directory.split(os.path.sep)
|
|
254
|
+
for d in parts:
|
|
255
|
+
if d in {'', '.', '..'}:
|
|
256
|
+
continue
|
|
257
|
+
unique_directories.add(d)
|
|
231
258
|
|
|
232
|
-
|
|
233
|
-
|
|
259
|
+
processed_hashes.add(key_str)
|
|
260
|
+
unique_file_names.add(file_name_without_extension)
|
|
261
|
+
selected_names.append(file_name)
|
|
262
|
+
file_hashes.append(file.key)
|
|
234
263
|
|
|
235
264
|
if len(selected_names) < MINIMUM_FILE_COUNT:
|
|
236
|
-
return {
|
|
237
|
-
'name_hash': None,
|
|
238
|
-
'content_hash': None,
|
|
239
|
-
}
|
|
265
|
+
return {'name_hash': None, 'content_hash': None, 'dir_hash': None, 'lang_extensions': None}
|
|
240
266
|
|
|
241
267
|
selected_names.sort()
|
|
242
268
|
concatenated_names = ''.join(selected_names)
|
|
243
269
|
|
|
244
270
|
if len(concatenated_names.encode('utf-8')) < MINIMUM_CONCATENATED_NAME_LENGTH:
|
|
245
|
-
return {
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
271
|
+
return {'name_hash': None, 'content_hash': None, 'dir_hash': None, 'lang_extensions': None}
|
|
272
|
+
|
|
273
|
+
# Concatenate the unique file names without the extensions, adding a space and sorting them alphabetically
|
|
274
|
+
unique_file_names_list = list(unique_file_names)
|
|
275
|
+
unique_file_names_list.sort()
|
|
276
|
+
concatenated_names = ' '.join(unique_file_names_list)
|
|
277
|
+
|
|
278
|
+
# We do the same for the directory names, adding a space and sorting them alphabetically
|
|
279
|
+
unique_directories_list = list(unique_directories)
|
|
280
|
+
unique_directories_list.sort()
|
|
281
|
+
concatenated_directories = ' '.join(unique_directories_list)
|
|
249
282
|
|
|
250
283
|
names_simhash = simhash(WordFeatureSet(concatenated_names.encode('utf-8')))
|
|
284
|
+
dir_simhash = simhash(WordFeatureSet(concatenated_directories.encode('utf-8')))
|
|
251
285
|
content_simhash = fingerprint(vectorize_bytes(file_hashes))
|
|
252
286
|
|
|
287
|
+
# Debug logging similar to Go implementation
|
|
288
|
+
self.base.print_debug(f'Unique file names: {unique_file_names_list}')
|
|
289
|
+
self.base.print_debug(f'Unique directories: {unique_directories_list}')
|
|
290
|
+
self.base.print_debug(f'{dir_simhash:x}/{names_simhash:x} - {content_simhash:x} - {extension_map}')
|
|
291
|
+
|
|
253
292
|
return {
|
|
254
293
|
'name_hash': names_simhash,
|
|
255
294
|
'content_hash': content_simhash,
|
|
295
|
+
'dir_hash': dir_simhash,
|
|
296
|
+
'lang_extensions': extension_map,
|
|
256
297
|
}
|
|
257
298
|
|
|
258
|
-
def present(self, output_format: str = None, output_file: str = None):
|
|
299
|
+
def present(self, output_format: Optional[str] = None, output_file: Optional[str] = None):
|
|
259
300
|
"""Present the hashed tree in the selected format"""
|
|
260
301
|
self.presenter.present(output_format=output_format, output_file=output_file)
|
|
261
302
|
|
|
@@ -29,6 +29,8 @@ from typing import Dict, Optional
|
|
|
29
29
|
|
|
30
30
|
from progress.spinner import Spinner
|
|
31
31
|
|
|
32
|
+
from scanoss.constants import DEFAULT_HFH_RANK_THRESHOLD
|
|
33
|
+
from scanoss.cyclonedx import CycloneDx
|
|
32
34
|
from scanoss.file_filters import FileFilters
|
|
33
35
|
from scanoss.scanners.folder_hasher import FolderHasher
|
|
34
36
|
from scanoss.scanners.scanner_config import ScannerConfig
|
|
@@ -52,6 +54,7 @@ class ScannerHFH:
|
|
|
52
54
|
config: ScannerConfig,
|
|
53
55
|
client: Optional[ScanossGrpc] = None,
|
|
54
56
|
scanoss_settings: Optional[ScanossSettings] = None,
|
|
57
|
+
rank_threshold: int = DEFAULT_HFH_RANK_THRESHOLD,
|
|
55
58
|
):
|
|
56
59
|
"""
|
|
57
60
|
Initialize the ScannerHFH.
|
|
@@ -61,6 +64,7 @@ class ScannerHFH:
|
|
|
61
64
|
config (ScannerConfig): Configuration parameters for the scanner.
|
|
62
65
|
client (ScanossGrpc): gRPC client for communicating with the scanning service.
|
|
63
66
|
scanoss_settings (Optional[ScanossSettings]): Optional settings for Scanoss.
|
|
67
|
+
rank_threshold (int): Get results with rank below this threshold (default: 5).
|
|
64
68
|
"""
|
|
65
69
|
self.base = ScanossBase(
|
|
66
70
|
debug=config.debug,
|
|
@@ -88,8 +92,7 @@ class ScannerHFH:
|
|
|
88
92
|
self.scan_dir = scan_dir
|
|
89
93
|
self.client = client
|
|
90
94
|
self.scan_results = None
|
|
91
|
-
self.
|
|
92
|
-
self.threshold = 100
|
|
95
|
+
self.rank_threshold = rank_threshold
|
|
93
96
|
|
|
94
97
|
def scan(self) -> Optional[Dict]:
|
|
95
98
|
"""
|
|
@@ -100,8 +103,7 @@ class ScannerHFH:
|
|
|
100
103
|
"""
|
|
101
104
|
hfh_request = {
|
|
102
105
|
'root': self.folder_hasher.hash_directory(self.scan_dir),
|
|
103
|
-
'
|
|
104
|
-
'best_match': self.best_match,
|
|
106
|
+
'rank_threshold': self.rank_threshold,
|
|
105
107
|
}
|
|
106
108
|
|
|
107
109
|
spinner = Spinner('Scanning folder...')
|
|
@@ -160,8 +162,53 @@ class ScannerHFHPresenter(AbstractPresenter):
|
|
|
160
162
|
else str(self.scanner.scan_results)
|
|
161
163
|
)
|
|
162
164
|
|
|
163
|
-
def _format_cyclonedx_output(self) -> str:
|
|
164
|
-
|
|
165
|
+
def _format_cyclonedx_output(self) -> str: # noqa: PLR0911
|
|
166
|
+
if not self.scanner.scan_results:
|
|
167
|
+
return ''
|
|
168
|
+
try:
|
|
169
|
+
if 'results' not in self.scanner.scan_results or not self.scanner.scan_results['results']:
|
|
170
|
+
self.base.print_stderr('ERROR: No scan results found')
|
|
171
|
+
return ''
|
|
172
|
+
|
|
173
|
+
first_result = self.scanner.scan_results['results'][0]
|
|
174
|
+
|
|
175
|
+
best_match_components = [c for c in first_result.get('components', []) if c.get('order') == 1]
|
|
176
|
+
if not best_match_components:
|
|
177
|
+
self.base.print_stderr('ERROR: No best match component found')
|
|
178
|
+
return ''
|
|
179
|
+
|
|
180
|
+
best_match_component = best_match_components[0]
|
|
181
|
+
if not best_match_component.get('versions'):
|
|
182
|
+
self.base.print_stderr('ERROR: No versions found for best match component')
|
|
183
|
+
return ''
|
|
184
|
+
|
|
185
|
+
best_match_version = best_match_component['versions'][0]
|
|
186
|
+
purl = best_match_component['purl']
|
|
187
|
+
|
|
188
|
+
get_dependencies_json_request = {
|
|
189
|
+
'files': [
|
|
190
|
+
{
|
|
191
|
+
'file': f'{best_match_component["name"]}:{best_match_version["version"]}',
|
|
192
|
+
'purls': [{'purl': purl, 'requirement': best_match_version['version']}],
|
|
193
|
+
}
|
|
194
|
+
]
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
decorated_scan_results = self.scanner.client.get_dependencies(get_dependencies_json_request)
|
|
198
|
+
|
|
199
|
+
cdx = CycloneDx(self.base.debug)
|
|
200
|
+
scan_results = {}
|
|
201
|
+
for f in decorated_scan_results['files']:
|
|
202
|
+
scan_results[f['file']] = [f]
|
|
203
|
+
success, cdx_output = cdx.produce_from_json(scan_results)
|
|
204
|
+
if not success:
|
|
205
|
+
error_msg = 'ERROR: Failed to produce CycloneDX output'
|
|
206
|
+
self.base.print_stderr(error_msg)
|
|
207
|
+
return None
|
|
208
|
+
return json.dumps(cdx_output, indent=2)
|
|
209
|
+
except Exception as e:
|
|
210
|
+
self.base.print_stderr(f'ERROR: Failed to get license information: {e}')
|
|
211
|
+
return None
|
|
165
212
|
|
|
166
213
|
def _format_spdxlite_output(self) -> str:
|
|
167
214
|
raise NotImplementedError('SPDXlite output is not implemented')
|
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
|
3
|
-
# source: scanoss/api/scanning/v2/scanoss-scanning.proto
|
|
4
|
-
"""Generated protocol buffer code."""
|
|
5
|
-
from google.protobuf.internal import builder as _builder
|
|
6
|
-
from google.protobuf import descriptor as _descriptor
|
|
7
|
-
from google.protobuf import descriptor_pool as _descriptor_pool
|
|
8
|
-
from google.protobuf import symbol_database as _symbol_database
|
|
9
|
-
# @@protoc_insertion_point(imports)
|
|
10
|
-
|
|
11
|
-
_sym_db = _symbol_database.Default()
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
from scanoss.api.common.v2 import scanoss_common_pb2 as scanoss_dot_api_dot_common_dot_v2_dot_scanoss__common__pb2
|
|
15
|
-
from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2
|
|
16
|
-
from protoc_gen_swagger.options import annotations_pb2 as protoc__gen__swagger_dot_options_dot_annotations__pb2
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n.scanoss/api/scanning/v2/scanoss-scanning.proto\x12\x17scanoss.api.scanning.v2\x1a*scanoss/api/common/v2/scanoss-common.proto\x1a\x1cgoogle/api/annotations.proto\x1a,protoc-gen-swagger/options/annotations.proto\"\xff\x01\n\nHFHRequest\x12\x12\n\nbest_match\x18\x01 \x01(\x08\x12\x11\n\tthreshold\x18\x02 \x01(\x05\x12:\n\x04root\x18\x03 \x01(\x0b\x32,.scanoss.api.scanning.v2.HFHRequest.Children\x1a\x8d\x01\n\x08\x43hildren\x12\x0f\n\x07path_id\x18\x01 \x01(\t\x12\x16\n\x0esim_hash_names\x18\x02 \x01(\t\x12\x18\n\x10sim_hash_content\x18\x03 \x01(\t\x12>\n\x08\x63hildren\x18\x04 \x03(\x0b\x32,.scanoss.api.scanning.v2.HFHRequest.Children\"\xc1\x02\n\x0bHFHResponse\x12<\n\x07results\x18\x01 \x03(\x0b\x32+.scanoss.api.scanning.v2.HFHResponse.Result\x12\x35\n\x06status\x18\x02 \x01(\x0b\x32%.scanoss.api.common.v2.StatusResponse\x1a\x39\n\tComponent\x12\x0c\n\x04purl\x18\x01 \x01(\t\x12\x10\n\x08versions\x18\x02 \x03(\t\x12\x0c\n\x04rank\x18\x03 \x01(\x05\x1a\x81\x01\n\x06Result\x12\x0f\n\x07path_id\x18\x01 \x01(\t\x12\x42\n\ncomponents\x18\x02 \x03(\x0b\x32..scanoss.api.scanning.v2.HFHResponse.Component\x12\x13\n\x0bprobability\x18\x03 \x01(\x02\x12\r\n\x05stage\x18\x04 \x01(\x05\x32\x81\x02\n\x08Scanning\x12q\n\x04\x45\x63ho\x12\".scanoss.api.common.v2.EchoRequest\x1a#.scanoss.api.common.v2.EchoResponse\" \x82\xd3\xe4\x93\x02\x1a\"\x15/api/v2/scanning/echo:\x01*\x12\x81\x01\n\x0e\x46olderHashScan\x12#.scanoss.api.scanning.v2.HFHRequest\x1a$.scanoss.api.scanning.v2.HFHResponse\"$\x82\xd3\xe4\x93\x02\x1e\"\x19/api/v2/scanning/hfh/scan:\x01*B\x8a\x02Z1github.com/scanoss/papi/api/scanningv2;scanningv2\x92\x41\xd3\x01\x12m\n\x18SCANOSS Scanning Service\"L\n\x10scanoss-scanning\x12#https://github.com/scanoss/scanning\x1a\x13support@scanoss.com2\x03\x32.0*\x01\x01\x32\x10\x61pplication/json:\x10\x61pplication/jsonR;\n\x03\x34\x30\x34\x12\x34\n*Returned when the resource does not 
exist.\x12\x06\n\x04\x9a\x02\x01\x07\x62\x06proto3')
|
|
20
|
-
|
|
21
|
-
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
|
|
22
|
-
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'scanoss.api.scanning.v2.scanoss_scanning_pb2', globals())
|
|
23
|
-
if _descriptor._USE_C_DESCRIPTORS == False:
|
|
24
|
-
|
|
25
|
-
DESCRIPTOR._options = None
|
|
26
|
-
DESCRIPTOR._serialized_options = b'Z1github.com/scanoss/papi/api/scanningv2;scanningv2\222A\323\001\022m\n\030SCANOSS Scanning Service\"L\n\020scanoss-scanning\022#https://github.com/scanoss/scanning\032\023support@scanoss.com2\0032.0*\001\0012\020application/json:\020application/jsonR;\n\003404\0224\n*Returned when the resource does not exist.\022\006\n\004\232\002\001\007'
|
|
27
|
-
_SCANNING.methods_by_name['Echo']._options = None
|
|
28
|
-
_SCANNING.methods_by_name['Echo']._serialized_options = b'\202\323\344\223\002\032\"\025/api/v2/scanning/echo:\001*'
|
|
29
|
-
_SCANNING.methods_by_name['FolderHashScan']._options = None
|
|
30
|
-
_SCANNING.methods_by_name['FolderHashScan']._serialized_options = b'\202\323\344\223\002\036\"\031/api/v2/scanning/hfh/scan:\001*'
|
|
31
|
-
_HFHREQUEST._serialized_start=196
|
|
32
|
-
_HFHREQUEST._serialized_end=451
|
|
33
|
-
_HFHREQUEST_CHILDREN._serialized_start=310
|
|
34
|
-
_HFHREQUEST_CHILDREN._serialized_end=451
|
|
35
|
-
_HFHRESPONSE._serialized_start=454
|
|
36
|
-
_HFHRESPONSE._serialized_end=775
|
|
37
|
-
_HFHRESPONSE_COMPONENT._serialized_start=586
|
|
38
|
-
_HFHRESPONSE_COMPONENT._serialized_end=643
|
|
39
|
-
_HFHRESPONSE_RESULT._serialized_start=646
|
|
40
|
-
_HFHRESPONSE_RESULT._serialized_end=775
|
|
41
|
-
_SCANNING._serialized_start=778
|
|
42
|
-
_SCANNING._serialized_end=1035
|
|
43
|
-
# @@protoc_insertion_point(module_scope)
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
date: 20250626171827, utime: 1750958307
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/components/v2/scanoss_components_pb2_grpc.py
RENAMED
|
File without changes
|
{scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py
RENAMED
|
File without changes
|
{scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py
RENAMED
|
File without changes
|
{scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2.py
RENAMED
|
File without changes
|
{scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2_grpc.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{scanoss-1.26.3 → scanoss-1.27.1}/src/scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|