scanoss 1.35.0__tar.gz → 1.37.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scanoss-1.35.0/src/scanoss.egg-info → scanoss-1.37.0}/PKG-INFO +1 -1
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/__init__.py +1 -1
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/common/v2/scanoss_common_pb2_grpc.py +1 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/cli.py +103 -1
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/constants.py +4 -1
- scanoss-1.37.0/src/scanoss/data/build_date.txt +1 -0
- scanoss-1.37.0/src/scanoss/delta.py +197 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/file_filters.py +1 -158
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/scanners/folder_hasher.py +23 -9
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/scanners/scanner_hfh.py +22 -6
- {scanoss-1.35.0 → scanoss-1.37.0/src/scanoss.egg-info}/PKG-INFO +1 -1
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss.egg-info/SOURCES.txt +1 -0
- scanoss-1.35.0/src/scanoss/data/build_date.txt +0 -1
- {scanoss-1.35.0 → scanoss-1.37.0}/LICENSE +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/PACKAGE.md +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/README.md +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/pyproject.toml +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/setup.cfg +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/protoc_gen_swagger/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/protoc_gen_swagger/options/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/protoc_gen_swagger/options/annotations_pb2.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/protoc_gen_swagger/options/annotations_pb2.pyi +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/protoc_gen_swagger/options/annotations_pb2_grpc.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/protoc_gen_swagger/options/openapiv2_pb2.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/protoc_gen_swagger/options/openapiv2_pb2.pyi +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/protoc_gen_swagger/options/openapiv2_pb2_grpc.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/common/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/common/v2/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/common/v2/scanoss_common_pb2.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/components/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/components/v2/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/components/v2/scanoss_components_pb2.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/components/v2/scanoss_components_pb2_grpc.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/dependencies/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/dependencies/v2/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/geoprovenance/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/geoprovenance/v2/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2_grpc.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/licenses/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/licenses/v2/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/licenses/v2/scanoss_licenses_pb2.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/licenses/v2/scanoss_licenses_pb2_grpc.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/scanning/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/scanning/v2/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/scanning/v2/scanoss_scanning_pb2.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/semgrep/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/semgrep/v2/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/semgrep/v2/scanoss_semgrep_pb2.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/semgrep/v2/scanoss_semgrep_pb2_grpc.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/vulnerabilities/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/vulnerabilities/v2/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/components.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/cryptography.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/csvoutput.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/cyclonedx.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/data/scanoss-settings-schema.json +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/data/spdx-exceptions.json +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/data/spdx-licenses.json +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/export/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/export/dependency_track.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/filecount.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/inspection/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/inspection/dependency_track/project_violation.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/inspection/policy_check.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/inspection/raw/component_summary.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/inspection/raw/copyleft.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/inspection/raw/license_summary.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/inspection/raw/raw_base.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/inspection/raw/undeclared_component.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/inspection/utils/license_utils.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/results.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/scancodedeps.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/scanner.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/scanners/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/scanners/container_scanner.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/scanners/scanner_config.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/scanoss_settings.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/scanossapi.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/scanossbase.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/scanossgrpc.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/scanpostprocessor.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/scantype.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/services/dependency_track_service.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/spdxlite.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/threadeddependencies.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/threadedscanning.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/utils/__init__.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/utils/abstract_presenter.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/utils/crc64.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/utils/file.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/utils/simhash.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/winnowing.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss.egg-info/dependency_links.txt +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss.egg-info/entry_points.txt +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss.egg-info/requires.txt +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss.egg-info/top_level.txt +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/tests/test_csv_output.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/tests/test_file_filters.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/tests/test_policy_inspect.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/tests/test_scan_post_processor.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/tests/test_spdxlite.py +0 -0
- {scanoss-1.35.0 → scanoss-1.37.0}/tests/test_winnowing.py +0 -0
|
@@ -33,6 +33,7 @@ from typing import List
|
|
|
33
33
|
import pypac
|
|
34
34
|
|
|
35
35
|
from scanoss.cryptography import Cryptography, create_cryptography_config_from_args
|
|
36
|
+
from scanoss.delta import Delta
|
|
36
37
|
from scanoss.export.dependency_track import DependencyTrackExporter
|
|
37
38
|
from scanoss.inspection.dependency_track.project_violation import (
|
|
38
39
|
DependencyTrackProjectViolationPolicyCheck,
|
|
@@ -59,7 +60,10 @@ from . import __version__
|
|
|
59
60
|
from .components import Components
|
|
60
61
|
from .constants import (
|
|
61
62
|
DEFAULT_API_TIMEOUT,
|
|
63
|
+
DEFAULT_HFH_DEPTH,
|
|
64
|
+
DEFAULT_HFH_MIN_ACCEPTED_SCORE,
|
|
62
65
|
DEFAULT_HFH_RANK_THRESHOLD,
|
|
66
|
+
DEFAULT_HFH_RECURSIVE_THRESHOLD,
|
|
63
67
|
DEFAULT_POST_SIZE,
|
|
64
68
|
DEFAULT_RETRY,
|
|
65
69
|
DEFAULT_TIMEOUT,
|
|
@@ -869,6 +873,27 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
|
|
|
869
873
|
help='Filter results to only show those with rank value at or below this threshold (e.g., --rank-threshold 3 '
|
|
870
874
|
'returns results with rank 1, 2, or 3). Lower rank values indicate higher quality matches.',
|
|
871
875
|
)
|
|
876
|
+
p_folder_scan.add_argument(
|
|
877
|
+
'--depth',
|
|
878
|
+
type=int,
|
|
879
|
+
default=DEFAULT_HFH_DEPTH,
|
|
880
|
+
help=f'Defines how deep to scan the root directory (optional - default {DEFAULT_HFH_DEPTH})',
|
|
881
|
+
)
|
|
882
|
+
p_folder_scan.add_argument(
|
|
883
|
+
'--recursive-threshold',
|
|
884
|
+
type=float,
|
|
885
|
+
default=DEFAULT_HFH_RECURSIVE_THRESHOLD,
|
|
886
|
+
help=f'Minimum score threshold to consider a match (optional - default: {DEFAULT_HFH_RECURSIVE_THRESHOLD})',
|
|
887
|
+
)
|
|
888
|
+
p_folder_scan.add_argument(
|
|
889
|
+
'--min-accepted-score',
|
|
890
|
+
type=float,
|
|
891
|
+
default=DEFAULT_HFH_MIN_ACCEPTED_SCORE,
|
|
892
|
+
help=(
|
|
893
|
+
'Only show results with a score at or above this threshold '
|
|
894
|
+
f'(optional - default: {DEFAULT_HFH_MIN_ACCEPTED_SCORE})'
|
|
895
|
+
),
|
|
896
|
+
)
|
|
872
897
|
p_folder_scan.set_defaults(func=folder_hashing_scan)
|
|
873
898
|
|
|
874
899
|
# Sub-command: folder-hash
|
|
@@ -887,8 +912,41 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
|
|
|
887
912
|
default='json',
|
|
888
913
|
help='Result output format (optional - default: json)',
|
|
889
914
|
)
|
|
915
|
+
p_folder_hash.add_argument(
|
|
916
|
+
'--depth',
|
|
917
|
+
type=int,
|
|
918
|
+
default=DEFAULT_HFH_DEPTH,
|
|
919
|
+
help=f'Defines how deep to hash the root directory (optional - default {DEFAULT_HFH_DEPTH})',
|
|
920
|
+
)
|
|
890
921
|
p_folder_hash.set_defaults(func=folder_hash)
|
|
891
922
|
|
|
923
|
+
# Sub-command: delta
|
|
924
|
+
p_delta = subparsers.add_parser(
|
|
925
|
+
'delta',
|
|
926
|
+
aliases=['dl'],
|
|
927
|
+
description=f'SCANOSS Delta commands: {__version__}',
|
|
928
|
+
help='Delta support commands',
|
|
929
|
+
)
|
|
930
|
+
|
|
931
|
+
delta_sub = p_delta.add_subparsers(
|
|
932
|
+
title='Delta Commands',
|
|
933
|
+
dest='subparsercmd',
|
|
934
|
+
description='Delta sub-commands',
|
|
935
|
+
help='Delta sub-commands'
|
|
936
|
+
)
|
|
937
|
+
|
|
938
|
+
# Delta Sub-command: copy
|
|
939
|
+
p_copy = delta_sub.add_parser(
|
|
940
|
+
'copy',
|
|
941
|
+
aliases=['cp'],
|
|
942
|
+
description=f'Copy file list into delta dir: {__version__}',
|
|
943
|
+
help='Copy the given list of files into a delta directory',
|
|
944
|
+
)
|
|
945
|
+
p_copy.add_argument('--input', '-i', type=str, required=True, help='Input file with diff list')
|
|
946
|
+
p_copy.add_argument('--folder', '-fd', type=str, help='Delta folder to copy into')
|
|
947
|
+
p_copy.add_argument('--root', '-rd', type=str, help='Root directory to place delta folder')
|
|
948
|
+
p_copy.set_defaults(func=delta_copy)
|
|
949
|
+
|
|
892
950
|
# Output options
|
|
893
951
|
for p in [
|
|
894
952
|
p_scan,
|
|
@@ -909,6 +967,7 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
|
|
|
909
967
|
p_crypto_hints,
|
|
910
968
|
p_crypto_versions_in_range,
|
|
911
969
|
c_licenses,
|
|
970
|
+
p_copy,
|
|
912
971
|
]:
|
|
913
972
|
p.add_argument('--output', '-o', type=str, help='Output result file name (optional - default stdout).')
|
|
914
973
|
|
|
@@ -1106,6 +1165,7 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
|
|
|
1106
1165
|
p_crypto_versions_in_range,
|
|
1107
1166
|
c_licenses,
|
|
1108
1167
|
e_dt,
|
|
1168
|
+
p_copy
|
|
1109
1169
|
]:
|
|
1110
1170
|
p.add_argument('--debug', '-d', action='store_true', help='Enable debug messages')
|
|
1111
1171
|
p.add_argument('--trace', '-t', action='store_true', help='Enable trace messages, including API posts')
|
|
@@ -1126,7 +1186,8 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
|
|
|
1126
1186
|
sys.exit(1)
|
|
1127
1187
|
elif (
|
|
1128
1188
|
args.subparser
|
|
1129
|
-
in ('utils', 'ut', 'component', 'comp', 'inspect', 'insp', 'ins',
|
|
1189
|
+
in ('utils', 'ut', 'component', 'comp', 'inspect', 'insp', 'ins',
|
|
1190
|
+
'crypto', 'cr', 'export', 'exp', 'delta', 'dl')
|
|
1130
1191
|
) and not args.subparsercmd:
|
|
1131
1192
|
parser.parse_args([args.subparser, '--help']) # Force utils helps to be displayed
|
|
1132
1193
|
sys.exit(1)
|
|
@@ -2456,6 +2517,9 @@ def folder_hashing_scan(parser, args):
|
|
|
2456
2517
|
client=client,
|
|
2457
2518
|
scanoss_settings=scanoss_settings,
|
|
2458
2519
|
rank_threshold=args.rank_threshold,
|
|
2520
|
+
depth=args.depth,
|
|
2521
|
+
recursive_threshold=args.recursive_threshold,
|
|
2522
|
+
min_accepted_score=args.min_accepted_score,
|
|
2459
2523
|
)
|
|
2460
2524
|
|
|
2461
2525
|
if scanner.scan():
|
|
@@ -2489,6 +2553,7 @@ def folder_hash(parser, args):
|
|
|
2489
2553
|
scan_dir=args.scan_dir,
|
|
2490
2554
|
config=folder_hasher_config,
|
|
2491
2555
|
scanoss_settings=scanoss_settings,
|
|
2556
|
+
depth=args.depth,
|
|
2492
2557
|
)
|
|
2493
2558
|
|
|
2494
2559
|
folder_hasher.hash_directory(args.scan_dir)
|
|
@@ -2569,6 +2634,43 @@ def initialise_empty_file(filename: str):
|
|
|
2569
2634
|
print_stderr(f'Error: Unable to create output file {filename}: {e}')
|
|
2570
2635
|
sys.exit(1)
|
|
2571
2636
|
|
|
2637
|
+
def delta_copy(parser, args):
    """
    Run the ``delta copy`` sub-command.

    Builds a Delta instance from the parsed arguments and asks it to copy every
    file named in the input list into a delta directory, keeping the relative
    directory layout. The process always terminates via ``sys.exit``: with the
    status reported by ``Delta.copy()`` on completion, or with 1 on any error.

    Parameters
    ----------
    parser : ArgumentParser
        Command line parser object, used to display the sub-command help
    args : Namespace
        Parsed command line arguments. The following attributes are read:
        - input: Path to the file containing the list of files to copy
        - folder: Optional target delta directory
        - root: Optional root directory handed to Delta as ``root_dir``
        - output: Optional output file path (emptied before the copy starts)
        - debug / trace / quiet: Logging switches forwarded to Delta
    """
    # An input file list is mandatory; show the sub-command help when it is missing
    if args.input is None:
        print_stderr('ERROR: Input file is required for copying')
        parser.parse_args([args.subparser, args.subparsercmd, '-h'])
        sys.exit(1)
    # Start from an empty output file when one was requested
    if args.output:
        initialise_empty_file(args.output)
    try:
        # Assemble the delta copy command from the CLI options and run it
        delta_cmd = Delta(
            debug=args.debug,
            trace=args.trace,
            quiet=args.quiet,
            filepath=args.input,
            folder=args.folder,
            output=args.output,
            root_dir=args.root,
        )
        exit_code, _ = delta_cmd.copy()
    except Exception as e:
        print_stderr(e)
        if args.debug:
            traceback.print_exc()
        sys.exit(1)
    # Propagate the copy status as the process exit code
    sys.exit(exit_code)
|
|
2572
2674
|
|
|
2573
2675
|
def main():
|
|
2574
2676
|
"""
|
|
@@ -13,4 +13,7 @@ DEFAULT_URL2 = 'https://api.scanoss.com' # default premium service URL
|
|
|
13
13
|
|
|
14
14
|
DEFAULT_API_TIMEOUT = 600
|
|
15
15
|
|
|
16
|
-
DEFAULT_HFH_RANK_THRESHOLD = 5
|
|
16
|
+
DEFAULT_HFH_RANK_THRESHOLD = 5
|
|
17
|
+
DEFAULT_HFH_DEPTH = 1
|
|
18
|
+
DEFAULT_HFH_RECURSIVE_THRESHOLD = 0.8
|
|
19
|
+
DEFAULT_HFH_MIN_ACCEPTED_SCORE = 0.15
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
date: 20251017144212, utime: 1760712132
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
Copyright (c) 2025, SCANOSS
|
|
5
|
+
|
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
in the Software without restriction, including without limitation the rights
|
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
furnished to do so, subject to the following conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be included in
|
|
14
|
+
all copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
22
|
+
THE SOFTWARE.
|
|
23
|
+
"""
|
|
24
|
+
import os
|
|
25
|
+
import shutil
|
|
26
|
+
import tempfile
|
|
27
|
+
from typing import Optional
|
|
28
|
+
|
|
29
|
+
from .scanossbase import ScanossBase
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class Delta(ScanossBase):
    """
    Handle delta scan operations by copying files into a dedicated delta directory.

    This class manages the creation of delta directories and copying of specified files
    while preserving the directory structure. Files are read from an input file where each
    line contains a file path to copy.
    """

    def __init__(  # noqa: PLR0913
        self,
        debug: bool = False,
        trace: bool = False,
        quiet: bool = False,
        filepath: Optional[str] = None,
        folder: Optional[str] = None,
        output: Optional[str] = None,
        root_dir: Optional[str] = None,
    ):
        """
        Initialise the Delta instance.

        :param debug: Enable debug logging.
        :param trace: Enable trace logging.
        :param quiet: Enable quiet mode (suppress non-essential output).
        :param filepath: Path to an input file containing a list of files to copy.
        :param folder: A target delta directory path (auto-generated if not provided).
        :param output: Output file path for the delta directory location (stdout if not provided).
        :param root_dir: Directory the listed files are resolved against and in which a relative
                         delta directory is created (defaults to the current directory).
        """
        super().__init__(debug, trace, quiet)
        self.filepath = filepath
        self.folder = folder
        self.output = output
        self.root_dir = root_dir if root_dir else '.'

    def copy(self, input_file: Optional[str] = None):
        """
        Copy files listed in the input file to the delta directory.

        Reads the input file line by line, where each line contains a file path.
        Creates the delta directory if it doesn't exist, then copies each file
        while preserving its directory structure. Entries that are missing, are not
        regular files, or would escape the root/delta directory are skipped with a warning.

        :param input_file: Optional input file path; falls back to the path given at construction.
        :return: Tuple of (status_code, folder_path) where status_code is 0 for success,
                 1 for error, and folder_path is the delta directory path
        """
        input_file = input_file if input_file else self.filepath
        if not input_file:
            self.print_stderr('ERROR: No input file specified')
            return 1, ''
        # Validate that an input file exists
        if not os.path.isfile(input_file):
            self.print_stderr(f'ERROR: Input file {input_file} does not exist or is not a file')
            return 1, ''
        # Load the input file and validate it contains valid file paths
        files = self.load_input_file(input_file)
        if files is None:
            return 1, ''
        # Create delta dir (folder)
        delta_folder = self.create_delta_dir(self.folder, self.root_dir)
        if not delta_folder:
            return 1, ''
        # Print delta folder location to output
        self.print_to_file_or_stdout(delta_folder, self.output)
        # The delta folder location is the same for every file, so resolve it once
        abs_folder = os.path.abspath(os.path.join(self.root_dir, delta_folder))
        # Process each file and copy it to the delta dir
        for source_file in files:
            # Normalise the source path to handle ".." and redundant separators
            normalised_source = os.path.normpath(source_file)
            # Compare whole path components: a plain substring test would also reject
            # legitimate names that merely contain two dots (e.g. "notes..txt")
            if os.pardir in normalised_source.split(os.sep):
                self.print_stderr(f'WARNING: Source path escapes root directory for {source_file}. Skipping.')
                continue
            # Resolve to the absolute path for source validation
            abs_source = os.path.abspath(os.path.join(self.root_dir, normalised_source))
            # Check the source is an existing regular file (isfile() is False for missing paths)
            if not os.path.isfile(abs_source):
                self.print_stderr(f'WARNING: File {source_file} does not exist or is not a file, skipping')
                continue
            # Use a normalised source for destination to prevent traversal
            dest_path = os.path.normpath(os.path.join(self.root_dir, delta_folder, normalised_source.lstrip(os.sep)))
            # Final safety check: ensure destination is within the delta folder
            abs_dest = os.path.abspath(dest_path)
            if not abs_dest.startswith(abs_folder + os.sep):
                self.print_stderr(
                    f'WARNING: Destination path ({abs_dest}) escapes delta directory for {source_file}. Skipping.')
                continue
            # Create the destination directory if it doesn't exist and copy the file
            try:
                dest_dir = os.path.dirname(dest_path)
                if dest_dir:
                    self.print_trace(f'Creating directory {dest_dir}...')
                    os.makedirs(dest_dir, exist_ok=True)
                self.print_debug(f'Copying {source_file} to {dest_path} ...')
                shutil.copy(abs_source, dest_path)
            except (OSError, shutil.Error) as e:
                self.print_stderr(f'ERROR: Failed to copy {source_file} to {dest_path}: {e}')
                return 1, ''
        return 0, delta_folder

    def create_delta_dir(self, folder: str, root_dir: str = '.') -> Optional[str]:
        """
        Create the delta directory.

        If no folder is specified, creates a unique temporary directory with
        a 'delta-' prefix in the root directory. If a folder is specified,
        validates that it doesn't already exist before creating it.

        :param folder: Optional target directory (a relative path is resolved under root_dir)
        :param root_dir: Root directory to create the delta directory in (default: current directory)
        :return: Path to the delta directory (the given folder, or the generated directory relative
                 to root_dir), or None if it already exists or creation fails
        """
        if folder:
            # Resolve a relative folder under root_dir so checks/creation apply to the right place
            resolved = folder if os.path.isabs(folder) else os.path.join(root_dir, folder)
            resolved = os.path.normpath(resolved)
            # Validate the target directory doesn't already exist and create it
            if os.path.exists(resolved):
                self.print_stderr(f'ERROR: Folder {resolved} already exists.')
                return None
            try:
                self.print_debug(f'Creating delta directory {resolved}...')
                os.makedirs(resolved)
            except (OSError, IOError) as e:
                self.print_stderr(f'ERROR: Failed to create directory {resolved}: {e}')
                return None
        else:
            # Create a unique temporary directory in the given root directory
            try:
                self.print_debug(f'Creating temporary delta directory in {root_dir} ...')
                folder = tempfile.mkdtemp(prefix="delta-", dir=root_dir)
                if folder:
                    folder = os.path.relpath(folder, start=root_dir)  # Get the relative path from root_dir
                self.print_debug(f'Created temporary delta directory: {folder}')
            except (OSError, IOError) as e:
                self.print_stderr(f'ERROR: Failed to create temporary directory in {root_dir}: {e}')
                return None
        return folder

    def load_input_file(self, input_file: str) -> Optional[list[str]]:
        """
        Loads and parses the input file line by line. Each line in the input
        file represents a source file path, which will be stripped of trailing
        whitespace and leading path separators, and appended to the resulting
        list if it is not empty.

        :param input_file: The path to the input file to be read.
        :type input_file: String
        :return: A list of source file paths extracted from the input file,
                 or None if the file cannot be read or is not valid UTF-8.
        :rtype: An array list[str] or None
        """
        files = []
        if input_file:
            try:
                with open(input_file, 'r', encoding='utf-8') as f:
                    for line in f:
                        source_file = line.rstrip()
                        if source_file:
                            # Save the file path without any leading separators
                            files.append(source_file.lstrip(os.sep))
            except (OSError, IOError, UnicodeDecodeError) as e:
                # UnicodeDecodeError is not an OSError; report a badly encoded list like any other read failure
                self.print_stderr(f'ERROR: Failed to read input file; {input_file}: {e}')
                return None
        self.print_debug(f'Loaded {len(files)} files from input file.')
        return files
|
|
@@ -269,162 +269,6 @@ DEFAULT_SKIPPED_EXT = {
|
|
|
269
269
|
'sqlite3',
|
|
270
270
|
}
|
|
271
271
|
|
|
272
|
-
# TODO: For hfh add the .gitignore patterns
|
|
273
|
-
DEFAULT_SKIPPED_EXT_HFH = {
|
|
274
|
-
'.1',
|
|
275
|
-
'.2',
|
|
276
|
-
'.3',
|
|
277
|
-
'.4',
|
|
278
|
-
'.5',
|
|
279
|
-
'.6',
|
|
280
|
-
'.7',
|
|
281
|
-
'.8',
|
|
282
|
-
'.9',
|
|
283
|
-
'.ac',
|
|
284
|
-
'.adoc',
|
|
285
|
-
'.am',
|
|
286
|
-
'.asciidoc',
|
|
287
|
-
'.bmp',
|
|
288
|
-
'.build',
|
|
289
|
-
'.cfg',
|
|
290
|
-
'.chm',
|
|
291
|
-
'.class',
|
|
292
|
-
'.cmake',
|
|
293
|
-
'.cnf',
|
|
294
|
-
'.conf',
|
|
295
|
-
'.config',
|
|
296
|
-
'.contributors',
|
|
297
|
-
'.copying',
|
|
298
|
-
'.crt',
|
|
299
|
-
'.csproj',
|
|
300
|
-
'.css',
|
|
301
|
-
'.csv',
|
|
302
|
-
'.dat',
|
|
303
|
-
'.data',
|
|
304
|
-
'.dtd',
|
|
305
|
-
'.dts',
|
|
306
|
-
'.iws',
|
|
307
|
-
'.c9',
|
|
308
|
-
'.c9revisions',
|
|
309
|
-
'.dtsi',
|
|
310
|
-
'.dump',
|
|
311
|
-
'.eot',
|
|
312
|
-
'.eps',
|
|
313
|
-
'.geojson',
|
|
314
|
-
'.gif',
|
|
315
|
-
'.glif',
|
|
316
|
-
'.gmo',
|
|
317
|
-
'.guess',
|
|
318
|
-
'.hex',
|
|
319
|
-
'.htm',
|
|
320
|
-
'.html',
|
|
321
|
-
'.ico',
|
|
322
|
-
'.iml',
|
|
323
|
-
'.in',
|
|
324
|
-
'.inc',
|
|
325
|
-
'.info',
|
|
326
|
-
'.ini',
|
|
327
|
-
'.ipynb',
|
|
328
|
-
'.jpeg',
|
|
329
|
-
'.jpg',
|
|
330
|
-
'.json',
|
|
331
|
-
'.jsonld',
|
|
332
|
-
'.lock',
|
|
333
|
-
'.log',
|
|
334
|
-
'.m4',
|
|
335
|
-
'.map',
|
|
336
|
-
'.md5',
|
|
337
|
-
'.meta',
|
|
338
|
-
'.mk',
|
|
339
|
-
'.mxml',
|
|
340
|
-
'.o',
|
|
341
|
-
'.otf',
|
|
342
|
-
'.out',
|
|
343
|
-
'.pbtxt',
|
|
344
|
-
'.pdf',
|
|
345
|
-
'.pem',
|
|
346
|
-
'.phtml',
|
|
347
|
-
'.plist',
|
|
348
|
-
'.png',
|
|
349
|
-
'.prefs',
|
|
350
|
-
'.properties',
|
|
351
|
-
'.pyc',
|
|
352
|
-
'.qdoc',
|
|
353
|
-
'.result',
|
|
354
|
-
'.rgb',
|
|
355
|
-
'.rst',
|
|
356
|
-
'.scss',
|
|
357
|
-
'.sha',
|
|
358
|
-
'.sha1',
|
|
359
|
-
'.sha2',
|
|
360
|
-
'.sha256',
|
|
361
|
-
'.sln',
|
|
362
|
-
'.spec',
|
|
363
|
-
'.sub',
|
|
364
|
-
'.svg',
|
|
365
|
-
'.svn-base',
|
|
366
|
-
'.tab',
|
|
367
|
-
'.template',
|
|
368
|
-
'.test',
|
|
369
|
-
'.tex',
|
|
370
|
-
'.tiff',
|
|
371
|
-
'.ttf',
|
|
372
|
-
'.txt',
|
|
373
|
-
'.utf-8',
|
|
374
|
-
'.vim',
|
|
375
|
-
'.wav',
|
|
376
|
-
'.woff',
|
|
377
|
-
'.woff2',
|
|
378
|
-
'.xht',
|
|
379
|
-
'.xhtml',
|
|
380
|
-
'.xml',
|
|
381
|
-
'.xpm',
|
|
382
|
-
'.xsd',
|
|
383
|
-
'.xul',
|
|
384
|
-
'.yaml',
|
|
385
|
-
'.yml',
|
|
386
|
-
'.wfp',
|
|
387
|
-
'.editorconfig',
|
|
388
|
-
'.dotcover',
|
|
389
|
-
'.pid',
|
|
390
|
-
'.lcov',
|
|
391
|
-
'.egg',
|
|
392
|
-
'.manifest',
|
|
393
|
-
'.cache',
|
|
394
|
-
'.coverage',
|
|
395
|
-
'.cover',
|
|
396
|
-
'.gem',
|
|
397
|
-
'.lst',
|
|
398
|
-
'.pickle',
|
|
399
|
-
'.pdb',
|
|
400
|
-
'.gml',
|
|
401
|
-
'.pot',
|
|
402
|
-
'.plt',
|
|
403
|
-
'.whml',
|
|
404
|
-
'.pom',
|
|
405
|
-
'.smtml',
|
|
406
|
-
'.min.js',
|
|
407
|
-
'.mf',
|
|
408
|
-
'.base64',
|
|
409
|
-
'.s',
|
|
410
|
-
'.diff',
|
|
411
|
-
'.patch',
|
|
412
|
-
'.rules',
|
|
413
|
-
# File endings
|
|
414
|
-
'-doc',
|
|
415
|
-
'config',
|
|
416
|
-
'news',
|
|
417
|
-
'readme',
|
|
418
|
-
'swiftdoc',
|
|
419
|
-
'texidoc',
|
|
420
|
-
'todo',
|
|
421
|
-
'version',
|
|
422
|
-
'ignore',
|
|
423
|
-
'manifest',
|
|
424
|
-
'sqlite',
|
|
425
|
-
'sqlite3',
|
|
426
|
-
}
|
|
427
|
-
|
|
428
272
|
|
|
429
273
|
class FileFilters(ScanossBase):
|
|
430
274
|
"""
|
|
@@ -707,9 +551,8 @@ class FileFilters(ScanossBase):
|
|
|
707
551
|
bool: True if file should be skipped, False otherwise
|
|
708
552
|
"""
|
|
709
553
|
file_name = os.path.basename(file_rel_path)
|
|
710
|
-
|
|
554
|
+
DEFAULT_SKIPPED_EXT_LIST = {} if self.is_folder_hashing_scan else DEFAULT_SKIPPED_EXT
|
|
711
555
|
DEFAULT_SKIPPED_FILES_LIST = DEFAULT_SKIPPED_FILES_HFH if self.is_folder_hashing_scan else DEFAULT_SKIPPED_FILES
|
|
712
|
-
DEFAULT_SKIPPED_EXT_LIST = DEFAULT_SKIPPED_EXT_HFH if self.is_folder_hashing_scan else DEFAULT_SKIPPED_EXT
|
|
713
556
|
|
|
714
557
|
if not self.hidden_files_folders and file_name.startswith('.'):
|
|
715
558
|
self.print_debug(f'Skipping file: {file_rel_path} (hidden file)')
|
|
@@ -6,6 +6,7 @@ from typing import Dict, List, Literal, Optional
|
|
|
6
6
|
|
|
7
7
|
from progress.bar import Bar
|
|
8
8
|
|
|
9
|
+
from scanoss.constants import DEFAULT_HFH_DEPTH
|
|
9
10
|
from scanoss.file_filters import FileFilters
|
|
10
11
|
from scanoss.scanoss_settings import ScanossSettings
|
|
11
12
|
from scanoss.scanossbase import ScanossBase
|
|
@@ -15,8 +16,6 @@ from scanoss.utils.simhash import WordFeatureSet, fingerprint, simhash, vectoriz
|
|
|
15
16
|
|
|
16
17
|
MINIMUM_FILE_COUNT = 8
|
|
17
18
|
MINIMUM_CONCATENATED_NAME_LENGTH = 32
|
|
18
|
-
MAXIMUM_FILE_NAME_LENGTH = 32
|
|
19
|
-
|
|
20
19
|
|
|
21
20
|
class DirectoryNode:
|
|
22
21
|
"""
|
|
@@ -72,6 +71,12 @@ class FolderHasher:
|
|
|
72
71
|
|
|
73
72
|
It builds a directory tree (DirectoryNode) and computes the associated
|
|
74
73
|
hash data for the folder.
|
|
74
|
+
|
|
75
|
+
Args:
|
|
76
|
+
scan_dir (str): The directory to be hashed.
|
|
77
|
+
config (FolderHasherConfig): Configuration parameters for the folder hasher.
|
|
78
|
+
scanoss_settings (Optional[ScanossSettings]): Optional settings for Scanoss.
|
|
79
|
+
depth (int): How many levels to hash from the root directory (default: 1).
|
|
75
80
|
"""
|
|
76
81
|
|
|
77
82
|
def __init__(
|
|
@@ -79,6 +84,7 @@ class FolderHasher:
|
|
|
79
84
|
scan_dir: str,
|
|
80
85
|
config: FolderHasherConfig,
|
|
81
86
|
scanoss_settings: Optional[ScanossSettings] = None,
|
|
87
|
+
depth: int = DEFAULT_HFH_DEPTH,
|
|
82
88
|
):
|
|
83
89
|
self.base = ScanossBase(
|
|
84
90
|
debug=config.debug,
|
|
@@ -101,6 +107,7 @@ class FolderHasher:
|
|
|
101
107
|
|
|
102
108
|
self.scan_dir = scan_dir
|
|
103
109
|
self.tree = None
|
|
110
|
+
self.depth = depth
|
|
104
111
|
|
|
105
112
|
def hash_directory(self, path: str) -> dict:
|
|
106
113
|
"""
|
|
@@ -123,7 +130,10 @@ class FolderHasher:
|
|
|
123
130
|
|
|
124
131
|
return tree
|
|
125
132
|
|
|
126
|
-
def _build_root_node(
|
|
133
|
+
def _build_root_node(
|
|
134
|
+
self,
|
|
135
|
+
path: str,
|
|
136
|
+
) -> DirectoryNode:
|
|
127
137
|
"""
|
|
128
138
|
Build a directory tree from the given path with file information.
|
|
129
139
|
|
|
@@ -140,7 +150,7 @@ class FolderHasher:
|
|
|
140
150
|
root_node = DirectoryNode(str(root))
|
|
141
151
|
|
|
142
152
|
all_files = [
|
|
143
|
-
f for f in root.rglob('*') if f.is_file()
|
|
153
|
+
f for f in root.rglob('*') if f.is_file()
|
|
144
154
|
]
|
|
145
155
|
filtered_files = self.file_filters.get_filtered_files_from_files(all_files, str(root))
|
|
146
156
|
|
|
@@ -180,7 +190,7 @@ class FolderHasher:
|
|
|
180
190
|
bar.finish()
|
|
181
191
|
return root_node
|
|
182
192
|
|
|
183
|
-
def _hash_calc_from_node(self, node: DirectoryNode) -> dict:
|
|
193
|
+
def _hash_calc_from_node(self, node: DirectoryNode, current_depth: int = 1) -> dict:
|
|
184
194
|
"""
|
|
185
195
|
Recursively compute folder hash data for a directory node.
|
|
186
196
|
|
|
@@ -189,12 +199,13 @@ class FolderHasher:
|
|
|
189
199
|
|
|
190
200
|
Args:
|
|
191
201
|
node (DirectoryNode): The directory node to compute the hash for.
|
|
202
|
+
current_depth (int): The current depth level (1-based, root is depth 1).
|
|
192
203
|
|
|
193
204
|
Returns:
|
|
194
205
|
dict: The computed hash data for the node.
|
|
195
206
|
"""
|
|
196
207
|
hash_data = self._hash_calc(node)
|
|
197
|
-
|
|
208
|
+
|
|
198
209
|
# Safely calculate relative path
|
|
199
210
|
try:
|
|
200
211
|
node_path = Path(node.path).resolve()
|
|
@@ -204,13 +215,18 @@ class FolderHasher:
|
|
|
204
215
|
# If relative_to fails, use the node path as is or a fallback
|
|
205
216
|
rel_path = Path(node.path).name if node.path else Path('.')
|
|
206
217
|
|
|
218
|
+
# Only process children if we haven't reached the depth limit
|
|
219
|
+
children = []
|
|
220
|
+
if current_depth < self.depth:
|
|
221
|
+
children = [self._hash_calc_from_node(child, current_depth + 1) for child in node.children.values()]
|
|
222
|
+
|
|
207
223
|
return {
|
|
208
224
|
'path_id': str(rel_path),
|
|
209
225
|
'sim_hash_names': f'{hash_data["name_hash"]:02x}' if hash_data['name_hash'] is not None else None,
|
|
210
226
|
'sim_hash_content': f'{hash_data["content_hash"]:02x}' if hash_data['content_hash'] is not None else None,
|
|
211
227
|
'sim_hash_dir_names': f'{hash_data["dir_hash"]:02x}' if hash_data['dir_hash'] is not None else None,
|
|
212
228
|
'lang_extensions': hash_data['lang_extensions'],
|
|
213
|
-
'children':
|
|
229
|
+
'children': children,
|
|
214
230
|
}
|
|
215
231
|
|
|
216
232
|
def _hash_calc(self, node: DirectoryNode) -> dict:
|
|
@@ -237,8 +253,6 @@ class FolderHasher:
|
|
|
237
253
|
|
|
238
254
|
for file in node.files:
|
|
239
255
|
key_str = file.key_str
|
|
240
|
-
if key_str in processed_hashes:
|
|
241
|
-
continue
|
|
242
256
|
|
|
243
257
|
file_name = os.path.basename(file.path)
|
|
244
258
|
|
|
@@ -29,7 +29,12 @@ from typing import Dict, Optional
|
|
|
29
29
|
|
|
30
30
|
from progress.spinner import Spinner
|
|
31
31
|
|
|
32
|
-
from scanoss.constants import
|
|
32
|
+
from scanoss.constants import (
|
|
33
|
+
DEFAULT_HFH_DEPTH,
|
|
34
|
+
DEFAULT_HFH_MIN_ACCEPTED_SCORE,
|
|
35
|
+
DEFAULT_HFH_RANK_THRESHOLD,
|
|
36
|
+
DEFAULT_HFH_RECURSIVE_THRESHOLD,
|
|
37
|
+
)
|
|
33
38
|
from scanoss.cyclonedx import CycloneDx
|
|
34
39
|
from scanoss.file_filters import FileFilters
|
|
35
40
|
from scanoss.scanners.folder_hasher import FolderHasher
|
|
@@ -48,13 +53,16 @@ class ScannerHFH:
|
|
|
48
53
|
and calculates simhash values based on file names and content to detect folder-level similarities.
|
|
49
54
|
"""
|
|
50
55
|
|
|
51
|
-
def __init__(
|
|
56
|
+
def __init__( # noqa: PLR0913
|
|
52
57
|
self,
|
|
53
58
|
scan_dir: str,
|
|
54
59
|
config: ScannerConfig,
|
|
55
60
|
client: Optional[ScanossGrpc] = None,
|
|
56
61
|
scanoss_settings: Optional[ScanossSettings] = None,
|
|
57
62
|
rank_threshold: int = DEFAULT_HFH_RANK_THRESHOLD,
|
|
63
|
+
depth: int = DEFAULT_HFH_DEPTH,
|
|
64
|
+
recursive_threshold: float = DEFAULT_HFH_RECURSIVE_THRESHOLD,
|
|
65
|
+
min_accepted_score: float = DEFAULT_HFH_MIN_ACCEPTED_SCORE,
|
|
58
66
|
):
|
|
59
67
|
"""
|
|
60
68
|
Initialize the ScannerHFH.
|
|
@@ -65,6 +73,9 @@ class ScannerHFH:
|
|
|
65
73
|
client (ScanossGrpc): gRPC client for communicating with the scanning service.
|
|
66
74
|
scanoss_settings (Optional[ScanossSettings]): Optional settings for Scanoss.
|
|
67
75
|
rank_threshold (int): Get results with rank below this threshold (default: 5).
|
|
76
|
+
depth (int): How many levels to scan (default: 1).
|
|
77
|
+
recursive_threshold (float): Minimum score threshold to consider a match (default: 0.25).
|
|
78
|
+
min_accepted_score (float): Only show results with a score at or above this threshold (default: 0.15).
|
|
68
79
|
"""
|
|
69
80
|
self.base = ScanossBase(
|
|
70
81
|
debug=config.debug,
|
|
@@ -87,12 +98,15 @@ class ScannerHFH:
|
|
|
87
98
|
scan_dir=scan_dir,
|
|
88
99
|
config=config,
|
|
89
100
|
scanoss_settings=scanoss_settings,
|
|
101
|
+
depth=depth,
|
|
90
102
|
)
|
|
91
103
|
|
|
92
104
|
self.scan_dir = scan_dir
|
|
93
105
|
self.client = client
|
|
94
106
|
self.scan_results = None
|
|
95
107
|
self.rank_threshold = rank_threshold
|
|
108
|
+
self.recursive_threshold = recursive_threshold
|
|
109
|
+
self.min_accepted_score = min_accepted_score
|
|
96
110
|
|
|
97
111
|
def scan(self) -> Optional[Dict]:
|
|
98
112
|
"""
|
|
@@ -102,8 +116,10 @@ class ScannerHFH:
|
|
|
102
116
|
Optional[Dict]: The folder hash response from the gRPC client, or None if an error occurs.
|
|
103
117
|
"""
|
|
104
118
|
hfh_request = {
|
|
105
|
-
'root': self.folder_hasher.hash_directory(self.scan_dir),
|
|
119
|
+
'root': self.folder_hasher.hash_directory(path=self.scan_dir),
|
|
106
120
|
'rank_threshold': self.rank_threshold,
|
|
121
|
+
'recursive_threshold': self.recursive_threshold,
|
|
122
|
+
'min_accepted_score': self.min_accepted_score,
|
|
107
123
|
}
|
|
108
124
|
|
|
109
125
|
spinner = Spinner('Scanning folder...')
|
|
@@ -193,7 +209,7 @@ class ScannerHFHPresenter(AbstractPresenter):
|
|
|
193
209
|
}
|
|
194
210
|
]
|
|
195
211
|
}
|
|
196
|
-
|
|
212
|
+
|
|
197
213
|
get_vulnerabilities_json_request = {
|
|
198
214
|
'purls': [{'purl': purl, 'requirement': best_match_version['version']}],
|
|
199
215
|
}
|
|
@@ -210,10 +226,10 @@ class ScannerHFHPresenter(AbstractPresenter):
|
|
|
210
226
|
error_msg = 'ERROR: Failed to produce CycloneDX output'
|
|
211
227
|
self.base.print_stderr(error_msg)
|
|
212
228
|
return None
|
|
213
|
-
|
|
229
|
+
|
|
214
230
|
if vulnerabilities:
|
|
215
231
|
cdx_output = cdx.append_vulnerabilities(cdx_output, vulnerabilities, purl)
|
|
216
|
-
|
|
232
|
+
|
|
217
233
|
return json.dumps(cdx_output, indent=2)
|
|
218
234
|
except Exception as e:
|
|
219
235
|
self.base.print_stderr(f'ERROR: Failed to get license information: {e}')
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
date: 20251007151647, utime: 1759850207
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/components/v2/scanoss_components_pb2_grpc.py
RENAMED
|
File without changes
|
{scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py
RENAMED
|
File without changes
|
{scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py
RENAMED
|
File without changes
|
{scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2.py
RENAMED
|
File without changes
|
{scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2_grpc.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{scanoss-1.35.0 → scanoss-1.37.0}/src/scanoss/inspection/dependency_track/project_violation.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|