scanoss 1.35.0__py3-none-any.whl → 1.37.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scanoss/__init__.py CHANGED
@@ -22,4 +22,4 @@ SPDX-License-Identifier: MIT
22
22
  THE SOFTWARE.
23
23
  """
24
24
 
25
- __version__ = '1.35.0'
25
+ __version__ = '1.37.0'
@@ -3,6 +3,7 @@
3
3
  import warnings
4
4
 
5
5
  import grpc
6
+ import warnings
6
7
 
7
8
  GRPC_GENERATED_VERSION = '1.73.1'
8
9
  GRPC_VERSION = grpc.__version__
scanoss/cli.py CHANGED
@@ -33,6 +33,7 @@ from typing import List
33
33
  import pypac
34
34
 
35
35
  from scanoss.cryptography import Cryptography, create_cryptography_config_from_args
36
+ from scanoss.delta import Delta
36
37
  from scanoss.export.dependency_track import DependencyTrackExporter
37
38
  from scanoss.inspection.dependency_track.project_violation import (
38
39
  DependencyTrackProjectViolationPolicyCheck,
@@ -59,7 +60,10 @@ from . import __version__
59
60
  from .components import Components
60
61
  from .constants import (
61
62
  DEFAULT_API_TIMEOUT,
63
+ DEFAULT_HFH_DEPTH,
64
+ DEFAULT_HFH_MIN_ACCEPTED_SCORE,
62
65
  DEFAULT_HFH_RANK_THRESHOLD,
66
+ DEFAULT_HFH_RECURSIVE_THRESHOLD,
63
67
  DEFAULT_POST_SIZE,
64
68
  DEFAULT_RETRY,
65
69
  DEFAULT_TIMEOUT,
@@ -869,6 +873,27 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
869
873
  help='Filter results to only show those with rank value at or below this threshold (e.g., --rank-threshold 3 '
870
874
  'returns results with rank 1, 2, or 3). Lower rank values indicate higher quality matches.',
871
875
  )
876
+ p_folder_scan.add_argument(
877
+ '--depth',
878
+ type=int,
879
+ default=DEFAULT_HFH_DEPTH,
880
+ help=f'Defines how deep to scan the root directory (optional - default {DEFAULT_HFH_DEPTH})',
881
+ )
882
+ p_folder_scan.add_argument(
883
+ '--recursive-threshold',
884
+ type=float,
885
+ default=DEFAULT_HFH_RECURSIVE_THRESHOLD,
886
+ help=f'Minimum score threshold to consider a match (optional - default: {DEFAULT_HFH_RECURSIVE_THRESHOLD})',
887
+ )
888
+ p_folder_scan.add_argument(
889
+ '--min-accepted-score',
890
+ type=float,
891
+ default=DEFAULT_HFH_MIN_ACCEPTED_SCORE,
892
+ help=(
893
+ 'Only show results with a score at or above this threshold '
894
+ f'(optional - default: {DEFAULT_HFH_MIN_ACCEPTED_SCORE})'
895
+ ),
896
+ )
872
897
  p_folder_scan.set_defaults(func=folder_hashing_scan)
873
898
 
874
899
  # Sub-command: folder-hash
@@ -887,8 +912,41 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
887
912
  default='json',
888
913
  help='Result output format (optional - default: json)',
889
914
  )
915
+ p_folder_hash.add_argument(
916
+ '--depth',
917
+ type=int,
918
+ default=DEFAULT_HFH_DEPTH,
919
+ help=f'Defines how deep to hash the root directory (optional - default {DEFAULT_HFH_DEPTH})',
920
+ )
890
921
  p_folder_hash.set_defaults(func=folder_hash)
891
922
 
923
+ # Sub-command: delta
924
+ p_delta = subparsers.add_parser(
925
+ 'delta',
926
+ aliases=['dl'],
927
+ description=f'SCANOSS Delta commands: {__version__}',
928
+ help='Delta support commands',
929
+ )
930
+
931
+ delta_sub = p_delta.add_subparsers(
932
+ title='Delta Commands',
933
+ dest='subparsercmd',
934
+ description='Delta sub-commands',
935
+ help='Delta sub-commands'
936
+ )
937
+
938
+ # Delta Sub-command: copy
939
+ p_copy = delta_sub.add_parser(
940
+ 'copy',
941
+ aliases=['cp'],
942
+ description=f'Copy file list into delta dir: {__version__}',
943
+ help='Copy the given list of files into a delta directory',
944
+ )
945
+ p_copy.add_argument('--input', '-i', type=str, required=True, help='Input file with diff list')
946
+ p_copy.add_argument('--folder', '-fd', type=str, help='Delta folder to copy into')
947
+ p_copy.add_argument('--root', '-rd', type=str, help='Root directory to place delta folder')
948
+ p_copy.set_defaults(func=delta_copy)
949
+
892
950
  # Output options
893
951
  for p in [
894
952
  p_scan,
@@ -909,6 +967,7 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
909
967
  p_crypto_hints,
910
968
  p_crypto_versions_in_range,
911
969
  c_licenses,
970
+ p_copy,
912
971
  ]:
913
972
  p.add_argument('--output', '-o', type=str, help='Output result file name (optional - default stdout).')
914
973
 
@@ -1106,6 +1165,7 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
1106
1165
  p_crypto_versions_in_range,
1107
1166
  c_licenses,
1108
1167
  e_dt,
1168
+ p_copy
1109
1169
  ]:
1110
1170
  p.add_argument('--debug', '-d', action='store_true', help='Enable debug messages')
1111
1171
  p.add_argument('--trace', '-t', action='store_true', help='Enable trace messages, including API posts')
@@ -1126,7 +1186,8 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
1126
1186
  sys.exit(1)
1127
1187
  elif (
1128
1188
  args.subparser
1129
- in ('utils', 'ut', 'component', 'comp', 'inspect', 'insp', 'ins', 'crypto', 'cr', 'export', 'exp')
1189
+ in ('utils', 'ut', 'component', 'comp', 'inspect', 'insp', 'ins',
1190
+ 'crypto', 'cr', 'export', 'exp', 'delta', 'dl')
1130
1191
  ) and not args.subparsercmd:
1131
1192
  parser.parse_args([args.subparser, '--help']) # Force the sub-command group's help to be displayed
1132
1193
  sys.exit(1)
@@ -2456,6 +2517,9 @@ def folder_hashing_scan(parser, args):
2456
2517
  client=client,
2457
2518
  scanoss_settings=scanoss_settings,
2458
2519
  rank_threshold=args.rank_threshold,
2520
+ depth=args.depth,
2521
+ recursive_threshold=args.recursive_threshold,
2522
+ min_accepted_score=args.min_accepted_score,
2459
2523
  )
2460
2524
 
2461
2525
  if scanner.scan():
@@ -2489,6 +2553,7 @@ def folder_hash(parser, args):
2489
2553
  scan_dir=args.scan_dir,
2490
2554
  config=folder_hasher_config,
2491
2555
  scanoss_settings=scanoss_settings,
2556
+ depth=args.depth,
2492
2557
  )
2493
2558
 
2494
2559
  folder_hasher.hash_directory(args.scan_dir)
@@ -2569,6 +2634,43 @@ def initialise_empty_file(filename: str):
2569
2634
  print_stderr(f'Error: Unable to create output file {filename}: {e}')
2570
2635
  sys.exit(1)
2571
2636
 
2637
+ def delta_copy(parser, args):
2638
+ """
2639
+ Handle delta copy command.
2640
+
2641
+ Copies files listed in an input file to a target directory while preserving
2642
+ their directory structure. Creates a unique delta directory if none is specified.
2643
+
2644
+ Parameters
2645
+ ----------
2646
+ parser : ArgumentParser
2647
+ Command line parser object for help display
2648
+ args : Namespace
2649
+ Parsed command line arguments containing:
2650
+ - input: Path to file containing list of files to copy
2651
+ - folder: Optional target directory path
2652
+ - output: Optional output file path
2653
+ """
2654
+ # Validate required input file parameter
2655
+ if args.input is None:
2656
+ print_stderr('ERROR: Input file is required for copying')
2657
+ parser.parse_args([args.subparser, args.subparsercmd, '-h'])
2658
+ sys.exit(1)
2659
+ # Initialise output file if specified
2660
+ if args.output:
2661
+ initialise_empty_file(args.output)
2662
+ try:
2663
+ # Create and configure delta copy command
2664
+ delta = Delta(debug=args.debug, trace=args.trace, quiet=args.quiet, filepath=args.input, folder=args.folder,
2665
+ output=args.output, root_dir=args.root)
2666
+ # Execute copy and exit with appropriate status code
2667
+ status, _ = delta.copy()
2668
+ sys.exit(status)
2669
+ except Exception as e:
2670
+ print_stderr(e)
2671
+ if args.debug:
2672
+ traceback.print_exc()
2673
+ sys.exit(1)
2572
2674
 
2573
2675
  def main():
2574
2676
  """
scanoss/constants.py CHANGED
@@ -13,4 +13,7 @@ DEFAULT_URL2 = 'https://api.scanoss.com' # default premium service URL
13
13
 
14
14
  DEFAULT_API_TIMEOUT = 600
15
15
 
16
- DEFAULT_HFH_RANK_THRESHOLD = 5
16
+ DEFAULT_HFH_RANK_THRESHOLD = 5
17
+ DEFAULT_HFH_DEPTH = 1
18
+ DEFAULT_HFH_RECURSIVE_THRESHOLD = 0.8
19
+ DEFAULT_HFH_MIN_ACCEPTED_SCORE = 0.15
@@ -1 +1 @@
1
- date: 20251007151647, utime: 1759850207
1
+ date: 20251017144212, utime: 1760712132
scanoss/delta.py ADDED
@@ -0,0 +1,197 @@
1
+ """
2
+ SPDX-License-Identifier: MIT
3
+
4
+ Copyright (c) 2025, SCANOSS
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
+ """
24
+ import os
25
+ import shutil
26
+ import tempfile
27
+ from typing import Optional
28
+
29
+ from .scanossbase import ScanossBase
30
+
31
+
32
+ class Delta(ScanossBase):
33
+ """
34
+ Handle delta scan operations by copying files into a dedicated delta directory.
35
+
36
+ This class manages the creation of delta directories and copying of specified files
37
+ while preserving the directory structure. Files are read from an input file where each
38
+ line contains a file path to copy.
39
+ """
40
+
41
+ def __init__( # noqa: PLR0913
42
+ self,
43
+ debug: bool = False,
44
+ trace: bool = False,
45
+ quiet: bool = False,
46
+ filepath: str = None,
47
+ folder: str = None,
48
+ output: str = None,
49
+ root_dir: str = None,
50
+ ):
51
+ """
52
+ Initialise the Delta instance.
53
+
54
+ :param debug: Enable debug logging.
55
+ :param trace: Enable trace logging.
56
+ :param quiet: Enable quiet mode (suppress non-essential output).
57
+ :param filepath: Path to an input file containing a list of files to copy.
58
+ :param folder: A target delta directory path (auto-generated if not provided).
59
+ :param output: Output file path for the delta directory location (stdout if not provided).
60
+ """
61
+ super().__init__(debug, trace, quiet)
62
+ self.filepath = filepath
63
+ self.folder = folder
64
+ self.output = output
65
+ self.root_dir = root_dir if root_dir else '.'
66
+
67
+ def copy(self, input_file: str = None):
68
+ """
69
+ Copy files listed in the input file to the delta directory.
70
+
71
+ Reads the input file line by line, where each line contains a file path.
72
+ Creates the delta directory if it doesn't exist, then copies each file
73
+ while preserving its directory structure.
74
+
75
+ :return: Tuple of (status_code, folder_path) where status_code is 0 for success,
76
+ 1 for error, and folder_path is the delta directory path
77
+ """
78
+ input_file = input_file if input_file else self.filepath
79
+ if not input_file:
80
+ self.print_stderr('ERROR: No input file specified')
81
+ return 1, ''
82
+ # Validate that an input file exists
83
+ if not os.path.isfile(input_file):
84
+ self.print_stderr(f'ERROR: Input file {input_file} does not exist or is not a file')
85
+ return 1, ''
86
+ # Load the input file and validate it contains valid file paths
87
+ files = self.load_input_file(input_file)
88
+ if files is None:
89
+ return 1, ''
90
+ # Create delta dir (folder)
91
+ delta_folder = self.create_delta_dir(self.folder, self.root_dir)
92
+ if not delta_folder:
93
+ return 1, ''
94
+ # Print delta folder location to output
95
+ self.print_to_file_or_stdout(delta_folder, self.output)
96
+ # Process each file and copy it to the delta dir
97
+ for source_file in files:
98
+ # Normalise the source path to handle ".." and redundant separators
99
+ normalised_source = os.path.normpath(source_file)
100
+ if '..' in normalised_source:
101
+ self.print_stderr(f'WARNING: Source path escapes root directory for {source_file}. Skipping.')
102
+ continue
103
+ # Resolve to the absolute path for source validation
104
+ abs_source = os.path.abspath(os.path.join(self.root_dir, normalised_source))
105
+ # Check if the source file exists and is a file
106
+ if not os.path.exists(abs_source) or not os.path.isfile(abs_source):
107
+ self.print_stderr(f'WARNING: File {source_file} does not exist or is not a file, skipping')
108
+ continue
109
+ # Use a normalised source for destination to prevent traversal
110
+ dest_path = os.path.normpath(os.path.join(self.root_dir, delta_folder, normalised_source.lstrip(os.sep)))
111
+ # Final safety check: ensure destination is within the delta folder
112
+ abs_dest = os.path.abspath(dest_path)
113
+ abs_folder = os.path.abspath(os.path.join(self.root_dir, delta_folder))
114
+ if not abs_dest.startswith(abs_folder + os.sep):
115
+ self.print_stderr(
116
+ f'WARNING: Destination path ({abs_dest}) escapes delta directory for {source_file}. Skipping.')
117
+ continue
118
+ # Create the destination directory if it doesn't exist and copy the file
119
+ try:
120
+ dest_dir = os.path.dirname(dest_path)
121
+ if dest_dir:
122
+ self.print_trace(f'Creating directory {dest_dir}...')
123
+ os.makedirs(dest_dir, exist_ok=True)
124
+ self.print_debug(f'Copying {source_file} to {dest_path} ...')
125
+ shutil.copy(abs_source, dest_path)
126
+ except (OSError, shutil.Error) as e:
127
+ self.print_stderr(f'ERROR: Failed to copy {source_file} to {dest_path}: {e}')
128
+ return 1, ''
129
+ return 0, delta_folder
130
+
131
+ def create_delta_dir(self, folder: str, root_dir: str = '.') -> str or None:
132
+ """
133
+ Create the delta directory.
134
+
135
+ If no folder is specified, creates a unique temporary directory with
136
+ a 'delta-' prefix in root_dir (the current directory by default). If a folder is specified,
137
+ validates that it doesn't already exist before creating it.
138
+
139
+ :param root_dir: Root directory to create the delta directory in (default: current directory)
140
+ :param folder: Optional target directory
141
+ :return: Path to the delta directory, or None if it already exists or creation fails
142
+ """
143
+ if folder:
144
+ # Resolve a relative folder under root_dir so checks/creation apply to the right place
145
+ resolved = folder if os.path.isabs(folder) else os.path.join(root_dir, folder)
146
+ resolved = os.path.normpath(resolved)
147
+ # Validate the target directory doesn't already exist and create it
148
+ if os.path.exists(resolved):
149
+ self.print_stderr(f'ERROR: Folder {resolved} already exists.')
150
+ return None
151
+ else:
152
+ try:
153
+ self.print_debug(f'Creating delta directory {resolved}...')
154
+ os.makedirs(resolved)
155
+ except (OSError, IOError) as e:
156
+ self.print_stderr(f'ERROR: Failed to create directory {resolved}: {e}')
157
+ return None
158
+ else:
159
+ # Create a unique temporary directory in the given root directory
160
+ try:
161
+ self.print_debug(f'Creating temporary delta directory in {root_dir} ...')
162
+ folder = tempfile.mkdtemp(prefix="delta-", dir=root_dir)
163
+ if folder:
164
+ folder = os.path.relpath(folder, start=root_dir) # Get the relative path from root_dir
165
+ self.print_debug(f'Created temporary delta directory: {folder}')
166
+ except (OSError, IOError) as e:
167
+ self.print_stderr(f'ERROR: Failed to create temporary directory in {root_dir}: {e}')
168
+ return None
169
+ return folder
170
+
171
+ def load_input_file(self, input_file: str) -> Optional[list[str]]:
172
+ """
173
+ Loads and parses the input file line by line. Each line in the input
174
+ file represents a source file path, which will be stripped of trailing
175
+ whitespace and appended to the resulting list if it is not empty.
176
+
177
+ :param input_file: The path to the input file to be read.
178
+ :type input_file: String
179
+ :return: A list of source file paths extracted from the input file,
180
+ or None if an error occurs or the file path is invalid.
181
+ :rtype: list[str] or None
182
+ """
183
+ files = []
184
+ if input_file:
185
+ try:
186
+ with open(input_file, 'r', encoding='utf-8') as f:
187
+ for line in f:
188
+ source_file = line.rstrip()
189
+ if source_file:
190
+ # Save the file path without any leading separators
191
+ files.append(source_file.lstrip(os.sep))
192
+ # End of for loop
193
+ except (OSError, IOError) as e:
194
+ self.print_stderr(f'ERROR: Failed to read input file; {input_file}: {e}')
195
+ return None
196
+ self.print_debug(f'Loaded {len(files)} files from input file.')
197
+ return files
scanoss/file_filters.py CHANGED
@@ -269,162 +269,6 @@ DEFAULT_SKIPPED_EXT = {
269
269
  'sqlite3',
270
270
  }
271
271
 
272
- # TODO: For hfh add the .gitignore patterns
273
- DEFAULT_SKIPPED_EXT_HFH = {
274
- '.1',
275
- '.2',
276
- '.3',
277
- '.4',
278
- '.5',
279
- '.6',
280
- '.7',
281
- '.8',
282
- '.9',
283
- '.ac',
284
- '.adoc',
285
- '.am',
286
- '.asciidoc',
287
- '.bmp',
288
- '.build',
289
- '.cfg',
290
- '.chm',
291
- '.class',
292
- '.cmake',
293
- '.cnf',
294
- '.conf',
295
- '.config',
296
- '.contributors',
297
- '.copying',
298
- '.crt',
299
- '.csproj',
300
- '.css',
301
- '.csv',
302
- '.dat',
303
- '.data',
304
- '.dtd',
305
- '.dts',
306
- '.iws',
307
- '.c9',
308
- '.c9revisions',
309
- '.dtsi',
310
- '.dump',
311
- '.eot',
312
- '.eps',
313
- '.geojson',
314
- '.gif',
315
- '.glif',
316
- '.gmo',
317
- '.guess',
318
- '.hex',
319
- '.htm',
320
- '.html',
321
- '.ico',
322
- '.iml',
323
- '.in',
324
- '.inc',
325
- '.info',
326
- '.ini',
327
- '.ipynb',
328
- '.jpeg',
329
- '.jpg',
330
- '.json',
331
- '.jsonld',
332
- '.lock',
333
- '.log',
334
- '.m4',
335
- '.map',
336
- '.md5',
337
- '.meta',
338
- '.mk',
339
- '.mxml',
340
- '.o',
341
- '.otf',
342
- '.out',
343
- '.pbtxt',
344
- '.pdf',
345
- '.pem',
346
- '.phtml',
347
- '.plist',
348
- '.png',
349
- '.prefs',
350
- '.properties',
351
- '.pyc',
352
- '.qdoc',
353
- '.result',
354
- '.rgb',
355
- '.rst',
356
- '.scss',
357
- '.sha',
358
- '.sha1',
359
- '.sha2',
360
- '.sha256',
361
- '.sln',
362
- '.spec',
363
- '.sub',
364
- '.svg',
365
- '.svn-base',
366
- '.tab',
367
- '.template',
368
- '.test',
369
- '.tex',
370
- '.tiff',
371
- '.ttf',
372
- '.txt',
373
- '.utf-8',
374
- '.vim',
375
- '.wav',
376
- '.woff',
377
- '.woff2',
378
- '.xht',
379
- '.xhtml',
380
- '.xml',
381
- '.xpm',
382
- '.xsd',
383
- '.xul',
384
- '.yaml',
385
- '.yml',
386
- '.wfp',
387
- '.editorconfig',
388
- '.dotcover',
389
- '.pid',
390
- '.lcov',
391
- '.egg',
392
- '.manifest',
393
- '.cache',
394
- '.coverage',
395
- '.cover',
396
- '.gem',
397
- '.lst',
398
- '.pickle',
399
- '.pdb',
400
- '.gml',
401
- '.pot',
402
- '.plt',
403
- '.whml',
404
- '.pom',
405
- '.smtml',
406
- '.min.js',
407
- '.mf',
408
- '.base64',
409
- '.s',
410
- '.diff',
411
- '.patch',
412
- '.rules',
413
- # File endings
414
- '-doc',
415
- 'config',
416
- 'news',
417
- 'readme',
418
- 'swiftdoc',
419
- 'texidoc',
420
- 'todo',
421
- 'version',
422
- 'ignore',
423
- 'manifest',
424
- 'sqlite',
425
- 'sqlite3',
426
- }
427
-
428
272
 
429
273
  class FileFilters(ScanossBase):
430
274
  """
@@ -707,9 +551,8 @@ class FileFilters(ScanossBase):
707
551
  bool: True if file should be skipped, False otherwise
708
552
  """
709
553
  file_name = os.path.basename(file_rel_path)
710
-
554
+ DEFAULT_SKIPPED_EXT_LIST = {} if self.is_folder_hashing_scan else DEFAULT_SKIPPED_EXT
711
555
  DEFAULT_SKIPPED_FILES_LIST = DEFAULT_SKIPPED_FILES_HFH if self.is_folder_hashing_scan else DEFAULT_SKIPPED_FILES
712
- DEFAULT_SKIPPED_EXT_LIST = DEFAULT_SKIPPED_EXT_HFH if self.is_folder_hashing_scan else DEFAULT_SKIPPED_EXT
713
556
 
714
557
  if not self.hidden_files_folders and file_name.startswith('.'):
715
558
  self.print_debug(f'Skipping file: {file_rel_path} (hidden file)')
@@ -6,6 +6,7 @@ from typing import Dict, List, Literal, Optional
6
6
 
7
7
  from progress.bar import Bar
8
8
 
9
+ from scanoss.constants import DEFAULT_HFH_DEPTH
9
10
  from scanoss.file_filters import FileFilters
10
11
  from scanoss.scanoss_settings import ScanossSettings
11
12
  from scanoss.scanossbase import ScanossBase
@@ -15,8 +16,6 @@ from scanoss.utils.simhash import WordFeatureSet, fingerprint, simhash, vectoriz
15
16
 
16
17
  MINIMUM_FILE_COUNT = 8
17
18
  MINIMUM_CONCATENATED_NAME_LENGTH = 32
18
- MAXIMUM_FILE_NAME_LENGTH = 32
19
-
20
19
 
21
20
  class DirectoryNode:
22
21
  """
@@ -72,6 +71,12 @@ class FolderHasher:
72
71
 
73
72
  It builds a directory tree (DirectoryNode) and computes the associated
74
73
  hash data for the folder.
74
+
75
+ Args:
76
+ scan_dir (str): The directory to be hashed.
77
+ config (FolderHasherConfig): Configuration parameters for the folder hasher.
78
+ scanoss_settings (Optional[ScanossSettings]): Optional settings for Scanoss.
79
+ depth (int): How many levels to hash from the root directory (default: 1).
75
80
  """
76
81
 
77
82
  def __init__(
@@ -79,6 +84,7 @@ class FolderHasher:
79
84
  scan_dir: str,
80
85
  config: FolderHasherConfig,
81
86
  scanoss_settings: Optional[ScanossSettings] = None,
87
+ depth: int = DEFAULT_HFH_DEPTH,
82
88
  ):
83
89
  self.base = ScanossBase(
84
90
  debug=config.debug,
@@ -101,6 +107,7 @@ class FolderHasher:
101
107
 
102
108
  self.scan_dir = scan_dir
103
109
  self.tree = None
110
+ self.depth = depth
104
111
 
105
112
  def hash_directory(self, path: str) -> dict:
106
113
  """
@@ -123,7 +130,10 @@ class FolderHasher:
123
130
 
124
131
  return tree
125
132
 
126
- def _build_root_node(self, path: str) -> DirectoryNode:
133
+ def _build_root_node(
134
+ self,
135
+ path: str,
136
+ ) -> DirectoryNode:
127
137
  """
128
138
  Build a directory tree from the given path with file information.
129
139
 
@@ -140,7 +150,7 @@ class FolderHasher:
140
150
  root_node = DirectoryNode(str(root))
141
151
 
142
152
  all_files = [
143
- f for f in root.rglob('*') if f.is_file() and len(f.name.encode('utf-8')) <= MAXIMUM_FILE_NAME_LENGTH
153
+ f for f in root.rglob('*') if f.is_file()
144
154
  ]
145
155
  filtered_files = self.file_filters.get_filtered_files_from_files(all_files, str(root))
146
156
 
@@ -180,7 +190,7 @@ class FolderHasher:
180
190
  bar.finish()
181
191
  return root_node
182
192
 
183
- def _hash_calc_from_node(self, node: DirectoryNode) -> dict:
193
+ def _hash_calc_from_node(self, node: DirectoryNode, current_depth: int = 1) -> dict:
184
194
  """
185
195
  Recursively compute folder hash data for a directory node.
186
196
 
@@ -189,12 +199,13 @@ class FolderHasher:
189
199
 
190
200
  Args:
191
201
  node (DirectoryNode): The directory node to compute the hash for.
202
+ current_depth (int): The current depth level (1-based, root is depth 1).
192
203
 
193
204
  Returns:
194
205
  dict: The computed hash data for the node.
195
206
  """
196
207
  hash_data = self._hash_calc(node)
197
-
208
+
198
209
  # Safely calculate relative path
199
210
  try:
200
211
  node_path = Path(node.path).resolve()
@@ -204,13 +215,18 @@ class FolderHasher:
204
215
  # If relative_to fails, use the node path as is or a fallback
205
216
  rel_path = Path(node.path).name if node.path else Path('.')
206
217
 
218
+ # Only process children if we haven't reached the depth limit
219
+ children = []
220
+ if current_depth < self.depth:
221
+ children = [self._hash_calc_from_node(child, current_depth + 1) for child in node.children.values()]
222
+
207
223
  return {
208
224
  'path_id': str(rel_path),
209
225
  'sim_hash_names': f'{hash_data["name_hash"]:02x}' if hash_data['name_hash'] is not None else None,
210
226
  'sim_hash_content': f'{hash_data["content_hash"]:02x}' if hash_data['content_hash'] is not None else None,
211
227
  'sim_hash_dir_names': f'{hash_data["dir_hash"]:02x}' if hash_data['dir_hash'] is not None else None,
212
228
  'lang_extensions': hash_data['lang_extensions'],
213
- 'children': [self._hash_calc_from_node(child) for child in node.children.values()],
229
+ 'children': children,
214
230
  }
215
231
 
216
232
  def _hash_calc(self, node: DirectoryNode) -> dict:
@@ -237,8 +253,6 @@ class FolderHasher:
237
253
 
238
254
  for file in node.files:
239
255
  key_str = file.key_str
240
- if key_str in processed_hashes:
241
- continue
242
256
 
243
257
  file_name = os.path.basename(file.path)
244
258
 
@@ -29,7 +29,12 @@ from typing import Dict, Optional
29
29
 
30
30
  from progress.spinner import Spinner
31
31
 
32
- from scanoss.constants import DEFAULT_HFH_RANK_THRESHOLD
32
+ from scanoss.constants import (
33
+ DEFAULT_HFH_DEPTH,
34
+ DEFAULT_HFH_MIN_ACCEPTED_SCORE,
35
+ DEFAULT_HFH_RANK_THRESHOLD,
36
+ DEFAULT_HFH_RECURSIVE_THRESHOLD,
37
+ )
33
38
  from scanoss.cyclonedx import CycloneDx
34
39
  from scanoss.file_filters import FileFilters
35
40
  from scanoss.scanners.folder_hasher import FolderHasher
@@ -48,13 +53,16 @@ class ScannerHFH:
48
53
  and calculates simhash values based on file names and content to detect folder-level similarities.
49
54
  """
50
55
 
51
- def __init__(
56
+ def __init__( # noqa: PLR0913
52
57
  self,
53
58
  scan_dir: str,
54
59
  config: ScannerConfig,
55
60
  client: Optional[ScanossGrpc] = None,
56
61
  scanoss_settings: Optional[ScanossSettings] = None,
57
62
  rank_threshold: int = DEFAULT_HFH_RANK_THRESHOLD,
63
+ depth: int = DEFAULT_HFH_DEPTH,
64
+ recursive_threshold: float = DEFAULT_HFH_RECURSIVE_THRESHOLD,
65
+ min_accepted_score: float = DEFAULT_HFH_MIN_ACCEPTED_SCORE,
58
66
  ):
59
67
  """
60
68
  Initialize the ScannerHFH.
@@ -65,6 +73,9 @@ class ScannerHFH:
65
73
  client (ScanossGrpc): gRPC client for communicating with the scanning service.
66
74
  scanoss_settings (Optional[ScanossSettings]): Optional settings for Scanoss.
67
75
  rank_threshold (int): Get results with rank below this threshold (default: 5).
76
+ depth (int): How many levels to scan (default: 1).
77
+ recursive_threshold (float): Minimum score threshold to consider a match (default: 0.8).
78
+ min_accepted_score (float): Only show results with a score at or above this threshold (default: 0.15).
68
79
  """
69
80
  self.base = ScanossBase(
70
81
  debug=config.debug,
@@ -87,12 +98,15 @@ class ScannerHFH:
87
98
  scan_dir=scan_dir,
88
99
  config=config,
89
100
  scanoss_settings=scanoss_settings,
101
+ depth=depth,
90
102
  )
91
103
 
92
104
  self.scan_dir = scan_dir
93
105
  self.client = client
94
106
  self.scan_results = None
95
107
  self.rank_threshold = rank_threshold
108
+ self.recursive_threshold = recursive_threshold
109
+ self.min_accepted_score = min_accepted_score
96
110
 
97
111
  def scan(self) -> Optional[Dict]:
98
112
  """
@@ -102,8 +116,10 @@ class ScannerHFH:
102
116
  Optional[Dict]: The folder hash response from the gRPC client, or None if an error occurs.
103
117
  """
104
118
  hfh_request = {
105
- 'root': self.folder_hasher.hash_directory(self.scan_dir),
119
+ 'root': self.folder_hasher.hash_directory(path=self.scan_dir),
106
120
  'rank_threshold': self.rank_threshold,
121
+ 'recursive_threshold': self.recursive_threshold,
122
+ 'min_accepted_score': self.min_accepted_score,
107
123
  }
108
124
 
109
125
  spinner = Spinner('Scanning folder...')
@@ -193,7 +209,7 @@ class ScannerHFHPresenter(AbstractPresenter):
193
209
  }
194
210
  ]
195
211
  }
196
-
212
+
197
213
  get_vulnerabilities_json_request = {
198
214
  'purls': [{'purl': purl, 'requirement': best_match_version['version']}],
199
215
  }
@@ -210,10 +226,10 @@ class ScannerHFHPresenter(AbstractPresenter):
210
226
  error_msg = 'ERROR: Failed to produce CycloneDX output'
211
227
  self.base.print_stderr(error_msg)
212
228
  return None
213
-
229
+
214
230
  if vulnerabilities:
215
231
  cdx_output = cdx.append_vulnerabilities(cdx_output, vulnerabilities, purl)
216
-
232
+
217
233
  return json.dumps(cdx_output, indent=2)
218
234
  except Exception as e:
219
235
  self.base.print_stderr(f'ERROR: Failed to get license information: {e}')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scanoss
3
- Version: 1.35.0
3
+ Version: 1.37.0
4
4
  Summary: Simple Python library to leverage the SCANOSS APIs
5
5
  Home-page: https://scanoss.com
6
6
  Author: SCANOSS
@@ -6,14 +6,15 @@ protoc_gen_swagger/options/annotations_pb2_grpc.py,sha256=KZOW9Ciio-f9iL42FuLFnS
6
6
  protoc_gen_swagger/options/openapiv2_pb2.py,sha256=w0xDs63uyrWGgzRaQZXfJpfI7Jpyvh-i9ay_uzOR-aM,16475
7
7
  protoc_gen_swagger/options/openapiv2_pb2.pyi,sha256=hYOV6uQ2yqhP89042_V3GuAsvoBBiXf5CGuYmnFnfv4,54665
8
8
  protoc_gen_swagger/options/openapiv2_pb2_grpc.py,sha256=sje9Nh3yE7CHCUWZwtjTgwsKB4GvyGz5vOrGTnRXJfc,917
9
- scanoss/__init__.py,sha256=b9jbvOsn1fEt-O0NxNrIhAXQFnJvzF8RfjiZMpxzxo4,1146
10
- scanoss/cli.py,sha256=TXqwh5afwHN-e7OIncAoZbQZ5tbFCm6WQQxIA7NCDvs,93721
9
+ scanoss/__init__.py,sha256=XOnhkCmqwWWkii_xE2t16Jl3weqDcvzxje1mYGvZL1Q,1146
10
+ scanoss/cli.py,sha256=AV_tmWeCH_TxhKDOY3PR1zbLbDWt1yWa8CHpp-GABsY,97436
11
11
  scanoss/components.py,sha256=NFyt_w3aoMotr_ZaFU-ng00_89sruc0kgY7ERnJXkmM,15891
12
- scanoss/constants.py,sha256=On8mQ-8ardVMHSJ7WOJqeTvGXIOWPLCgUanjE7Wk-wE,351
12
+ scanoss/constants.py,sha256=GHLTaLNVxXdTXRj7ngRK4u4S653pHzM8qFy4JFLa0wQ,450
13
13
  scanoss/cryptography.py,sha256=lOoD_dW16ARQxYiYyb5R8S7gx0FqWIsnGkKfsB0nGaU,10627
14
14
  scanoss/csvoutput.py,sha256=3wdXPeIqZG84bCtXFh8fMZO3XodekeSx6RZXoOhZMFc,10551
15
15
  scanoss/cyclonedx.py,sha256=y5fI2E-95vv2iZeCCsXtzSdJJUK_piHC1THsbfbXEpA,18151
16
- scanoss/file_filters.py,sha256=VxfEBylliXReD07YczsHL0coiI3bdNPbfiLJt7GwPWs,20589
16
+ scanoss/delta.py,sha256=slmgnD7SsUOmfSE2zb0zdRAGo-JcjPJAtxyzuCSzO3I,9455
17
+ scanoss/file_filters.py,sha256=QcLqunaBKQIafjNZ9_Snh9quBX5_-fsTusVmxwjC1q8,18511
17
18
  scanoss/filecount.py,sha256=RZjKQ6M5P_RQg0_PMD2tsRe5Z8f98ke0sxYVjPDN8iQ,6538
18
19
  scanoss/results.py,sha256=47ZXXuU2sDjYa5vhtbWTmikit9jHhA0rsYKwkvZFI5w,9252
19
20
  scanoss/scancodedeps.py,sha256=JbpoGW1POtPMmowzfwa4oh8sSBeeQCqaW9onvc4UFYM,11517
@@ -32,7 +33,7 @@ scanoss/api/__init__.py,sha256=hx-P78xbDsh6WQIigewkJ7Y7y1fqc_eYnyHC5IZTKmo,1122
32
33
  scanoss/api/common/__init__.py,sha256=hx-P78xbDsh6WQIigewkJ7Y7y1fqc_eYnyHC5IZTKmo,1122
33
34
  scanoss/api/common/v2/__init__.py,sha256=hx-P78xbDsh6WQIigewkJ7Y7y1fqc_eYnyHC5IZTKmo,1122
34
35
  scanoss/api/common/v2/scanoss_common_pb2.py,sha256=uF1xIFu9o_srr8fZWkIOtdInbDk-duesJKyFKPznTRU,4646
35
- scanoss/api/common/v2/scanoss_common_pb2_grpc.py,sha256=4ZqtBiDbGyHItHUkkVba517K-n24nAEgRs3dt3cfQsw,1015
36
+ scanoss/api/common/v2/scanoss_common_pb2_grpc.py,sha256=YMOEV6H5rFsx05X1ZfTh7rbOl0ThP6_F5QIal2hJkhI,1031
36
37
  scanoss/api/components/__init__.py,sha256=hx-P78xbDsh6WQIigewkJ7Y7y1fqc_eYnyHC5IZTKmo,1122
37
38
  scanoss/api/components/v2/__init__.py,sha256=hx-P78xbDsh6WQIigewkJ7Y7y1fqc_eYnyHC5IZTKmo,1122
38
39
  scanoss/api/components/v2/scanoss_components_pb2.py,sha256=godS4LyRzJCpUkGgOK2KTdmNs8RM-7CvAL5iHTiouNI,12775
@@ -63,7 +64,7 @@ scanoss/api/vulnerabilities/__init__.py,sha256=IFrDk_DTJgKSZmmU-nuLXuq_s8sQZlrSC
63
64
  scanoss/api/vulnerabilities/v2/__init__.py,sha256=IFrDk_DTJgKSZmmU-nuLXuq_s8sQZlrSCHhIDMJT4r0,1122
64
65
  scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py,sha256=pmm0MSiXkdf8e4rCIIDRcsNRixR2vGvD1Xak4l-wdwI,16550
65
66
  scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py,sha256=BNxT5kUKQ-mgtOt5QYBM1Qrg5LNDqSpWKpfEZquIlsM,19127
66
- scanoss/data/build_date.txt,sha256=bfD4whIFHTFl82pMAxKj6p8gD2hRI5ULRF3642cQp_Q,40
67
+ scanoss/data/build_date.txt,sha256=n54sthXqFE_uitQWwA2OBIhvntuAusvCrN83jYV6dbY,40
67
68
  scanoss/data/scanoss-settings-schema.json,sha256=ClkRYAkjAN0Sk704G8BE_Ok006oQ6YnIGmX84CF8h9w,8798
68
69
  scanoss/data/spdx-exceptions.json,sha256=s7UTYxC7jqQXr11YBlIWYCNwN6lRDFTR33Y8rpN_dA4,17953
69
70
  scanoss/data/spdx-licenses.json,sha256=A6Z0q82gaTLtnopBfzeIVZjJFxkdRW1g2TuumQc-lII,228794
@@ -80,18 +81,18 @@ scanoss/inspection/raw/undeclared_component.py,sha256=uN-oVqQF8vWArTc2yDVoxudV0b
80
81
  scanoss/inspection/utils/license_utils.py,sha256=Zb6QLmVJb86lKCwZyBsmwakyAtY1SXa54kUyyKmWMqA,5093
81
82
  scanoss/scanners/__init__.py,sha256=D4C0lWLuNp8k_BjQZEc07WZcUgAvriVwQWOk063b0ZU,1122
82
83
  scanoss/scanners/container_scanner.py,sha256=fOrb64owrstX7LnTuxiIan059YgLeKXeBS6g2QaCyq0,16346
83
- scanoss/scanners/folder_hasher.py,sha256=-qvTtMC0iPj7zS8nMSZZJyt9d62MeQIK0LcrNDkt7yc,12267
84
+ scanoss/scanners/folder_hasher.py,sha256=tOlo8YXanC3sQ77oHvHcsWiu8BW2pCfTyWC07AmzHIQ,12845
84
85
  scanoss/scanners/scanner_config.py,sha256=egG7cw3S2akU-D9M1aLE5jLrfz_c8e7_DIotMnnpM84,2601
85
- scanoss/scanners/scanner_hfh.py,sha256=OvayCIq_a5iJwv7H7OCdB9K0vI9oxAz9UvgGfg7xrLU,8392
86
+ scanoss/scanners/scanner_hfh.py,sha256=M2PB4wDTi4LD1DwuAVfWiqQkjOImSpNok7vgo5H_Spg,9190
86
87
  scanoss/services/dependency_track_service.py,sha256=JIpqev4I-x_ZajMxD5W2Y3OAUvEJ_4nstzAPV90vfP8,5070
87
88
  scanoss/utils/__init__.py,sha256=0hjb5ktavp7utJzFhGMPImPaZiHWgilM2HwvTp5lXJE,1122
88
89
  scanoss/utils/abstract_presenter.py,sha256=teiDTxBj5jBMCk2T8i4l1BJPf_u4zBLWrtCTFHSSECM,3148
89
90
  scanoss/utils/crc64.py,sha256=TMrwQimSdE6imhFOUL7oAG6Kxu-8qMpGWMuMg8QpSVs,3169
90
91
  scanoss/utils/file.py,sha256=62cA9a17TU9ZvfA3FY5HY4-QOajJeSrc8S6xLA_f-3M,2980
91
92
  scanoss/utils/simhash.py,sha256=6iu8DOcecPAY36SZjCOzrrLMT9oIE7-gI6QuYwUQ7B0,5793
92
- scanoss-1.35.0.dist-info/licenses/LICENSE,sha256=LLUaXoiyOroIbr5ubAyrxBOwSRLTm35ETO2FmLpy8QQ,1074
93
- scanoss-1.35.0.dist-info/METADATA,sha256=-AO7LRmiZDfAUhDHart9bCFgX6i3ahHH1eH7TaQdbk8,6181
94
- scanoss-1.35.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
95
- scanoss-1.35.0.dist-info/entry_points.txt,sha256=Uy28xnaDL5KQ7V77sZD5VLDXPNxYYzSr5tsqtiXVzAs,48
96
- scanoss-1.35.0.dist-info/top_level.txt,sha256=V11PrQ6Pnrc-nDF9xnisnJ8e6-i7HqSIKVNqduRWcL8,27
97
- scanoss-1.35.0.dist-info/RECORD,,
93
+ scanoss-1.37.0.dist-info/licenses/LICENSE,sha256=LLUaXoiyOroIbr5ubAyrxBOwSRLTm35ETO2FmLpy8QQ,1074
94
+ scanoss-1.37.0.dist-info/METADATA,sha256=JPgOoHE1lzvW6ejzxv0CoMMxmo-Ku4zw3qSyrg2aLrM,6181
95
+ scanoss-1.37.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
96
+ scanoss-1.37.0.dist-info/entry_points.txt,sha256=Uy28xnaDL5KQ7V77sZD5VLDXPNxYYzSr5tsqtiXVzAs,48
97
+ scanoss-1.37.0.dist-info/top_level.txt,sha256=V11PrQ6Pnrc-nDF9xnisnJ8e6-i7HqSIKVNqduRWcL8,27
98
+ scanoss-1.37.0.dist-info/RECORD,,