scanoss 1.35.0__tar.gz → 1.36.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. {scanoss-1.35.0/src/scanoss.egg-info → scanoss-1.36.0}/PKG-INFO +1 -1
  2. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/__init__.py +1 -1
  3. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/common/v2/scanoss_common_pb2_grpc.py +1 -0
  4. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/cli.py +34 -0
  5. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/constants.py +4 -1
  6. scanoss-1.36.0/src/scanoss/data/build_date.txt +1 -0
  7. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/file_filters.py +1 -158
  8. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/scanners/folder_hasher.py +23 -9
  9. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/scanners/scanner_hfh.py +22 -6
  10. {scanoss-1.35.0 → scanoss-1.36.0/src/scanoss.egg-info}/PKG-INFO +1 -1
  11. scanoss-1.35.0/src/scanoss/data/build_date.txt +0 -1
  12. {scanoss-1.35.0 → scanoss-1.36.0}/LICENSE +0 -0
  13. {scanoss-1.35.0 → scanoss-1.36.0}/PACKAGE.md +0 -0
  14. {scanoss-1.35.0 → scanoss-1.36.0}/README.md +0 -0
  15. {scanoss-1.35.0 → scanoss-1.36.0}/pyproject.toml +0 -0
  16. {scanoss-1.35.0 → scanoss-1.36.0}/setup.cfg +0 -0
  17. {scanoss-1.35.0 → scanoss-1.36.0}/src/protoc_gen_swagger/__init__.py +0 -0
  18. {scanoss-1.35.0 → scanoss-1.36.0}/src/protoc_gen_swagger/options/__init__.py +0 -0
  19. {scanoss-1.35.0 → scanoss-1.36.0}/src/protoc_gen_swagger/options/annotations_pb2.py +0 -0
  20. {scanoss-1.35.0 → scanoss-1.36.0}/src/protoc_gen_swagger/options/annotations_pb2.pyi +0 -0
  21. {scanoss-1.35.0 → scanoss-1.36.0}/src/protoc_gen_swagger/options/annotations_pb2_grpc.py +0 -0
  22. {scanoss-1.35.0 → scanoss-1.36.0}/src/protoc_gen_swagger/options/openapiv2_pb2.py +0 -0
  23. {scanoss-1.35.0 → scanoss-1.36.0}/src/protoc_gen_swagger/options/openapiv2_pb2.pyi +0 -0
  24. {scanoss-1.35.0 → scanoss-1.36.0}/src/protoc_gen_swagger/options/openapiv2_pb2_grpc.py +0 -0
  25. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/__init__.py +0 -0
  26. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/common/__init__.py +0 -0
  27. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/common/v2/__init__.py +0 -0
  28. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/common/v2/scanoss_common_pb2.py +0 -0
  29. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/components/__init__.py +0 -0
  30. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/components/v2/__init__.py +0 -0
  31. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/components/v2/scanoss_components_pb2.py +0 -0
  32. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/components/v2/scanoss_components_pb2_grpc.py +0 -0
  33. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py +0 -0
  34. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py +0 -0
  35. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/dependencies/__init__.py +0 -0
  36. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/dependencies/v2/__init__.py +0 -0
  37. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py +0 -0
  38. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py +0 -0
  39. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/geoprovenance/__init__.py +0 -0
  40. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/geoprovenance/v2/__init__.py +0 -0
  41. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2.py +0 -0
  42. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2_grpc.py +0 -0
  43. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/licenses/__init__.py +0 -0
  44. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/licenses/v2/__init__.py +0 -0
  45. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/licenses/v2/scanoss_licenses_pb2.py +0 -0
  46. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/licenses/v2/scanoss_licenses_pb2_grpc.py +0 -0
  47. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/scanning/__init__.py +0 -0
  48. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/scanning/v2/__init__.py +0 -0
  49. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/scanning/v2/scanoss_scanning_pb2.py +0 -0
  50. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py +0 -0
  51. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/semgrep/__init__.py +0 -0
  52. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/semgrep/v2/__init__.py +0 -0
  53. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/semgrep/v2/scanoss_semgrep_pb2.py +0 -0
  54. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/semgrep/v2/scanoss_semgrep_pb2_grpc.py +0 -0
  55. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/vulnerabilities/__init__.py +0 -0
  56. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/vulnerabilities/v2/__init__.py +0 -0
  57. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py +0 -0
  58. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py +0 -0
  59. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/components.py +0 -0
  60. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/cryptography.py +0 -0
  61. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/csvoutput.py +0 -0
  62. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/cyclonedx.py +0 -0
  63. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/data/scanoss-settings-schema.json +0 -0
  64. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/data/spdx-exceptions.json +0 -0
  65. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/data/spdx-licenses.json +0 -0
  66. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/export/__init__.py +0 -0
  67. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/export/dependency_track.py +0 -0
  68. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/filecount.py +0 -0
  69. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/inspection/__init__.py +0 -0
  70. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/inspection/dependency_track/project_violation.py +0 -0
  71. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/inspection/policy_check.py +0 -0
  72. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/inspection/raw/component_summary.py +0 -0
  73. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/inspection/raw/copyleft.py +0 -0
  74. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/inspection/raw/license_summary.py +0 -0
  75. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/inspection/raw/raw_base.py +0 -0
  76. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/inspection/raw/undeclared_component.py +0 -0
  77. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/inspection/utils/license_utils.py +0 -0
  78. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/results.py +0 -0
  79. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/scancodedeps.py +0 -0
  80. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/scanner.py +0 -0
  81. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/scanners/__init__.py +0 -0
  82. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/scanners/container_scanner.py +0 -0
  83. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/scanners/scanner_config.py +0 -0
  84. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/scanoss_settings.py +0 -0
  85. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/scanossapi.py +0 -0
  86. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/scanossbase.py +0 -0
  87. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/scanossgrpc.py +0 -0
  88. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/scanpostprocessor.py +0 -0
  89. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/scantype.py +0 -0
  90. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/services/dependency_track_service.py +0 -0
  91. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/spdxlite.py +0 -0
  92. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/threadeddependencies.py +0 -0
  93. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/threadedscanning.py +0 -0
  94. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/utils/__init__.py +0 -0
  95. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/utils/abstract_presenter.py +0 -0
  96. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/utils/crc64.py +0 -0
  97. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/utils/file.py +0 -0
  98. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/utils/simhash.py +0 -0
  99. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss/winnowing.py +0 -0
  100. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss.egg-info/SOURCES.txt +0 -0
  101. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss.egg-info/dependency_links.txt +0 -0
  102. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss.egg-info/entry_points.txt +0 -0
  103. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss.egg-info/requires.txt +0 -0
  104. {scanoss-1.35.0 → scanoss-1.36.0}/src/scanoss.egg-info/top_level.txt +0 -0
  105. {scanoss-1.35.0 → scanoss-1.36.0}/tests/test_csv_output.py +0 -0
  106. {scanoss-1.35.0 → scanoss-1.36.0}/tests/test_file_filters.py +0 -0
  107. {scanoss-1.35.0 → scanoss-1.36.0}/tests/test_policy_inspect.py +0 -0
  108. {scanoss-1.35.0 → scanoss-1.36.0}/tests/test_scan_post_processor.py +0 -0
  109. {scanoss-1.35.0 → scanoss-1.36.0}/tests/test_spdxlite.py +0 -0
  110. {scanoss-1.35.0 → scanoss-1.36.0}/tests/test_winnowing.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scanoss
3
- Version: 1.35.0
3
+ Version: 1.36.0
4
4
  Summary: Simple Python library to leverage the SCANOSS APIs
5
5
  Home-page: https://scanoss.com
6
6
  Author: SCANOSS
@@ -22,4 +22,4 @@ SPDX-License-Identifier: MIT
22
22
  THE SOFTWARE.
23
23
  """
24
24
 
25
- __version__ = '1.35.0'
25
+ __version__ = '1.36.0'
@@ -3,6 +3,7 @@
3
3
  import warnings
4
4
 
5
5
  import grpc
6
+ import warnings
6
7
 
7
8
  GRPC_GENERATED_VERSION = '1.73.1'
8
9
  GRPC_VERSION = grpc.__version__
@@ -59,7 +59,10 @@ from . import __version__
59
59
  from .components import Components
60
60
  from .constants import (
61
61
  DEFAULT_API_TIMEOUT,
62
+ DEFAULT_HFH_DEPTH,
63
+ DEFAULT_HFH_MIN_ACCEPTED_SCORE,
62
64
  DEFAULT_HFH_RANK_THRESHOLD,
65
+ DEFAULT_HFH_RECURSIVE_THRESHOLD,
63
66
  DEFAULT_POST_SIZE,
64
67
  DEFAULT_RETRY,
65
68
  DEFAULT_TIMEOUT,
@@ -869,6 +872,27 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
869
872
  help='Filter results to only show those with rank value at or below this threshold (e.g., --rank-threshold 3 '
870
873
  'returns results with rank 1, 2, or 3). Lower rank values indicate higher quality matches.',
871
874
  )
875
+ p_folder_scan.add_argument(
876
+ '--depth',
877
+ type=int,
878
+ default=DEFAULT_HFH_DEPTH,
879
+ help=f'Defines how deep to scan the root directory (optional - default {DEFAULT_HFH_DEPTH})',
880
+ )
881
+ p_folder_scan.add_argument(
882
+ '--recursive-threshold',
883
+ type=float,
884
+ default=DEFAULT_HFH_RECURSIVE_THRESHOLD,
885
+ help=f'Minimum score threshold to consider a match (optional - default: {DEFAULT_HFH_RECURSIVE_THRESHOLD})',
886
+ )
887
+ p_folder_scan.add_argument(
888
+ '--min-accepted-score',
889
+ type=float,
890
+ default=DEFAULT_HFH_MIN_ACCEPTED_SCORE,
891
+ help=(
892
+ 'Only show results with a score at or above this threshold '
893
+ f'(optional - default: {DEFAULT_HFH_MIN_ACCEPTED_SCORE})'
894
+ ),
895
+ )
872
896
  p_folder_scan.set_defaults(func=folder_hashing_scan)
873
897
 
874
898
  # Sub-command: folder-hash
@@ -887,6 +911,12 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
887
911
  default='json',
888
912
  help='Result output format (optional - default: json)',
889
913
  )
914
+ p_folder_hash.add_argument(
915
+ '--depth',
916
+ type=int,
917
+ default=DEFAULT_HFH_DEPTH,
918
+ help=f'Defines how deep to hash the root directory (optional - default {DEFAULT_HFH_DEPTH})',
919
+ )
890
920
  p_folder_hash.set_defaults(func=folder_hash)
891
921
 
892
922
  # Output options
@@ -2456,6 +2486,9 @@ def folder_hashing_scan(parser, args):
2456
2486
  client=client,
2457
2487
  scanoss_settings=scanoss_settings,
2458
2488
  rank_threshold=args.rank_threshold,
2489
+ depth=args.depth,
2490
+ recursive_threshold=args.recursive_threshold,
2491
+ min_accepted_score=args.min_accepted_score,
2459
2492
  )
2460
2493
 
2461
2494
  if scanner.scan():
@@ -2489,6 +2522,7 @@ def folder_hash(parser, args):
2489
2522
  scan_dir=args.scan_dir,
2490
2523
  config=folder_hasher_config,
2491
2524
  scanoss_settings=scanoss_settings,
2525
+ depth=args.depth,
2492
2526
  )
2493
2527
 
2494
2528
  folder_hasher.hash_directory(args.scan_dir)
@@ -13,4 +13,7 @@ DEFAULT_URL2 = 'https://api.scanoss.com' # default premium service URL
13
13
 
14
14
  DEFAULT_API_TIMEOUT = 600
15
15
 
16
- DEFAULT_HFH_RANK_THRESHOLD = 5
16
+ DEFAULT_HFH_RANK_THRESHOLD = 5
17
+ DEFAULT_HFH_DEPTH = 1
18
+ DEFAULT_HFH_RECURSIVE_THRESHOLD = 0.8
19
+ DEFAULT_HFH_MIN_ACCEPTED_SCORE = 0.15
@@ -0,0 +1 @@
1
+ date: 20251013130805, utime: 1760360885
@@ -269,162 +269,6 @@ DEFAULT_SKIPPED_EXT = {
269
269
  'sqlite3',
270
270
  }
271
271
 
272
- # TODO: For hfh add the .gitignore patterns
273
- DEFAULT_SKIPPED_EXT_HFH = {
274
- '.1',
275
- '.2',
276
- '.3',
277
- '.4',
278
- '.5',
279
- '.6',
280
- '.7',
281
- '.8',
282
- '.9',
283
- '.ac',
284
- '.adoc',
285
- '.am',
286
- '.asciidoc',
287
- '.bmp',
288
- '.build',
289
- '.cfg',
290
- '.chm',
291
- '.class',
292
- '.cmake',
293
- '.cnf',
294
- '.conf',
295
- '.config',
296
- '.contributors',
297
- '.copying',
298
- '.crt',
299
- '.csproj',
300
- '.css',
301
- '.csv',
302
- '.dat',
303
- '.data',
304
- '.dtd',
305
- '.dts',
306
- '.iws',
307
- '.c9',
308
- '.c9revisions',
309
- '.dtsi',
310
- '.dump',
311
- '.eot',
312
- '.eps',
313
- '.geojson',
314
- '.gif',
315
- '.glif',
316
- '.gmo',
317
- '.guess',
318
- '.hex',
319
- '.htm',
320
- '.html',
321
- '.ico',
322
- '.iml',
323
- '.in',
324
- '.inc',
325
- '.info',
326
- '.ini',
327
- '.ipynb',
328
- '.jpeg',
329
- '.jpg',
330
- '.json',
331
- '.jsonld',
332
- '.lock',
333
- '.log',
334
- '.m4',
335
- '.map',
336
- '.md5',
337
- '.meta',
338
- '.mk',
339
- '.mxml',
340
- '.o',
341
- '.otf',
342
- '.out',
343
- '.pbtxt',
344
- '.pdf',
345
- '.pem',
346
- '.phtml',
347
- '.plist',
348
- '.png',
349
- '.prefs',
350
- '.properties',
351
- '.pyc',
352
- '.qdoc',
353
- '.result',
354
- '.rgb',
355
- '.rst',
356
- '.scss',
357
- '.sha',
358
- '.sha1',
359
- '.sha2',
360
- '.sha256',
361
- '.sln',
362
- '.spec',
363
- '.sub',
364
- '.svg',
365
- '.svn-base',
366
- '.tab',
367
- '.template',
368
- '.test',
369
- '.tex',
370
- '.tiff',
371
- '.ttf',
372
- '.txt',
373
- '.utf-8',
374
- '.vim',
375
- '.wav',
376
- '.woff',
377
- '.woff2',
378
- '.xht',
379
- '.xhtml',
380
- '.xml',
381
- '.xpm',
382
- '.xsd',
383
- '.xul',
384
- '.yaml',
385
- '.yml',
386
- '.wfp',
387
- '.editorconfig',
388
- '.dotcover',
389
- '.pid',
390
- '.lcov',
391
- '.egg',
392
- '.manifest',
393
- '.cache',
394
- '.coverage',
395
- '.cover',
396
- '.gem',
397
- '.lst',
398
- '.pickle',
399
- '.pdb',
400
- '.gml',
401
- '.pot',
402
- '.plt',
403
- '.whml',
404
- '.pom',
405
- '.smtml',
406
- '.min.js',
407
- '.mf',
408
- '.base64',
409
- '.s',
410
- '.diff',
411
- '.patch',
412
- '.rules',
413
- # File endings
414
- '-doc',
415
- 'config',
416
- 'news',
417
- 'readme',
418
- 'swiftdoc',
419
- 'texidoc',
420
- 'todo',
421
- 'version',
422
- 'ignore',
423
- 'manifest',
424
- 'sqlite',
425
- 'sqlite3',
426
- }
427
-
428
272
 
429
273
  class FileFilters(ScanossBase):
430
274
  """
@@ -707,9 +551,8 @@ class FileFilters(ScanossBase):
707
551
  bool: True if file should be skipped, False otherwise
708
552
  """
709
553
  file_name = os.path.basename(file_rel_path)
710
-
554
+ DEFAULT_SKIPPED_EXT_LIST = {} if self.is_folder_hashing_scan else DEFAULT_SKIPPED_EXT
711
555
  DEFAULT_SKIPPED_FILES_LIST = DEFAULT_SKIPPED_FILES_HFH if self.is_folder_hashing_scan else DEFAULT_SKIPPED_FILES
712
- DEFAULT_SKIPPED_EXT_LIST = DEFAULT_SKIPPED_EXT_HFH if self.is_folder_hashing_scan else DEFAULT_SKIPPED_EXT
713
556
 
714
557
  if not self.hidden_files_folders and file_name.startswith('.'):
715
558
  self.print_debug(f'Skipping file: {file_rel_path} (hidden file)')
@@ -6,6 +6,7 @@ from typing import Dict, List, Literal, Optional
6
6
 
7
7
  from progress.bar import Bar
8
8
 
9
+ from scanoss.constants import DEFAULT_HFH_DEPTH
9
10
  from scanoss.file_filters import FileFilters
10
11
  from scanoss.scanoss_settings import ScanossSettings
11
12
  from scanoss.scanossbase import ScanossBase
@@ -15,8 +16,6 @@ from scanoss.utils.simhash import WordFeatureSet, fingerprint, simhash, vectoriz
15
16
 
16
17
  MINIMUM_FILE_COUNT = 8
17
18
  MINIMUM_CONCATENATED_NAME_LENGTH = 32
18
- MAXIMUM_FILE_NAME_LENGTH = 32
19
-
20
19
 
21
20
  class DirectoryNode:
22
21
  """
@@ -72,6 +71,12 @@ class FolderHasher:
72
71
 
73
72
  It builds a directory tree (DirectoryNode) and computes the associated
74
73
  hash data for the folder.
74
+
75
+ Args:
76
+ scan_dir (str): The directory to be hashed.
77
+ config (FolderHasherConfig): Configuration parameters for the folder hasher.
78
+ scanoss_settings (Optional[ScanossSettings]): Optional settings for Scanoss.
79
+ depth (int): How many levels to hash from the root directory (default: 1).
75
80
  """
76
81
 
77
82
  def __init__(
@@ -79,6 +84,7 @@ class FolderHasher:
79
84
  scan_dir: str,
80
85
  config: FolderHasherConfig,
81
86
  scanoss_settings: Optional[ScanossSettings] = None,
87
+ depth: int = DEFAULT_HFH_DEPTH,
82
88
  ):
83
89
  self.base = ScanossBase(
84
90
  debug=config.debug,
@@ -101,6 +107,7 @@ class FolderHasher:
101
107
 
102
108
  self.scan_dir = scan_dir
103
109
  self.tree = None
110
+ self.depth = depth
104
111
 
105
112
  def hash_directory(self, path: str) -> dict:
106
113
  """
@@ -123,7 +130,10 @@ class FolderHasher:
123
130
 
124
131
  return tree
125
132
 
126
- def _build_root_node(self, path: str) -> DirectoryNode:
133
+ def _build_root_node(
134
+ self,
135
+ path: str,
136
+ ) -> DirectoryNode:
127
137
  """
128
138
  Build a directory tree from the given path with file information.
129
139
 
@@ -140,7 +150,7 @@ class FolderHasher:
140
150
  root_node = DirectoryNode(str(root))
141
151
 
142
152
  all_files = [
143
- f for f in root.rglob('*') if f.is_file() and len(f.name.encode('utf-8')) <= MAXIMUM_FILE_NAME_LENGTH
153
+ f for f in root.rglob('*') if f.is_file()
144
154
  ]
145
155
  filtered_files = self.file_filters.get_filtered_files_from_files(all_files, str(root))
146
156
 
@@ -180,7 +190,7 @@ class FolderHasher:
180
190
  bar.finish()
181
191
  return root_node
182
192
 
183
- def _hash_calc_from_node(self, node: DirectoryNode) -> dict:
193
+ def _hash_calc_from_node(self, node: DirectoryNode, current_depth: int = 1) -> dict:
184
194
  """
185
195
  Recursively compute folder hash data for a directory node.
186
196
 
@@ -189,12 +199,13 @@ class FolderHasher:
189
199
 
190
200
  Args:
191
201
  node (DirectoryNode): The directory node to compute the hash for.
202
+ current_depth (int): The current depth level (1-based, root is depth 1).
192
203
 
193
204
  Returns:
194
205
  dict: The computed hash data for the node.
195
206
  """
196
207
  hash_data = self._hash_calc(node)
197
-
208
+
198
209
  # Safely calculate relative path
199
210
  try:
200
211
  node_path = Path(node.path).resolve()
@@ -204,13 +215,18 @@ class FolderHasher:
204
215
  # If relative_to fails, use the node path as is or a fallback
205
216
  rel_path = Path(node.path).name if node.path else Path('.')
206
217
 
218
+ # Only process children if we haven't reached the depth limit
219
+ children = []
220
+ if current_depth < self.depth:
221
+ children = [self._hash_calc_from_node(child, current_depth + 1) for child in node.children.values()]
222
+
207
223
  return {
208
224
  'path_id': str(rel_path),
209
225
  'sim_hash_names': f'{hash_data["name_hash"]:02x}' if hash_data['name_hash'] is not None else None,
210
226
  'sim_hash_content': f'{hash_data["content_hash"]:02x}' if hash_data['content_hash'] is not None else None,
211
227
  'sim_hash_dir_names': f'{hash_data["dir_hash"]:02x}' if hash_data['dir_hash'] is not None else None,
212
228
  'lang_extensions': hash_data['lang_extensions'],
213
- 'children': [self._hash_calc_from_node(child) for child in node.children.values()],
229
+ 'children': children,
214
230
  }
215
231
 
216
232
  def _hash_calc(self, node: DirectoryNode) -> dict:
@@ -237,8 +253,6 @@ class FolderHasher:
237
253
 
238
254
  for file in node.files:
239
255
  key_str = file.key_str
240
- if key_str in processed_hashes:
241
- continue
242
256
 
243
257
  file_name = os.path.basename(file.path)
244
258
 
@@ -29,7 +29,12 @@ from typing import Dict, Optional
29
29
 
30
30
  from progress.spinner import Spinner
31
31
 
32
- from scanoss.constants import DEFAULT_HFH_RANK_THRESHOLD
32
+ from scanoss.constants import (
33
+ DEFAULT_HFH_DEPTH,
34
+ DEFAULT_HFH_MIN_ACCEPTED_SCORE,
35
+ DEFAULT_HFH_RANK_THRESHOLD,
36
+ DEFAULT_HFH_RECURSIVE_THRESHOLD,
37
+ )
33
38
  from scanoss.cyclonedx import CycloneDx
34
39
  from scanoss.file_filters import FileFilters
35
40
  from scanoss.scanners.folder_hasher import FolderHasher
@@ -48,13 +53,16 @@ class ScannerHFH:
48
53
  and calculates simhash values based on file names and content to detect folder-level similarities.
49
54
  """
50
55
 
51
- def __init__(
56
+ def __init__( # noqa: PLR0913
52
57
  self,
53
58
  scan_dir: str,
54
59
  config: ScannerConfig,
55
60
  client: Optional[ScanossGrpc] = None,
56
61
  scanoss_settings: Optional[ScanossSettings] = None,
57
62
  rank_threshold: int = DEFAULT_HFH_RANK_THRESHOLD,
63
+ depth: int = DEFAULT_HFH_DEPTH,
64
+ recursive_threshold: float = DEFAULT_HFH_RECURSIVE_THRESHOLD,
65
+ min_accepted_score: float = DEFAULT_HFH_MIN_ACCEPTED_SCORE,
58
66
  ):
59
67
  """
60
68
  Initialize the ScannerHFH.
@@ -65,6 +73,9 @@ class ScannerHFH:
65
73
  client (ScanossGrpc): gRPC client for communicating with the scanning service.
66
74
  scanoss_settings (Optional[ScanossSettings]): Optional settings for Scanoss.
67
75
  rank_threshold (int): Get results with rank below this threshold (default: 5).
76
+ depth (int): How many levels to scan (default: 1).
77
+ recursive_threshold (float): Minimum score threshold to consider a match (default: 0.25).
78
+ min_accepted_score (float): Only show results with a score at or above this threshold (default: 0.15).
68
79
  """
69
80
  self.base = ScanossBase(
70
81
  debug=config.debug,
@@ -87,12 +98,15 @@ class ScannerHFH:
87
98
  scan_dir=scan_dir,
88
99
  config=config,
89
100
  scanoss_settings=scanoss_settings,
101
+ depth=depth,
90
102
  )
91
103
 
92
104
  self.scan_dir = scan_dir
93
105
  self.client = client
94
106
  self.scan_results = None
95
107
  self.rank_threshold = rank_threshold
108
+ self.recursive_threshold = recursive_threshold
109
+ self.min_accepted_score = min_accepted_score
96
110
 
97
111
  def scan(self) -> Optional[Dict]:
98
112
  """
@@ -102,8 +116,10 @@ class ScannerHFH:
102
116
  Optional[Dict]: The folder hash response from the gRPC client, or None if an error occurs.
103
117
  """
104
118
  hfh_request = {
105
- 'root': self.folder_hasher.hash_directory(self.scan_dir),
119
+ 'root': self.folder_hasher.hash_directory(path=self.scan_dir),
106
120
  'rank_threshold': self.rank_threshold,
121
+ 'recursive_threshold': self.recursive_threshold,
122
+ 'min_accepted_score': self.min_accepted_score,
107
123
  }
108
124
 
109
125
  spinner = Spinner('Scanning folder...')
@@ -193,7 +209,7 @@ class ScannerHFHPresenter(AbstractPresenter):
193
209
  }
194
210
  ]
195
211
  }
196
-
212
+
197
213
  get_vulnerabilities_json_request = {
198
214
  'purls': [{'purl': purl, 'requirement': best_match_version['version']}],
199
215
  }
@@ -210,10 +226,10 @@ class ScannerHFHPresenter(AbstractPresenter):
210
226
  error_msg = 'ERROR: Failed to produce CycloneDX output'
211
227
  self.base.print_stderr(error_msg)
212
228
  return None
213
-
229
+
214
230
  if vulnerabilities:
215
231
  cdx_output = cdx.append_vulnerabilities(cdx_output, vulnerabilities, purl)
216
-
232
+
217
233
  return json.dumps(cdx_output, indent=2)
218
234
  except Exception as e:
219
235
  self.base.print_stderr(f'ERROR: Failed to get license information: {e}')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scanoss
3
- Version: 1.35.0
3
+ Version: 1.36.0
4
4
  Summary: Simple Python library to leverage the SCANOSS APIs
5
5
  Home-page: https://scanoss.com
6
6
  Author: SCANOSS
@@ -1 +0,0 @@
1
- date: 20251007151647, utime: 1759850207
File without changes
File without changes
File without changes
File without changes
File without changes