scanoss 1.40.1__py3-none-any.whl → 1.41.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scanoss/osadl.py ADDED
@@ -0,0 +1,125 @@
1
+ """
2
+ SPDX-License-Identifier: MIT
3
+
4
+ Copyright (c) 2025, SCANOSS
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
+ """
24
+
25
+ import json
26
+ import sys
27
+
28
+ import importlib_resources
29
+
30
+ from scanoss.scanossbase import ScanossBase
31
+
32
+
33
class Osadl(ScanossBase):
    """
    OSADL data accessor class.

    Provides access to OSADL (Open Source Automation Development Lab) authoritative
    checklist data for license analysis.

    Data is loaded once at class level and shared across all instances for efficiency.

    Data source: https://www.osadl.org/fileadmin/checklists/copyleft.json
    License: CC-BY-4.0
    """

    # Lower-cased license ID -> OSADL copyleft status value, shared by every instance.
    _shared_copyleft_data = {}
    # Flipped to True only after the checklist has been parsed and stored successfully.
    _data_loaded = False

    def __init__(self, debug: bool = False, trace: bool = True, quiet: bool = False):
        """
        Initialize the Osadl class.
        Data is loaded once at class level and shared across all instances.

        NOTE(review): ``trace`` defaults to True while ``debug`` and ``quiet`` default
        to False - confirm this is intended.

        :param debug: forwarded to ScanossBase
        :param trace: forwarded to ScanossBase
        :param quiet: forwarded to ScanossBase
        """
        super().__init__(debug, trace, quiet)
        # The result is deliberately not checked here: a failed load leaves the shared
        # table empty, so is_copyleft() answers False for every license.
        self._load_copyleft_data()

    def _load_copyleft_data(self) -> bool:
        """
        Load the embedded OSADL copyleft JSON file into class-level shared data.
        Data is loaded only once and shared across all instances.

        Problems reading or parsing the file, and a file without a usable 'copyleft'
        object, are reported on STDERR and signalled through the return value.

        :return: True if successful, False otherwise
        """
        if Osadl._data_loaded:
            return True

        # OSADL copyleft license checklist from: https://www.osadl.org/Checklists
        # Data source: https://www.osadl.org/fileadmin/checklists/copyleft.json
        # License: CC-BY-4.0 (Creative Commons Attribution 4.0 International)
        # Copyright: (C) 2017 - 2024 Open Source Automation Development Lab (OSADL) eG
        try:
            f_name = importlib_resources.files(__name__) / 'data/osadl-copyleft.json'
            with importlib_resources.as_file(f_name) as f:
                with open(f, 'r', encoding='utf-8') as file:
                    data = json.load(file)
        except Exception as e:
            self.print_stderr(f'ERROR: Problem loading OSADL copyleft data: {e}')
            return False

        # Valid JSON is not necessarily an object: guard before calling dict methods so
        # that a malformed checklist is reported instead of raising AttributeError.
        copyleft = data.get('copyleft') if isinstance(data, dict) else None
        if not copyleft or not isinstance(copyleft, dict):
            self.print_stderr('ERROR: No copyleft data found in OSADL JSON')
            return False

        # Normalize license IDs (lowercase) for consistent lookup. The table is built
        # completely before touching the shared dictionary so a failure can never leave
        # it half populated.
        normalized = {lic_id.lower(): status for lic_id, status in copyleft.items()}
        Osadl._shared_copyleft_data.update(normalized)

        Osadl._data_loaded = True
        self.print_debug(f'Loaded {len(Osadl._shared_copyleft_data)} OSADL copyleft entries')
        return True

    def is_copyleft(self, spdx_id: str) -> bool:
        """
        Check if a license is copyleft according to OSADL data.

        Returns True for both strong copyleft ("Yes") and weak/restricted copyleft ("Yes (restricted)").

        :param spdx_id: SPDX license identifier
        :return: True if copyleft, False otherwise (including unknown or empty identifiers)
        """
        if not spdx_id:
            self.print_debug('No license ID provided for copyleft check')
            return False

        # Keys were lower-cased at load time, so normalize the lookup the same way
        status = Osadl._shared_copyleft_data.get(spdx_id.lower())

        if not status:
            self.print_debug(f'No OSADL copyleft data for license: {spdx_id}')
            return False

        # Consider both "Yes" and "Yes (restricted)" as copyleft (case-insensitive).
        # A non-string status (unexpected JSON value) is treated as "not copyleft"
        # rather than raising.
        return isinstance(status, str) and status.lower().startswith('yes')
121
+
122
+
123
+ #
124
+ # End of Osadl Class
125
+ #
scanoss/scanner.py CHANGED
@@ -26,6 +26,7 @@ import datetime
26
26
  import json
27
27
  import os
28
28
  import sys
29
+ from contextlib import nullcontext
29
30
  from pathlib import Path
30
31
  from typing import Any, Dict, List, Optional
31
32
 
@@ -363,62 +364,64 @@ class Scanner(ScanossBase):
363
364
  operation_type='scanning',
364
365
  )
365
366
  self.print_msg(f'Searching {scan_dir} for files to fingerprint...')
366
- spinner = None
367
- if not self.quiet and self.isatty:
368
- spinner = Spinner('Fingerprinting ')
369
- save_wfps_for_print = not self.no_wfp_file or not self.threaded_scan
370
- wfp_list = []
371
- scan_block = ''
372
- scan_size = 0
373
- queue_size = 0
374
- file_count = 0 # count all files fingerprinted
375
- wfp_file_count = 0 # count number of files in each queue post
376
- scan_started = False
377
-
378
- to_scan_files = file_filters.get_filtered_files_from_folder(scan_dir)
379
- for to_scan_file in to_scan_files:
380
- if self.threaded_scan and self.threaded_scan.stop_scanning():
381
- self.print_stderr('Warning: Aborting fingerprinting as the scanning service is not available.')
382
- break
383
- self.print_debug(f'Fingerprinting {to_scan_file}...')
384
- if spinner:
385
- spinner.next()
386
- abs_path = Path(scan_dir, to_scan_file).resolve()
387
- wfp = self.winnowing.wfp_for_file(str(abs_path), to_scan_file)
388
- if wfp is None or wfp == '':
389
- self.print_debug(f'No WFP returned for {to_scan_file}. Skipping.')
390
- continue
391
- if save_wfps_for_print:
392
- wfp_list.append(wfp)
393
- file_count += 1
394
- if self.threaded_scan:
395
- wfp_size = len(wfp.encode('utf-8'))
396
- # If the WFP is bigger than the max post size and we already have something stored in the scan block,
397
- # add it to the queue
398
- if scan_block != '' and (wfp_size + scan_size) >= self.max_post_size:
399
- self.threaded_scan.queue_add(scan_block)
400
- queue_size += 1
401
- scan_block = ''
402
- wfp_file_count = 0
403
- scan_block += wfp
404
- scan_size = len(scan_block.encode('utf-8'))
405
- wfp_file_count += 1
406
- # If the scan request block (group of WFPs) or larger than the POST size or we have reached the file limit, add it to the queue # noqa: E501
407
- if wfp_file_count > self.post_file_count or scan_size >= self.max_post_size:
408
- self.threaded_scan.queue_add(scan_block)
409
- queue_size += 1
410
- scan_block = ''
411
- wfp_file_count = 0
412
- if not scan_started and queue_size > self.nb_threads: # Start scanning if we have something to do
413
- scan_started = True
414
- if not self.threaded_scan.run(wait=False):
415
- self.print_stderr('Warning: Some errors encounted while scanning. Results might be incomplete.')
416
- success = False
417
- # End for loop
418
- if self.threaded_scan and scan_block != '':
419
- self.threaded_scan.queue_add(scan_block) # Make sure all files have been submitted
420
- if spinner:
421
- spinner.finish()
367
+ spinner_ctx = Spinner('Fingerprinting ') if (not self.quiet and self.isatty) else nullcontext()
368
+
369
+ with spinner_ctx as spinner:
370
+ save_wfps_for_print = not self.no_wfp_file or not self.threaded_scan
371
+ wfp_list = []
372
+ scan_block = ''
373
+ scan_size = 0
374
+ queue_size = 0
375
+ file_count = 0 # count all files fingerprinted
376
+ wfp_file_count = 0 # count number of files in each queue post
377
+ scan_started = False
378
+
379
+ to_scan_files = file_filters.get_filtered_files_from_folder(scan_dir)
380
+ for to_scan_file in to_scan_files:
381
+ if self.threaded_scan and self.threaded_scan.stop_scanning():
382
+ self.print_stderr('Warning: Aborting fingerprinting as the scanning service is not available.')
383
+ break
384
+ self.print_debug(f'Fingerprinting {to_scan_file}...')
385
+ if spinner:
386
+ spinner.next()
387
+ abs_path = Path(scan_dir, to_scan_file).resolve()
388
+ wfp = self.winnowing.wfp_for_file(str(abs_path), to_scan_file)
389
+ if wfp is None or wfp == '':
390
+ self.print_debug(f'No WFP returned for {to_scan_file}. Skipping.')
391
+ continue
392
+ if save_wfps_for_print:
393
+ wfp_list.append(wfp)
394
+ file_count += 1
395
+ if self.threaded_scan:
396
+ wfp_size = len(wfp.encode('utf-8'))
397
+ # If the WFP is bigger than the max post size and we already have something
398
+ # stored in the scan block, add it to the queue
399
+ if scan_block != '' and (wfp_size + scan_size) >= self.max_post_size:
400
+ self.threaded_scan.queue_add(scan_block)
401
+ queue_size += 1
402
+ scan_block = ''
403
+ wfp_file_count = 0
404
+ scan_block += wfp
405
+ scan_size = len(scan_block.encode('utf-8'))
406
+ wfp_file_count += 1
407
+ # If the scan request block (group of WFPs) is larger than the POST size
408
+ # or we have reached the file limit, add it to the queue
409
+ if wfp_file_count > self.post_file_count or scan_size >= self.max_post_size:
410
+ self.threaded_scan.queue_add(scan_block)
411
+ queue_size += 1
412
+ scan_block = ''
413
+ wfp_file_count = 0
414
+ if not scan_started and queue_size > self.nb_threads: # Start scanning if we have something to do
415
+ scan_started = True
416
+ if not self.threaded_scan.run(wait=False):
417
+ self.print_stderr(
418
+ 'Warning: Some errors encountered while scanning. '
419
+ 'Results might be incomplete.'
420
+ )
421
+ success = False
422
+ # End for loop
423
+ if self.threaded_scan and scan_block != '':
424
+ self.threaded_scan.queue_add(scan_block) # Make sure all files have been submitted
422
425
 
423
426
  if file_count > 0:
424
427
  if save_wfps_for_print: # Write a WFP file if no threading is requested
@@ -631,63 +634,64 @@ class Scanner(ScanossBase):
631
634
  skip_extensions=self.skip_extensions,
632
635
  operation_type='scanning',
633
636
  )
634
- spinner = None
635
- if not self.quiet and self.isatty:
636
- spinner = Spinner('Fingerprinting ')
637
- save_wfps_for_print = not self.no_wfp_file or not self.threaded_scan
638
- wfp_list = []
639
- scan_block = ''
640
- scan_size = 0
641
- queue_size = 0
642
- file_count = 0 # count all files fingerprinted
643
- wfp_file_count = 0 # count number of files in each queue post
644
- scan_started = False
645
-
646
- to_scan_files = file_filters.get_filtered_files_from_files(files)
647
- for file in to_scan_files:
648
- if self.threaded_scan and self.threaded_scan.stop_scanning():
649
- self.print_stderr('Warning: Aborting fingerprinting as the scanning service is not available.')
650
- break
651
- self.print_debug(f'Fingerprinting {file}...')
652
- if spinner:
653
- spinner.next()
654
- wfp = self.winnowing.wfp_for_file(file, file)
655
- if wfp is None or wfp == '':
656
- self.print_debug(f'No WFP returned for {file}. Skipping.')
657
- continue
658
- if save_wfps_for_print:
659
- wfp_list.append(wfp)
660
- file_count += 1
661
- if self.threaded_scan:
662
- wfp_size = len(wfp.encode('utf-8'))
663
- # If the WFP is bigger than the max post size and we already have something stored in the scan block, add it to the queue # noqa: E501
664
- if scan_block != '' and (wfp_size + scan_size) >= self.max_post_size:
665
- self.threaded_scan.queue_add(scan_block)
666
- queue_size += 1
667
- scan_block = ''
668
- wfp_file_count = 0
669
- scan_block += wfp
670
- scan_size = len(scan_block.encode('utf-8'))
671
- wfp_file_count += 1
672
- # If the scan request block (group of WFPs) or larger than the POST size or we have reached the file limit, add it to the queue # noqa: E501
673
- if wfp_file_count > self.post_file_count or scan_size >= self.max_post_size:
674
- self.threaded_scan.queue_add(scan_block)
675
- queue_size += 1
676
- scan_block = ''
677
- wfp_file_count = 0
678
- if not scan_started and queue_size > self.nb_threads: # Start scanning if we have something to do
679
- scan_started = True
680
- if not self.threaded_scan.run(wait=False):
681
- self.print_stderr(
682
- 'Warning: Some errors encounted while scanning. Results might be incomplete.'
683
- )
684
- success = False
637
+ spinner_ctx = Spinner('Fingerprinting ') if (not self.quiet and self.isatty) else nullcontext()
638
+
639
+ with spinner_ctx as spinner:
640
+ save_wfps_for_print = not self.no_wfp_file or not self.threaded_scan
641
+ wfp_list = []
642
+ scan_block = ''
643
+ scan_size = 0
644
+ queue_size = 0
645
+ file_count = 0 # count all files fingerprinted
646
+ wfp_file_count = 0 # count number of files in each queue post
647
+ scan_started = False
648
+
649
+ to_scan_files = file_filters.get_filtered_files_from_files(files)
650
+ for file in to_scan_files:
651
+ if self.threaded_scan and self.threaded_scan.stop_scanning():
652
+ self.print_stderr('Warning: Aborting fingerprinting as the scanning service is not available.')
653
+ break
654
+ self.print_debug(f'Fingerprinting {file}...')
655
+ if spinner:
656
+ spinner.next()
657
+ wfp = self.winnowing.wfp_for_file(file, file)
658
+ if wfp is None or wfp == '':
659
+ self.print_debug(f'No WFP returned for {file}. Skipping.')
660
+ continue
661
+ if save_wfps_for_print:
662
+ wfp_list.append(wfp)
663
+ file_count += 1
664
+ if self.threaded_scan:
665
+ wfp_size = len(wfp.encode('utf-8'))
666
+ # If the WFP is bigger than the max post size and we already have something
667
+ # stored in the scan block, add it to the queue
668
+ if scan_block != '' and (wfp_size + scan_size) >= self.max_post_size:
669
+ self.threaded_scan.queue_add(scan_block)
670
+ queue_size += 1
671
+ scan_block = ''
672
+ wfp_file_count = 0
673
+ scan_block += wfp
674
+ scan_size = len(scan_block.encode('utf-8'))
675
+ wfp_file_count += 1
676
+ # If the scan request block (group of WFPs) is larger than the POST size
677
+ # or we have reached the file limit, add it to the queue
678
+ if wfp_file_count > self.post_file_count or scan_size >= self.max_post_size:
679
+ self.threaded_scan.queue_add(scan_block)
680
+ queue_size += 1
681
+ scan_block = ''
682
+ wfp_file_count = 0
683
+ if not scan_started and queue_size > self.nb_threads: # Start scanning if we have something to do
684
+ scan_started = True
685
+ if not self.threaded_scan.run(wait=False):
686
+ self.print_stderr(
687
+ 'Warning: Some errors encountered while scanning. '
688
+ 'Results might be incomplete.'
689
+ )
690
+ success = False
685
691
 
686
- # End for loop
687
- if self.threaded_scan and scan_block != '':
688
- self.threaded_scan.queue_add(scan_block) # Make sure all files have been submitted
689
- if spinner:
690
- spinner.finish()
692
+ # End for loop
693
+ if self.threaded_scan and scan_block != '':
694
+ self.threaded_scan.queue_add(scan_block) # Make sure all files have been submitted
691
695
 
692
696
  if file_count > 0:
693
697
  if save_wfps_for_print: # Write a WFP file if no threading is requested
@@ -778,73 +782,74 @@ class Scanner(ScanossBase):
778
782
  self.print_debug(f'Found {file_count} files to process.')
779
783
  raw_output = '{\n'
780
784
  file_print = ''
781
- bar = None
782
- if not self.quiet and self.isatty:
783
- bar = Bar('Scanning', max=file_count)
784
- bar.next(0)
785
- with open(wfp_file) as f:
786
- for line in f:
787
- if line.startswith(WFP_FILE_START):
788
- if file_print:
789
- wfp += file_print # Store the WFP for the current file
790
- cur_size = len(wfp.encode('utf-8'))
791
- file_print = line # Start storing the next file
792
- cur_files += 1
793
- batch_files += 1
794
- else:
795
- file_print += line # Store the rest of the WFP for this file
796
- l_size = cur_size + len(file_print.encode('utf-8'))
797
- # Hit the max post size, so sending the current batch and continue processing
798
- if l_size >= self.max_post_size and wfp:
799
- self.print_debug(
800
- f'Sending {batch_files} ({cur_files}) of'
801
- f' {file_count} ({len(wfp.encode("utf-8"))} bytes) files to the ScanOSS API.'
802
- )
803
- if self.debug and cur_size > self.max_post_size:
804
- Scanner.print_stderr(f'Warning: Post size {cur_size} greater than limit {self.max_post_size}')
805
- scan_resp = self.scanoss_api.scan(wfp, max_component['name']) # Scan current WFP and store
806
- if bar:
807
- bar.next(batch_files)
808
- if scan_resp is not None:
809
- for key, value in scan_resp.items():
810
- raw_output += ' "%s":%s,' % (key, json.dumps(value, indent=2))
811
- for v in value:
812
- if hasattr(v, 'get'):
813
- if v.get('id') != 'none':
814
- vcv = '%s:%s:%s' % (v.get('vendor'), v.get('component'), v.get('version'))
815
- components[vcv] = components[vcv] + 1 if vcv in components else 1
816
- if max_component['hits'] < components[vcv]:
817
- max_component['name'] = v.get('component')
818
- max_component['hits'] = components[vcv]
819
- else:
820
- Scanner.print_stderr(f'Warning: Unknown value: {v}')
821
- else:
822
- success = False
823
- batch_files = 0
824
- wfp = ''
825
- if file_print:
826
- wfp += file_print # Store the WFP for the current file
827
- if wfp:
828
- self.print_debug(
829
- f'Sending {batch_files} ({cur_files}) of'
830
- f' {file_count} ({len(wfp.encode("utf-8"))} bytes) files to the ScanOSS API.'
831
- )
832
- scan_resp = self.scanoss_api.scan(wfp, max_component['name']) # Scan current WFP and store
785
+ bar_ctx = Bar('Scanning', max=file_count) if (not self.quiet and self.isatty) else nullcontext()
786
+
787
+ with bar_ctx as bar:
833
788
  if bar:
834
- bar.next(batch_files)
835
- first = True
836
- if scan_resp is not None:
837
- for key, value in scan_resp.items():
838
- if first:
839
- raw_output += ' "%s":%s' % (key, json.dumps(value, indent=2))
840
- first = False
789
+ bar.next(0)
790
+ with open(wfp_file) as f:
791
+ for line in f:
792
+ if line.startswith(WFP_FILE_START):
793
+ if file_print:
794
+ wfp += file_print # Store the WFP for the current file
795
+ cur_size = len(wfp.encode('utf-8'))
796
+ file_print = line # Start storing the next file
797
+ cur_files += 1
798
+ batch_files += 1
841
799
  else:
842
- raw_output += ',\n "%s":%s' % (key, json.dumps(value, indent=2))
843
- else:
844
- success = False
800
+ file_print += line # Store the rest of the WFP for this file
801
+ l_size = cur_size + len(file_print.encode('utf-8'))
802
+ # Hit the max post size, so sending the current batch and continue processing
803
+ if l_size >= self.max_post_size and wfp:
804
+ self.print_debug(
805
+ f'Sending {batch_files} ({cur_files}) of'
806
+ f' {file_count} ({len(wfp.encode("utf-8"))} bytes) files to the ScanOSS API.'
807
+ )
808
+ if self.debug and cur_size > self.max_post_size:
809
+ Scanner.print_stderr(
810
+ f'Warning: Post size {cur_size} greater than limit {self.max_post_size}'
811
+ )
812
+ scan_resp = self.scanoss_api.scan(wfp, max_component['name']) # Scan current WFP and store
813
+ if bar:
814
+ bar.next(batch_files)
815
+ if scan_resp is not None:
816
+ for key, value in scan_resp.items():
817
+ raw_output += ' "%s":%s,' % (key, json.dumps(value, indent=2))
818
+ for v in value:
819
+ if hasattr(v, 'get'):
820
+ if v.get('id') != 'none':
821
+ vcv = '%s:%s:%s' % (v.get('vendor'), v.get('component'), v.get('version'))
822
+ components[vcv] = components[vcv] + 1 if vcv in components else 1
823
+ if max_component['hits'] < components[vcv]:
824
+ max_component['name'] = v.get('component')
825
+ max_component['hits'] = components[vcv]
826
+ else:
827
+ Scanner.print_stderr(f'Warning: Unknown value: {v}')
828
+ else:
829
+ success = False
830
+ batch_files = 0
831
+ wfp = ''
832
+ if file_print:
833
+ wfp += file_print # Store the WFP for the current file
834
+ if wfp:
835
+ self.print_debug(
836
+ f'Sending {batch_files} ({cur_files}) of'
837
+ f' {file_count} ({len(wfp.encode("utf-8"))} bytes) files to the ScanOSS API.'
838
+ )
839
+ scan_resp = self.scanoss_api.scan(wfp, max_component['name']) # Scan current WFP and store
840
+ if bar:
841
+ bar.next(batch_files)
842
+ first = True
843
+ if scan_resp is not None:
844
+ for key, value in scan_resp.items():
845
+ if first:
846
+ raw_output += ' "%s":%s' % (key, json.dumps(value, indent=2))
847
+ first = False
848
+ else:
849
+ raw_output += ',\n "%s":%s' % (key, json.dumps(value, indent=2))
850
+ else:
851
+ success = False
845
852
  raw_output += '\n}'
846
- if bar:
847
- bar.finish()
848
853
  if self.output_format == 'plain':
849
854
  self.__log_result(raw_output)
850
855
  elif self.output_format == 'cyclonedx':
@@ -1052,19 +1057,16 @@ class Scanner(ScanossBase):
1052
1057
  )
1053
1058
  wfps = ''
1054
1059
  self.print_msg(f'Searching {scan_dir} for files to fingerprint...')
1055
- spinner = None
1056
- if not self.quiet and self.isatty:
1057
- spinner = Spinner('Fingerprinting ')
1058
-
1059
- to_fingerprint_files = file_filters.get_filtered_files_from_folder(scan_dir)
1060
- for file in to_fingerprint_files:
1061
- if spinner:
1062
- spinner.next()
1063
- abs_path = Path(scan_dir, file).resolve()
1064
- self.print_debug(f'Fingerprinting {file}...')
1065
- wfps += self.winnowing.wfp_for_file(str(abs_path), file)
1066
- if spinner:
1067
- spinner.finish()
1060
+ spinner_ctx = Spinner('Fingerprinting ') if (not self.quiet and self.isatty) else nullcontext()
1061
+
1062
+ with spinner_ctx as spinner:
1063
+ to_fingerprint_files = file_filters.get_filtered_files_from_folder(scan_dir)
1064
+ for file in to_fingerprint_files:
1065
+ if spinner:
1066
+ spinner.next()
1067
+ abs_path = Path(scan_dir, file).resolve()
1068
+ self.print_debug(f'Fingerprinting {file}...')
1069
+ wfps += self.winnowing.wfp_for_file(str(abs_path), file)
1068
1070
  if wfps:
1069
1071
  if wfp_file:
1070
1072
  self.print_stderr(f'Writing fingerprints to {wfp_file}')
@@ -157,38 +157,38 @@ class FolderHasher:
157
157
  # Sort the files by name to ensure the hash is the same for the same folder
158
158
  filtered_files.sort()
159
159
 
160
- bar = Bar('Hashing files...', max=len(filtered_files))
161
- full_file_path = ''
162
- for file_path in filtered_files:
163
- try:
164
- file_path_obj = Path(file_path) if isinstance(file_path, str) else file_path
165
- full_file_path = file_path_obj if file_path_obj.is_absolute() else root / file_path_obj
160
+ bar_ctx = Bar('Hashing files...', max=len(filtered_files))
166
161
 
167
- self.base.print_debug(f'\nHashing file {str(full_file_path)}')
162
+ with bar_ctx as bar:
163
+ full_file_path = ''
164
+ for file_path in filtered_files:
165
+ try:
166
+ file_path_obj = Path(file_path) if isinstance(file_path, str) else file_path
167
+ full_file_path = file_path_obj if file_path_obj.is_absolute() else root / file_path_obj
168
168
 
169
- file_bytes = full_file_path.read_bytes()
170
- key = CRC64.get_hash_buff(file_bytes)
171
- key_str = ''.join(f'{b:02x}' for b in key)
172
- rel_path = str(full_file_path.relative_to(root))
169
+ self.base.print_debug(f'\nHashing file {str(full_file_path)}')
173
170
 
174
- file_item = DirectoryFile(rel_path, key, key_str)
171
+ file_bytes = full_file_path.read_bytes()
172
+ key = CRC64.get_hash_buff(file_bytes)
173
+ key_str = ''.join(f'{b:02x}' for b in key)
174
+ rel_path = str(full_file_path.relative_to(root))
175
175
 
176
- current_node = root_node
177
- for part in Path(rel_path).parent.parts:
178
- child_path = str(Path(current_node.path) / part)
179
- if child_path not in current_node.children:
180
- current_node.children[child_path] = DirectoryNode(child_path)
181
- current_node = current_node.children[child_path]
182
- current_node.files.append(file_item)
176
+ file_item = DirectoryFile(rel_path, key, key_str)
183
177
 
184
- root_node.files.append(file_item)
178
+ current_node = root_node
179
+ for part in Path(rel_path).parent.parts:
180
+ child_path = str(Path(current_node.path) / part)
181
+ if child_path not in current_node.children:
182
+ current_node.children[child_path] = DirectoryNode(child_path)
183
+ current_node = current_node.children[child_path]
184
+ current_node.files.append(file_item)
185
185
 
186
- except Exception as e:
187
- self.base.print_debug(f'Skipping file {full_file_path}: {str(e)}')
186
+ root_node.files.append(file_item)
188
187
 
189
- bar.next()
188
+ except Exception as e:
189
+ self.base.print_debug(f'Skipping file {full_file_path}: {str(e)}')
190
190
 
191
- bar.finish()
191
+ bar.next()
192
192
  return root_node
193
193
 
194
194
  def _hash_calc_from_node(self, node: DirectoryNode, current_depth: int = 1) -> dict: