scanoss 1.12.2__py3-none-any.whl → 1.43.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. protoc_gen_swagger/__init__.py +13 -13
  2. protoc_gen_swagger/options/__init__.py +13 -13
  3. protoc_gen_swagger/options/annotations_pb2.py +18 -12
  4. protoc_gen_swagger/options/annotations_pb2.pyi +48 -0
  5. protoc_gen_swagger/options/annotations_pb2_grpc.py +20 -0
  6. protoc_gen_swagger/options/openapiv2_pb2.py +110 -99
  7. protoc_gen_swagger/options/openapiv2_pb2.pyi +1317 -0
  8. protoc_gen_swagger/options/openapiv2_pb2_grpc.py +20 -0
  9. scanoss/__init__.py +18 -18
  10. scanoss/api/__init__.py +17 -17
  11. scanoss/api/common/__init__.py +17 -17
  12. scanoss/api/common/v2/__init__.py +17 -17
  13. scanoss/api/common/v2/scanoss_common_pb2.py +49 -20
  14. scanoss/api/common/v2/scanoss_common_pb2_grpc.py +25 -0
  15. scanoss/api/components/__init__.py +17 -17
  16. scanoss/api/components/v2/__init__.py +17 -17
  17. scanoss/api/components/v2/scanoss_components_pb2.py +68 -43
  18. scanoss/api/components/v2/scanoss_components_pb2_grpc.py +83 -22
  19. scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py +136 -21
  20. scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py +766 -13
  21. scanoss/api/dependencies/__init__.py +17 -17
  22. scanoss/api/dependencies/v2/__init__.py +17 -17
  23. scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py +56 -29
  24. scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py +94 -8
  25. scanoss/api/geoprovenance/__init__.py +23 -0
  26. scanoss/api/geoprovenance/v2/__init__.py +23 -0
  27. scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2.py +92 -0
  28. scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2_grpc.py +381 -0
  29. scanoss/api/licenses/__init__.py +23 -0
  30. scanoss/api/licenses/v2/__init__.py +23 -0
  31. scanoss/api/licenses/v2/scanoss_licenses_pb2.py +84 -0
  32. scanoss/api/licenses/v2/scanoss_licenses_pb2_grpc.py +302 -0
  33. scanoss/api/scanning/__init__.py +17 -17
  34. scanoss/api/scanning/v2/__init__.py +17 -17
  35. scanoss/api/scanning/v2/scanoss_scanning_pb2.py +42 -13
  36. scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py +86 -7
  37. scanoss/api/semgrep/__init__.py +17 -17
  38. scanoss/api/semgrep/v2/__init__.py +17 -17
  39. scanoss/api/semgrep/v2/scanoss_semgrep_pb2.py +50 -23
  40. scanoss/api/semgrep/v2/scanoss_semgrep_pb2_grpc.py +151 -16
  41. scanoss/api/vulnerabilities/__init__.py +17 -17
  42. scanoss/api/vulnerabilities/v2/__init__.py +17 -17
  43. scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py +78 -31
  44. scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py +282 -18
  45. scanoss/cli.py +2359 -370
  46. scanoss/components.py +187 -94
  47. scanoss/constants.py +22 -0
  48. scanoss/cryptography.py +308 -0
  49. scanoss/csvoutput.py +91 -58
  50. scanoss/cyclonedx.py +221 -63
  51. scanoss/data/build_date.txt +1 -1
  52. scanoss/data/osadl-copyleft.json +133 -0
  53. scanoss/data/scanoss-settings-schema.json +254 -0
  54. scanoss/delta.py +197 -0
  55. scanoss/export/__init__.py +23 -0
  56. scanoss/export/dependency_track.py +227 -0
  57. scanoss/file_filters.py +582 -0
  58. scanoss/filecount.py +75 -69
  59. scanoss/gitlabqualityreport.py +214 -0
  60. scanoss/header_filter.py +563 -0
  61. scanoss/inspection/__init__.py +23 -0
  62. scanoss/inspection/policy_check/__init__.py +0 -0
  63. scanoss/inspection/policy_check/dependency_track/__init__.py +0 -0
  64. scanoss/inspection/policy_check/dependency_track/project_violation.py +479 -0
  65. scanoss/inspection/policy_check/policy_check.py +222 -0
  66. scanoss/inspection/policy_check/scanoss/__init__.py +0 -0
  67. scanoss/inspection/policy_check/scanoss/copyleft.py +243 -0
  68. scanoss/inspection/policy_check/scanoss/undeclared_component.py +309 -0
  69. scanoss/inspection/summary/__init__.py +0 -0
  70. scanoss/inspection/summary/component_summary.py +170 -0
  71. scanoss/inspection/summary/license_summary.py +191 -0
  72. scanoss/inspection/summary/match_summary.py +341 -0
  73. scanoss/inspection/utils/file_utils.py +44 -0
  74. scanoss/inspection/utils/license_utils.py +123 -0
  75. scanoss/inspection/utils/markdown_utils.py +63 -0
  76. scanoss/inspection/utils/scan_result_processor.py +417 -0
  77. scanoss/osadl.py +125 -0
  78. scanoss/results.py +275 -0
  79. scanoss/scancodedeps.py +87 -38
  80. scanoss/scanner.py +431 -539
  81. scanoss/scanners/__init__.py +23 -0
  82. scanoss/scanners/container_scanner.py +476 -0
  83. scanoss/scanners/folder_hasher.py +358 -0
  84. scanoss/scanners/scanner_config.py +73 -0
  85. scanoss/scanners/scanner_hfh.py +252 -0
  86. scanoss/scanoss_settings.py +337 -0
  87. scanoss/scanossapi.py +140 -101
  88. scanoss/scanossbase.py +59 -22
  89. scanoss/scanossgrpc.py +799 -251
  90. scanoss/scanpostprocessor.py +294 -0
  91. scanoss/scantype.py +22 -21
  92. scanoss/services/dependency_track_service.py +132 -0
  93. scanoss/spdxlite.py +532 -174
  94. scanoss/threadeddependencies.py +148 -47
  95. scanoss/threadedscanning.py +53 -37
  96. scanoss/utils/__init__.py +23 -0
  97. scanoss/utils/abstract_presenter.py +103 -0
  98. scanoss/utils/crc64.py +96 -0
  99. scanoss/utils/file.py +84 -0
  100. scanoss/utils/scanoss_scan_results_utils.py +41 -0
  101. scanoss/utils/simhash.py +198 -0
  102. scanoss/winnowing.py +241 -63
  103. {scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info}/METADATA +18 -9
  104. scanoss-1.43.1.dist-info/RECORD +110 -0
  105. {scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info}/WHEEL +1 -1
  106. scanoss-1.12.2.dist-info/RECORD +0 -58
  107. {scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info}/entry_points.txt +0 -0
  108. {scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info/licenses}/LICENSE +0 -0
  109. {scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,582 @@
1
+ """
2
+ SPDX-License-Identifier: MIT
3
+
4
+ Copyright (c) 2024, SCANOSS
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
+ """
24
+
25
+ import os
26
+ import sys
27
+ from pathlib import Path
28
+ from typing import List, Optional
29
+
30
+ from pathspec import GitIgnoreSpec
31
+
32
+ from .scanossbase import ScanossBase
33
+
34
# Files to skip.
# Exact base names (lower case) that are ignored by default.
# FileFilters._should_skip_file() lower-cases the base name of each candidate
# file before testing membership in this set.
DEFAULT_SKIPPED_FILES = {
    'gradlew',
    'gradlew.bat',
    'mvnw',
    'mvnw.cmd',
    'gradle-wrapper.jar',
    'maven-wrapper.jar',
    'thumbs.db',
    'babel.config.js',
    'license.txt',
    'license.md',
    'copying.lib',
    'makefile',
}
49
+
50
# Files to skip for folder hashing scans.
# FileFilters._should_skip_file() uses this set instead of DEFAULT_SKIPPED_FILES
# when is_folder_hashing_scan is set. Entries are lower-case base names.
DEFAULT_SKIPPED_FILES_HFH = {
    'gradlew',
    'gradlew.bat',
    'mvnw',
    'mvnw.cmd',
    'gradle-wrapper.jar',
    'maven-wrapper.jar',
    'thumbs.db',
    'babel.config.js',
}
60
+
61
+
62
# Folders to skip.
# Lower-case directory base names ignored by default.
# FileFilters.should_skip_dir() compares the lower-cased base name of a
# directory against this set, and FileFilters._get_operation_patterns() turns
# each entry into a '<name>/' pattern.
DEFAULT_SKIPPED_DIRS = {
    'nbproject',
    'nbbuild',
    'nbdist',
    '__pycache__',
    'venv',
    '_yardoc',
    'eggs',
    'wheels',
    'htmlcov',
    '__pypackages__',
    'example',
    'examples'
}
77
+
78
# Folders to skip for folder hashing scans.
# FileFilters._get_operation_patterns() selects this set instead of
# DEFAULT_SKIPPED_DIRS when is_folder_hashing_scan is set and turns each entry
# into a '<name>/' pattern.
DEFAULT_SKIPPED_DIRS_HFH = {
    'nbproject',
    'nbbuild',
    'nbdist',
    '__pycache__',
    'venv',
    '_yardoc',
    'eggs',
    'wheels',
    'htmlcov',
    '__pypackages__',
    'example',
    'examples',
}
92
+
93
+
94
# Folder endings to skip.
# A directory is skipped when its lower-cased base name ends with one of these
# suffixes (see FileFilters.should_skip_dir()). _get_operation_patterns() also
# emits a '*<suffix>/' pattern per entry, choosing the _HFH set when
# is_folder_hashing_scan is set.
DEFAULT_SKIPPED_DIR_EXT = {'.egg-info'}
DEFAULT_SKIPPED_DIR_EXT_HFH = {'.egg-info'}
97
+
98
# File extensions to skip.
# Each entry is tested with str.endswith() against the lower-cased base name of
# a file (see FileFilters._should_skip_file()), so entries are plain name
# endings and do not have to start with a dot.
# This set is not applied when is_folder_hashing_scan is set.
DEFAULT_SKIPPED_EXT = {
    '.1',
    '.2',
    '.3',
    '.4',
    '.5',
    '.6',
    '.7',
    '.8',
    '.9',
    '.ac',
    '.adoc',
    '.am',
    '.asciidoc',
    '.bmp',
    '.build',
    '.cfg',
    '.chm',
    '.class',
    '.cmake',
    '.cnf',
    '.conf',
    '.config',
    '.contributors',
    '.copying',
    '.crt',
    '.csproj',
    '.css',
    '.csv',
    '.dat',
    '.data',
    '.doc',
    '.docx',
    '.dtd',
    '.dts',
    '.iws',
    '.c9',
    '.c9revisions',
    '.dtsi',
    '.dump',
    '.eot',
    '.eps',
    '.geojson',
    '.gdoc',
    '.gif',
    '.glif',
    '.gmo',
    '.gradle',
    '.guess',
    '.hex',
    '.htm',
    '.html',
    '.ico',
    '.iml',
    '.in',
    '.inc',
    '.info',
    '.ini',
    '.ipynb',
    '.jpeg',
    '.jpg',
    '.json',
    '.jsonld',
    '.lock',
    '.log',
    '.m4',
    '.map',
    '.markdown',
    '.md',
    '.md5',
    '.meta',
    '.mk',
    '.mxml',
    '.o',
    '.otf',
    '.out',
    '.pbtxt',
    '.pdf',
    '.pem',
    '.phtml',
    '.plist',
    '.png',
    '.po',
    '.ppt',
    '.prefs',
    '.properties',
    '.pyc',
    '.qdoc',
    '.result',
    '.rgb',
    '.rst',
    '.scss',
    '.sha',
    '.sha1',
    '.sha2',
    '.sha256',
    '.sln',
    '.spec',
    '.sql',
    '.sub',
    '.svg',
    '.svn-base',
    '.tab',
    '.template',
    '.test',
    '.tex',
    '.tiff',
    '.toml',
    '.ttf',
    '.txt',
    '.utf-8',
    '.vim',
    '.wav',
    '.woff',
    '.woff2',
    '.xht',
    '.xhtml',
    '.xls',
    '.xlsx',
    '.xml',
    '.xpm',
    '.xsd',
    '.xul',
    '.yaml',
    '.yml',
    '.wfp',
    '.editorconfig',
    '.dotcover',
    '.pid',
    '.lcov',
    '.egg',
    '.manifest',
    '.cache',
    '.coverage',
    '.cover',
    '.gem',
    '.lst',
    '.pickle',
    '.pdb',
    '.gml',
    '.pot',
    '.plt',
    '.whml',
    '.pom',
    '.smtml',
    '.min.js',
    '.mf',
    '.base64',
    '.s',
    '.diff',
    '.patch',
    '.rules',
    # File endings (no leading dot): matched against the end of the whole
    # lower-cased file name, e.g. 'readme' matches a file named 'README'.
    '-doc',
    'changelog',
    'config',
    'copying',
    'license',
    'authors',
    'news',
    'licenses',
    'notice',
    'readme',
    'swiftdoc',
    'texidoc',
    'todo',
    'version',
    'ignore',
    'manifest',
    'sqlite',
    'sqlite3',
}
271
+
272
+
273
+ class FileFilters(ScanossBase):
274
+ """
275
+ Filter for determining which files to process during scanning, fingerprinting, etc.
276
+ Handles both inclusion and exclusion rules based on file paths, extensions, and sizes.
277
+ """
278
+
279
def __init__(self, debug: bool = False, trace: bool = False, quiet: bool = False, **kwargs):
    """
    Store the filter options and pre-compute the pattern spec and size rules.

    Args:
        debug (bool): Enable debug output
        trace (bool): Enable trace output
        quiet (bool): Suppress output
        **kwargs: Optional settings:
            scanoss_settings: Settings object queried for skip patterns and skip sizes (default None)
            all_extensions (bool): Do not apply the file name/extension skip rules (default False)
            all_folders (bool): Do not apply the folder skip rules (default False)
            hidden_files_folders (bool): Keep hidden files and folders (default False)
            operation_type (str): Operation type handed to the settings lookups (default 'scanning')
            skip_size (int): Global minimum file size (default 0)
            skip_extensions (list): Extra file endings to skip (default [])
            skip_folders (list): Extra folder names to skip (default [])
            is_folder_hashing_scan (bool): Use the folder hashing (HFH) default lists (default False)
    """
    super().__init__(debug, trace, quiet)

    option = kwargs.get
    # Plain option values first: the two builders below read several of them from self
    self.scanoss_settings = option('scanoss_settings')
    self.hidden_files_folders = option('hidden_files_folders', False)
    self.all_extensions = option('all_extensions', False)
    self.all_folders = option('all_folders', False)
    self.is_folder_hashing_scan = option('is_folder_hashing_scan', False)
    self.skip_size = option('skip_size', 0)
    self.skip_extensions = option('skip_extensions', [])
    self.skip_folders = option('skip_folders', [])

    # Derived state: both lookups are keyed by the same operation type
    operation_type = option('operation_type', 'scanning')
    self.file_folder_pat_spec = self._get_file_folder_pattern_spec(operation_type)
    self.size_pat_rules = self._get_size_limit_pattern_rules(operation_type)
310
+
311
def get_filtered_files_from_folder(self, root: str) -> List[str]:
    """
    Walk a directory tree and return the files that pass the configured filters.

    Directories rejected by should_skip_dir() are pruned from the walk. The files that
    remain are handed to get_filtered_files_from_files() for per-file filtering.

    Args:
        root (str): Root directory to scan or fingerprint

    Returns:
        list[str]: Filtered list of files (as produced by get_filtered_files_from_files()).
                   Empty if root does not exist or is not a directory.
    """
    if self.debug:
        # Summarise the active filter configuration before walking
        notices = []
        if self.file_folder_pat_spec:
            notices.append(f'Running with {len(self.file_folder_pat_spec)} pattern filters.')
        if self.size_pat_rules:
            notices.append(f'Running with {len(self.size_pat_rules)} size pattern rules.')
        if self.skip_size:
            notices.append(f'Running with global skip size: {self.skip_size}')
        if self.skip_extensions:
            notices.append(f'Running with extra global skip extensions: {self.skip_extensions}')
        if self.skip_folders:
            notices.append(f'Running with extra global skip folders: {self.skip_folders}')
        for notice in notices:
            self.print_stderr(notice)

    base = Path(root).resolve()
    if not (base.exists() and base.is_dir()):
        self.print_stderr(f'ERROR: Specified root directory {root} does not exist or is not a directory.')
        return []

    candidates = []
    for current, subdirs, names in os.walk(base):
        current_dir = Path(current)
        if current_dir.is_symlink():  # TODO should we skip symlink folders?
            self.print_msg(f'WARNING: Found symbolic link folder: {current_dir}')
        if self.should_skip_dir(str(current_dir.relative_to(base))):
            # Emptying the list in place stops os.walk from descending any further
            subdirs[:] = []
            continue
        candidates.extend(str(current_dir / name) for name in names)
    # Apply the per-file rules and return the reduced list
    return self.get_filtered_files_from_files(candidates, str(base))
353
+
354
def get_filtered_files_from_files(self, files: List[str], scan_root: Optional[str] = None) -> List[str]:
    """
    Retrieve a list of files to scan or fingerprint from a given list of files based on filter settings.

    A file is kept only if it exists, is a regular non-symlink file, is not hidden (unless
    hidden files are enabled), is not rejected by _should_skip_file(), is not empty and has
    a size inside the limits returned by _get_operation_size_limits().

    Args:
        files (List[str]): List of files to scan or fingerprint
        scan_root (str): Root directory to scan or fingerprint. When given, returned paths are
                         relative to it and files outside of it are ignored.

    Returns:
        list[str]: Filtered list of files to scan or fingerprint
    """
    filtered_files = []
    for file_path in files:
        path_obj = Path(file_path)
        try:
            rel_path = str(path_obj.relative_to(scan_root)) if scan_root else str(path_obj)
        except ValueError:
            # relative_to() raises ValueError when the file is not located under scan_root
            self.print_debug(f'Ignoring file: {file_path} (broken symlink)')
            continue

        if not path_obj.exists() or not path_obj.is_file() or path_obj.is_symlink():
            self.print_debug(
                f'WARNING: File {rel_path} does not exist, is not a file, or is a symbolic link. Ignoring.'
            )
            continue

        # Only inspect the path relative to the scan root: a hidden folder *above* the root
        # (e.g. a project checked out under ~/.cache) must not hide every file below it.
        # '..' is a parent reference, not a hidden entry.
        if not self.hidden_files_folders and any(
            part.startswith('.') and part != '..' for part in Path(rel_path).parts
        ):
            self.print_debug(f'Skipping file: {rel_path} (in hidden directory or is hidden file)')
            continue

        if self._should_skip_file(rel_path):
            continue
        try:
            file_size = path_obj.stat().st_size
            if file_size == 0:
                self.print_debug(f'Skipping file: {rel_path} (empty file)')
                continue
            # The full path is handed over; _get_operation_size_limits() relativises it itself
            min_size, max_size = self._get_operation_size_limits(file_path)
            if min_size <= file_size <= max_size:
                filtered_files.append(rel_path)
            else:
                self.print_debug(
                    f'Skipping file: {rel_path} (size {file_size} outside limits {min_size}-{max_size})'
                )
        except OSError as e:
            # Best effort: a file whose size cannot be read is left out of the result
            self.print_debug(f'Error getting size for {rel_path}: {e}')
    # End file loop
    return filtered_files
405
+
406
+ def _get_file_folder_pattern_spec(self, operation_type: str = 'scanning'):
407
+ """
408
+ Get file path pattern specification.
409
+
410
+ Args:
411
+ operation_type (str): Type of operation ('scanning' or 'fingerprinting')
412
+
413
+ Returns:
414
+ GitIgnoreSpec: GitIgnoreSpec object containing the file path patterns
415
+ """
416
+ patterns = self._get_operation_patterns(operation_type)
417
+ if patterns:
418
+ return GitIgnoreSpec.from_lines(patterns)
419
+ return None
420
+
421
+ def _get_size_limit_pattern_rules(self, operation_type: str = 'scanning'):
422
+ """
423
+ Get size limit pattern rules.
424
+
425
+ Args:
426
+ operation_type (str): Type of operation ('scanning' or 'fingerprinting')
427
+
428
+ Returns:
429
+ List of size limit pattern rules
430
+ """
431
+ if self.scanoss_settings:
432
+ size_rules = self.scanoss_settings.get_skip_sizes(operation_type)
433
+ if size_rules:
434
+ size_rules_with_patterns = []
435
+ for rule in size_rules:
436
+ patterns = rule.get('patterns', [])
437
+ if not patterns:
438
+ continue
439
+ size_rules_with_patterns.append(rule)
440
+ return size_rules_with_patterns
441
+ return None
442
+
443
+ def _get_operation_patterns(self, operation_type: str) -> List[str]:
444
+ """
445
+ Get patterns specific to the operation type, combining defaults with settings.
446
+
447
+ Args:
448
+ operation_type (str): Type of operation ('scanning' or 'fingerprinting')
449
+
450
+ Returns:
451
+ List[str]: Combined list of patterns to skip
452
+ """
453
+ patterns = []
454
+
455
+ # Default patterns for skipping directories
456
+ if not self.all_folders:
457
+ DEFAULT_SKIPPED_DIR_LIST = DEFAULT_SKIPPED_DIRS_HFH if self.is_folder_hashing_scan else DEFAULT_SKIPPED_DIRS
458
+ DEFAULT_SKIPPED_DIR_EXT_LIST = (
459
+ DEFAULT_SKIPPED_DIR_EXT_HFH if self.is_folder_hashing_scan else DEFAULT_SKIPPED_DIR_EXT
460
+ )
461
+ for dir_name in DEFAULT_SKIPPED_DIR_LIST:
462
+ patterns.append(f'{dir_name}/')
463
+ for dir_extension in DEFAULT_SKIPPED_DIR_EXT_LIST:
464
+ patterns.append(f'*{dir_extension}/')
465
+
466
+ # Custom patterns added in SCANOSS settings file
467
+ if self.scanoss_settings:
468
+ patterns.extend(self.scanoss_settings.get_skip_patterns(operation_type))
469
+ return patterns
470
+
471
+ def _get_operation_size_limits(self, file_path: str = None) -> tuple:
472
+ """
473
+ Get size limits specific to the operation type and file path.
474
+
475
+ Args:
476
+ file_path (str, optional): Path to the file to check against patterns. If None, returns default limits.
477
+
478
+ Returns:
479
+ tuple: (min_size, max_size) tuple for the given file path and operation type
480
+ """
481
+ min_size = 0
482
+ max_size = sys.maxsize
483
+ # Apply global minimum file size if specified
484
+ if self.skip_size > 0:
485
+ min_size = self.skip_size
486
+ return min_size, max_size
487
+ # Return default size limits if no settings specified
488
+ if not self.scanoss_settings or not file_path or not self.size_pat_rules:
489
+ return min_size, max_size
490
+ try:
491
+ rel_path = os.path.relpath(file_path)
492
+ except ValueError:
493
+ rel_path = os.path.basename(file_path)
494
+ rel_path_lower = rel_path.lower()
495
+ # Cycle through each rule looking for a match
496
+ for rule in self.size_pat_rules:
497
+ patterns = rule.get('patterns', [])
498
+ if patterns:
499
+ path_spec = GitIgnoreSpec.from_lines(patterns)
500
+ if path_spec.match_file(rel_path_lower):
501
+ return rule.get('min', min_size), rule.get('max', max_size)
502
+ # End rules loop
503
+ return min_size, max_size
504
+
505
def should_skip_dir(self, dir_rel_path: str) -> bool:  # noqa: PLR0911
    """
    Check if a directory should be skipped based on the hidden, default, extra and pattern rules.

    The default name/suffix lists follow is_folder_hashing_scan, the same way
    _get_operation_patterns() selects them.

    Args:
        dir_rel_path (str): Relative path to the directory

    Returns:
        bool: True if directory should be skipped, False otherwise
    """
    dir_name = os.path.basename(dir_rel_path)
    dir_path = Path(dir_rel_path)
    # '.' is the scan root itself and is never treated as hidden
    if (
        not self.hidden_files_folders
        and dir_path != Path('.')
        and any(part.startswith('.') for part in dir_path.parts)
    ):
        self.print_debug(f'Skipping directory: {dir_rel_path} (hidden directory)')
        return True
    if self.all_folders:
        return False

    # Pick the default lists that match the kind of scan being run
    if self.is_folder_hashing_scan:
        default_dirs, default_dir_exts = DEFAULT_SKIPPED_DIRS_HFH, DEFAULT_SKIPPED_DIR_EXT_HFH
    else:
        default_dirs, default_dir_exts = DEFAULT_SKIPPED_DIRS, DEFAULT_SKIPPED_DIR_EXT

    dir_name_lower = dir_name.lower()
    if dir_name_lower in default_dirs:
        self.print_debug(f'Skipping directory: {dir_rel_path} (matches default skip directory)')
        return True
    # Extra folders are compared against the name exactly as given (case-sensitive)
    if self.skip_folders and dir_name in self.skip_folders:
        self.print_debug(f'Skipping directory: {dir_rel_path} (matches skip folder)')
        return True
    matched_ext = next((ext for ext in default_dir_exts if dir_name_lower.endswith(ext)), None)
    if matched_ext is not None:
        self.print_debug(f'Skipping directory: {dir_rel_path} (matches default skip extension: {matched_ext})')
        return True

    if self.file_folder_pat_spec and self.file_folder_pat_spec.match_file(dir_rel_path):
        self.print_debug(f'Skipping directory: {dir_rel_path} (matches custom pattern)')
        return True
    return False
542
+
543
+ def _should_skip_file(self, file_rel_path: str) -> bool: # noqa: PLR0911
544
+ """
545
+ Check if a file should be skipped based on operation type and default rules.
546
+
547
+ Args:
548
+ file_rel_path (str): Relative path to the file
549
+
550
+ Returns:
551
+ bool: True if file should be skipped, False otherwise
552
+ """
553
+ file_name = os.path.basename(file_rel_path)
554
+ DEFAULT_SKIPPED_EXT_LIST = {} if self.is_folder_hashing_scan else DEFAULT_SKIPPED_EXT
555
+ DEFAULT_SKIPPED_FILES_LIST = DEFAULT_SKIPPED_FILES_HFH if self.is_folder_hashing_scan else DEFAULT_SKIPPED_FILES
556
+
557
+ if not self.hidden_files_folders and file_name.startswith('.'):
558
+ self.print_debug(f'Skipping file: {file_rel_path} (hidden file)')
559
+ return True
560
+ if self.all_extensions:
561
+ return False
562
+ file_name_lower = file_name.lower()
563
+ # Look for exact files
564
+ if file_name_lower in DEFAULT_SKIPPED_FILES_LIST:
565
+ self.print_debug(f'Skipping file: {file_rel_path} (matches default skip file)')
566
+ return True
567
+ # Look for file endings
568
+ for ending in DEFAULT_SKIPPED_EXT_LIST:
569
+ if file_name_lower.endswith(ending):
570
+ self.print_debug(f'Skipping file: {file_rel_path} (matches default skip ending: {ending})')
571
+ return True
572
+ # Look for custom (extra) endings
573
+ if self.skip_extensions:
574
+ for ending in self.skip_extensions:
575
+ if file_name_lower.endswith(ending):
576
+ self.print_debug(f'Skipping file: {file_rel_path} (matches skip extension)')
577
+ return True
578
+ # Check for file patterns
579
+ if self.file_folder_pat_spec and self.file_folder_pat_spec.match_file(file_rel_path):
580
+ self.print_debug(f'Skipping file: {file_rel_path} (matches custom pattern)')
581
+ return True
582
+ return False