scanoss 1.18.0__py3-none-any.whl → 1.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scanoss/__init__.py +1 -1
- scanoss/cli.py +63 -79
- scanoss/cyclonedx.py +5 -0
- scanoss/data/build_date.txt +1 -1
- scanoss/data/scanoss-settings-schema.json +254 -0
- scanoss/file_filters.py +525 -0
- scanoss/scanner.py +163 -266
- scanoss/scanoss_settings.py +105 -40
- scanoss/scanpostprocessor.py +89 -81
- scanoss/utils/__init__.py +23 -0
- scanoss/utils/file.py +57 -0
- {scanoss-1.18.0.dist-info → scanoss-1.19.0.dist-info}/METADATA +6 -4
- {scanoss-1.18.0.dist-info → scanoss-1.19.0.dist-info}/RECORD +17 -13
- {scanoss-1.18.0.dist-info → scanoss-1.19.0.dist-info}/WHEEL +1 -1
- {scanoss-1.18.0.dist-info → scanoss-1.19.0.dist-info}/LICENSE +0 -0
- {scanoss-1.18.0.dist-info → scanoss-1.19.0.dist-info}/entry_points.txt +0 -0
- {scanoss-1.18.0.dist-info → scanoss-1.19.0.dist-info}/top_level.txt +0 -0
scanoss/file_filters.py
ADDED
|
@@ -0,0 +1,525 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
Copyright (c) 2024, SCANOSS
|
|
5
|
+
|
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
in the Software without restriction, including without limitation the rights
|
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
furnished to do so, subject to the following conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be included in
|
|
14
|
+
all copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
22
|
+
THE SOFTWARE.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
import os
|
|
26
|
+
import sys
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
from typing import List
|
|
29
|
+
|
|
30
|
+
from pathspec import GitIgnoreSpec
|
|
31
|
+
|
|
32
|
+
from .scanoss_settings import ScanossSettings
|
|
33
|
+
from .scanossbase import ScanossBase
|
|
34
|
+
|
|
35
|
+
# Exact file names that are skipped by default.
# Entries are lower case because FileFilters compares them with the lower-cased base name.
DEFAULT_SKIPPED_FILES = {
    # Build-tool wrappers
    'gradlew', 'gradlew.bat', 'gradle-wrapper.jar',
    'mvnw', 'mvnw.cmd', 'maven-wrapper.jar',
    # Licence texts
    'license.txt', 'license.md', 'copying.lib',
    # Miscellaneous
    'thumbs.db', 'babel.config.js', 'makefile',
}
|
|
50
|
+
# Directory names that are skipped by default (compared with the lower-cased directory name).
DEFAULT_SKIPPED_DIRS = {
    # NetBeans project folders
    'nbproject', 'nbbuild', 'nbdist',
    # Python tooling / packaging output
    '__pycache__', 'venv', 'eggs', 'wheels', 'htmlcov', '__pypackages__',
    # Miscellaneous
    '_yardoc',
}
|
|
63
|
+
# Directory name endings that are skipped by default (checked with str.endswith on the lower-cased name).
DEFAULT_SKIPPED_DIR_EXT = {'.egg-info'}
|
|
67
|
+
# File name endings that are skipped by default.
# Every entry is tested with str.endswith against the lower-cased file name, so the set holds
# both dotted extensions ('.md') and bare endings ('readme').
DEFAULT_SKIPPED_EXT = {
    # Dotted extensions (alphabetical)
    '.1', '.2', '.3', '.4', '.5', '.6', '.7', '.8', '.9',
    '.ac', '.adoc', '.am', '.asciidoc',
    '.bmp', '.build',
    '.c9', '.c9revisions', '.cache', '.cfg', '.chm', '.class', '.cmake', '.cnf', '.conf',
    '.config', '.contributors', '.copying', '.cover', '.coverage', '.crt', '.csproj', '.css', '.csv',
    '.dat', '.data', '.doc', '.docx', '.dotcover', '.dtd', '.dts', '.dtsi', '.dump',
    '.editorconfig', '.egg', '.eot', '.eps',
    '.gdoc', '.gem', '.geojson', '.gif', '.glif', '.gml', '.gmo', '.gradle', '.guess',
    '.hex', '.htm', '.html',
    '.ico', '.iml', '.in', '.inc', '.info', '.ini', '.ipynb', '.iws',
    '.jpeg', '.jpg', '.json', '.jsonld',
    '.lcov', '.lock', '.log', '.lst',
    '.m4', '.manifest', '.map', '.markdown', '.md', '.md5', '.meta', '.mk', '.mxml',
    '.o', '.otf', '.out',
    '.pbtxt', '.pdb', '.pdf', '.pem', '.phtml', '.pickle', '.pid', '.plist', '.plt',
    '.png', '.po', '.pot', '.ppt', '.prefs', '.properties', '.pyc',
    '.qdoc',
    '.result', '.rgb', '.rst',
    '.scss', '.sha', '.sha1', '.sha2', '.sha256', '.sln', '.spec', '.sql', '.sub', '.svg', '.svn-base',
    '.tab', '.template', '.test', '.tex', '.tiff', '.toml', '.ttf', '.txt',
    '.utf-8',
    '.vim',
    '.wav', '.wfp', '.woff', '.woff2',
    '.xht', '.xhtml', '.xls', '.xlsx', '.xml', '.xpm', '.xsd', '.xul',
    '.yaml', '.yml',
    # Bare file endings (no leading dot)
    '-doc', 'authors', 'changelog', 'config', 'copying', 'ignore',
    'license', 'licenses', 'manifest', 'news', 'notice', 'readme',
    'sqlite', 'sqlite3', 'swiftdoc', 'texidoc', 'todo', 'version',
}
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
class FileFilters(ScanossBase):
|
|
233
|
+
"""
|
|
234
|
+
Filter for determining which files to process during scanning, fingerprinting, etc.
|
|
235
|
+
Handles both inclusion and exclusion rules based on file paths, extensions, and sizes.
|
|
236
|
+
"""
|
|
237
|
+
|
|
238
|
+
def __init__(
    self,
    debug: bool = False,
    trace: bool = False,
    quiet: bool = False,
    scanoss_settings: 'ScanossSettings | None' = None,
    all_extensions: bool = False,
    all_folders: bool = False,
    hidden_files_folders: bool = False,
    operation_type: str = 'scanning',
    skip_size: int = 0,
    skip_extensions=None,
    skip_folders=None,
):
    """
    Set up the filter from the built-in defaults plus any caller supplied options.

    Args:
        debug (bool): Enable debug output
        trace (bool): Enable trace output
        quiet (bool): Suppress output
        scanoss_settings (ScanossSettings): Optional settings object providing skip patterns and size rules
        all_extensions (bool): Do not apply the default/extra file name and ending filters
        all_folders (bool): Do not apply the default/extra folder filters
        hidden_files_folders (bool): Include hidden (dot-prefixed) files and folders
        operation_type (str): Operation the settings are looked up for ('scanning' or 'fingerprinting')
        skip_size (int): Global minimum file size; when greater than zero it is used as the lower size limit
        skip_extensions: Extra file name endings to skip (defaults to an empty list)
        skip_folders: Extra folder names to skip (defaults to an empty list)
    """
    super().__init__(debug, trace, quiet)

    # Plain option flags
    self.all_extensions = all_extensions
    self.all_folders = all_folders
    self.hidden_files_folders = hidden_files_folders
    self.skip_size = skip_size
    # Optional collections fall back to a fresh empty list
    self.skip_extensions = [] if skip_extensions is None else skip_extensions
    self.skip_folders = [] if skip_folders is None else skip_folders
    # The settings must be stored before the two lookups below, which read them
    self.scanoss_settings = scanoss_settings
    self.file_folder_pat_spec = self._get_file_folder_pattern_spec(operation_type)
    self.size_pat_rules = self._get_size_limit_pattern_rules(operation_type)
|
|
280
|
+
|
|
281
|
+
def get_filtered_files_from_folder(self, root: str) -> List[str]:
    """
    Walk a directory tree and return the files that survive the configured filters.

    Directories rejected by ``_should_skip_dir`` are pruned from the walk. Every file found in
    the remaining directories is then handed to ``get_filtered_files_from_files``.

    Args:
        root (str): Root directory to scan or fingerprint

    Returns:
        list[str]: Filtered list of files to scan or fingerprint (empty if ``root`` is not a directory)
    """
    if self.debug:
        # Report the active filter configuration
        if self.file_folder_pat_spec:
            self.print_stderr(f'Running with {len(self.file_folder_pat_spec)} pattern filters.')
        if self.size_pat_rules:
            self.print_stderr(f'Running with {len(self.size_pat_rules)} size pattern rules.')
        if self.skip_size:
            self.print_stderr(f'Running with global skip size: {self.skip_size}')
        if self.skip_extensions:
            self.print_stderr(f'Running with extra global skip extensions: {self.skip_extensions}')
        if self.skip_folders:
            self.print_stderr(f'Running with extra global skip folders: {self.skip_folders}')

    base_dir = Path(root).resolve()
    if not (base_dir.exists() and base_dir.is_dir()):
        self.print_stderr(f'ERROR: Specified root directory {root} does not exist or is not a directory.')
        return []

    candidates = []
    for current, subdirs, names in os.walk(base_dir):
        current_dir = Path(current)
        if current_dir.is_symlink():  # TODO should we skip symlink folders?
            self.print_msg(f'WARNING: Found symbolic link folder: {current_dir}')

        if self._should_skip_dir(str(current_dir.relative_to(base_dir))):
            # Emptying the list in place stops os.walk from descending any further
            subdirs.clear()
            continue
        candidates.extend(str(current_dir / name) for name in names)

    # Apply the per-file filters to everything collected
    return self.get_filtered_files_from_files(candidates, str(base_dir))
|
|
323
|
+
|
|
324
|
+
def get_filtered_files_from_files(self, files: List[str], scan_root: str = None) -> List[str]:
    """
    Retrieve a list of files to scan or fingerprint from a given list of files based on filter settings.

    A file is dropped when it is missing, not a regular file, a symbolic link, rejected by
    ``_should_skip_file``, empty, or outside the size limits that apply to it.

    Args:
        files (List[str]): List of files to scan or fingerprint
        scan_root (str): Root directory the returned paths are made relative to.
            If not given, paths are made relative to the current working directory.

    Returns:
        list[str]: Filtered list of file paths, relative to ``scan_root``
    """
    filtered_files = []
    for file_path in files:
        # isfile() is already False for missing paths (including broken symlinks)
        if not os.path.isfile(file_path) or os.path.islink(file_path):
            self.print_debug(
                f'WARNING: File {file_path} does not exist, is not a file, or is a symbolic link. Ignoring.'
            )
            continue
        try:
            rel_path = os.path.relpath(file_path, scan_root) if scan_root else os.path.relpath(file_path)
        except ValueError:
            # relpath() raises ValueError on Windows when the path and the start directory are on
            # different drives, so no relative path exists. The log text is kept as it was.
            self.print_debug(f'Ignoring file: {file_path} (broken symlink)')
            continue
        if self._should_skip_file(rel_path):
            continue
        try:
            file_size = os.path.getsize(file_path)
            if file_size == 0:
                self.print_debug(f'Skipping file: {rel_path} (empty file)')
                continue
            # Match size rules against the scan-root-relative path, the same path the skip
            # patterns see. Passing the absolute path made the lookup relative to the current
            # working directory instead, so rules only applied when scanning from the root itself.
            min_size, max_size = self._get_operation_size_limits(rel_path)
            if min_size <= file_size <= max_size:
                filtered_files.append(rel_path)
            else:
                self.print_debug(
                    f'Skipping file: {rel_path} (size {file_size} outside limits {min_size}-{max_size})'
                )
        except OSError as e:
            self.print_debug(f'Error getting size for {rel_path}: {e}')
    # End file loop
    return filtered_files
|
|
369
|
+
|
|
370
|
+
def _get_file_folder_pattern_spec(self, operation_type: str = 'scanning'):
|
|
371
|
+
"""
|
|
372
|
+
Get file path pattern specification.
|
|
373
|
+
|
|
374
|
+
:param operation_type: which operation is being performed
|
|
375
|
+
:return: List of file path patterns
|
|
376
|
+
"""
|
|
377
|
+
patterns = self._get_operation_patterns(operation_type)
|
|
378
|
+
if patterns:
|
|
379
|
+
return GitIgnoreSpec.from_lines(patterns)
|
|
380
|
+
return None
|
|
381
|
+
|
|
382
|
+
def _get_size_limit_pattern_rules(self, operation_type: str = 'scanning'):
|
|
383
|
+
"""
|
|
384
|
+
Get size limit pattern rules.
|
|
385
|
+
|
|
386
|
+
:param operation_type: which operation is being performed
|
|
387
|
+
:return: List of size limit pattern rules
|
|
388
|
+
"""
|
|
389
|
+
if self.scanoss_settings:
|
|
390
|
+
size_rules = self.scanoss_settings.get_skip_sizes(operation_type)
|
|
391
|
+
if size_rules:
|
|
392
|
+
size_rules_with_patterns = []
|
|
393
|
+
for rule in size_rules:
|
|
394
|
+
patterns = rule.get('patterns', [])
|
|
395
|
+
if not patterns:
|
|
396
|
+
continue
|
|
397
|
+
size_rules_with_patterns.append(rule)
|
|
398
|
+
return size_rules_with_patterns
|
|
399
|
+
return None
|
|
400
|
+
|
|
401
|
+
def _get_operation_patterns(self, operation_type: str) -> List[str]:
|
|
402
|
+
"""
|
|
403
|
+
Get patterns specific to the operation type, combining defaults with settings.
|
|
404
|
+
|
|
405
|
+
Args:
|
|
406
|
+
operation_type (str): Type of operation ('scanning' or 'fingerprinting')
|
|
407
|
+
|
|
408
|
+
Returns:
|
|
409
|
+
List[str]: Combined list of patterns to skip
|
|
410
|
+
"""
|
|
411
|
+
patterns = []
|
|
412
|
+
if self.scanoss_settings:
|
|
413
|
+
patterns.extend(self.scanoss_settings.get_skip_patterns(operation_type))
|
|
414
|
+
return patterns
|
|
415
|
+
|
|
416
|
+
def _get_operation_size_limits(self, file_path: str = None) -> tuple:
|
|
417
|
+
"""
|
|
418
|
+
Get size limits specific to the operation type and file path.
|
|
419
|
+
|
|
420
|
+
Args:
|
|
421
|
+
file_path (str, optional): Path to the file to check against patterns. If None, returns default limits.
|
|
422
|
+
|
|
423
|
+
Returns:
|
|
424
|
+
tuple: (min_size, max_size) tuple for the given file path and operation type
|
|
425
|
+
"""
|
|
426
|
+
min_size = 0
|
|
427
|
+
max_size = sys.maxsize
|
|
428
|
+
# Apply global minimum file size if specified
|
|
429
|
+
if self.skip_size > 0:
|
|
430
|
+
min_size = self.skip_size
|
|
431
|
+
return min_size, max_size
|
|
432
|
+
# Return default size limits if no settings specified
|
|
433
|
+
if not self.scanoss_settings or not file_path or not self.size_pat_rules:
|
|
434
|
+
return min_size, max_size
|
|
435
|
+
try:
|
|
436
|
+
rel_path = os.path.relpath(file_path)
|
|
437
|
+
except ValueError:
|
|
438
|
+
rel_path = os.path.basename(file_path)
|
|
439
|
+
rel_path_lower = rel_path.lower()
|
|
440
|
+
# Cycle through each rule looking for a match
|
|
441
|
+
for rule in self.size_pat_rules:
|
|
442
|
+
patterns = rule.get('patterns', [])
|
|
443
|
+
if patterns:
|
|
444
|
+
path_spec = GitIgnoreSpec.from_lines(patterns)
|
|
445
|
+
if path_spec.match_file(rel_path_lower):
|
|
446
|
+
return rule.get('min', min_size), rule.get('max', max_size)
|
|
447
|
+
# End rules loop
|
|
448
|
+
return min_size, max_size
|
|
449
|
+
|
|
450
|
+
def _should_skip_dir(self, dir_rel_path: str) -> bool:
|
|
451
|
+
"""
|
|
452
|
+
Check if a directory should be skipped based on operation type and default rules.
|
|
453
|
+
|
|
454
|
+
Args:
|
|
455
|
+
dir_rel_path (str): Relative path to the directory
|
|
456
|
+
|
|
457
|
+
Returns:
|
|
458
|
+
bool: True if directory should be skipped, False otherwise
|
|
459
|
+
"""
|
|
460
|
+
dir_name = os.path.basename(dir_rel_path)
|
|
461
|
+
dir_path = Path(dir_rel_path)
|
|
462
|
+
if (
|
|
463
|
+
not self.hidden_files_folders
|
|
464
|
+
and dir_path != Path('.')
|
|
465
|
+
and any(part.startswith('.') for part in dir_path.parts)
|
|
466
|
+
):
|
|
467
|
+
self.print_debug(f'Skipping directory: {dir_rel_path} (hidden directory)')
|
|
468
|
+
return True
|
|
469
|
+
if self.all_folders:
|
|
470
|
+
return False
|
|
471
|
+
dir_name_lower = dir_name.lower()
|
|
472
|
+
if dir_name_lower in DEFAULT_SKIPPED_DIRS:
|
|
473
|
+
self.print_debug(f'Skipping directory: {dir_rel_path} (matches default skip directory)')
|
|
474
|
+
return True
|
|
475
|
+
if self.skip_folders and dir_name in self.skip_folders:
|
|
476
|
+
self.print_debug(f'Skipping directory: {dir_rel_path} (matches skip folder)')
|
|
477
|
+
return True
|
|
478
|
+
for ext in DEFAULT_SKIPPED_DIR_EXT:
|
|
479
|
+
if dir_name_lower.endswith(ext):
|
|
480
|
+
self.print_debug(f'Skipping directory: {dir_rel_path} (matches default skip extension: {ext})')
|
|
481
|
+
return True
|
|
482
|
+
|
|
483
|
+
if self.file_folder_pat_spec and self.file_folder_pat_spec.match_file(dir_rel_path):
|
|
484
|
+
self.print_debug(f'Skipping directory: {dir_rel_path} (matches custom pattern)')
|
|
485
|
+
return True
|
|
486
|
+
return False
|
|
487
|
+
|
|
488
|
+
def _should_skip_file(self, file_rel_path: str) -> bool:
|
|
489
|
+
"""
|
|
490
|
+
Check if a file should be skipped based on operation type and default rules.
|
|
491
|
+
|
|
492
|
+
Args:
|
|
493
|
+
file_rel_path (str): Relative path to the file
|
|
494
|
+
|
|
495
|
+
Returns:
|
|
496
|
+
bool: True if file should be skipped, False otherwise
|
|
497
|
+
"""
|
|
498
|
+
file_name = os.path.basename(file_rel_path)
|
|
499
|
+
|
|
500
|
+
if not self.hidden_files_folders and file_name.startswith('.'):
|
|
501
|
+
self.print_debug(f'Skipping file: {file_rel_path} (hidden file)')
|
|
502
|
+
return True
|
|
503
|
+
if self.all_extensions:
|
|
504
|
+
return False
|
|
505
|
+
file_name_lower = file_name.lower()
|
|
506
|
+
# Look for exact files
|
|
507
|
+
if file_name_lower in DEFAULT_SKIPPED_FILES:
|
|
508
|
+
self.print_debug(f'Skipping file: {file_rel_path} (matches default skip file)')
|
|
509
|
+
return True
|
|
510
|
+
# Look for file endings
|
|
511
|
+
for ending in DEFAULT_SKIPPED_EXT:
|
|
512
|
+
if file_name_lower.endswith(ending):
|
|
513
|
+
self.print_debug(f'Skipping file: {file_rel_path} (matches default skip ending: {ending})')
|
|
514
|
+
return True
|
|
515
|
+
# Look for custom (extra) endings
|
|
516
|
+
if self.skip_extensions:
|
|
517
|
+
for ending in self.skip_extensions:
|
|
518
|
+
if file_name_lower.endswith(ending):
|
|
519
|
+
self.print_debug(f'Skipping file: {file_rel_path} (matches skip extension)')
|
|
520
|
+
return True
|
|
521
|
+
# Check for file patterns
|
|
522
|
+
if self.file_folder_pat_spec and self.file_folder_pat_spec.match_file(file_rel_path):
|
|
523
|
+
self.print_debug(f'Skipping file: {file_rel_path} (matches custom pattern)')
|
|
524
|
+
return True
|
|
525
|
+
return False
|