scanoss 1.20.6__py3-none-any.whl → 1.23.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- protoc_gen_swagger/options/annotations_pb2.py +9 -12
- protoc_gen_swagger/options/annotations_pb2_grpc.py +1 -1
- protoc_gen_swagger/options/openapiv2_pb2.py +96 -98
- protoc_gen_swagger/options/openapiv2_pb2_grpc.py +1 -1
- scanoss/__init__.py +1 -1
- scanoss/api/common/v2/scanoss_common_pb2.py +20 -18
- scanoss/api/common/v2/scanoss_common_pb2_grpc.py +1 -1
- scanoss/api/components/v2/scanoss_components_pb2.py +38 -48
- scanoss/api/components/v2/scanoss_components_pb2_grpc.py +96 -142
- scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py +42 -22
- scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py +185 -75
- scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py +32 -30
- scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py +83 -75
- scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2.py +49 -0
- scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2_grpc.py +142 -0
- scanoss/api/scanning/v2/scanoss_scanning_pb2.py +20 -10
- scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py +70 -40
- scanoss/api/semgrep/v2/scanoss_semgrep_pb2.py +18 -22
- scanoss/api/semgrep/v2/scanoss_semgrep_pb2_grpc.py +49 -71
- scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py +27 -37
- scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py +72 -109
- scanoss/cli.py +393 -84
- scanoss/components.py +21 -11
- scanoss/constants.py +12 -0
- scanoss/data/build_date.txt +1 -1
- scanoss/file_filters.py +272 -57
- scanoss/results.py +92 -109
- scanoss/scanners/__init__.py +23 -0
- scanoss/scanners/container_scanner.py +474 -0
- scanoss/scanners/folder_hasher.py +302 -0
- scanoss/scanners/scanner_config.py +73 -0
- scanoss/scanners/scanner_hfh.py +173 -0
- scanoss/scanoss_settings.py +9 -5
- scanoss/scanossbase.py +9 -3
- scanoss/scanossgrpc.py +143 -18
- scanoss/threadedscanning.py +6 -6
- scanoss/utils/abstract_presenter.py +103 -0
- scanoss/utils/crc64.py +96 -0
- scanoss/utils/simhash.py +198 -0
- {scanoss-1.20.6.dist-info → scanoss-1.23.0.dist-info}/METADATA +2 -1
- scanoss-1.23.0.dist-info/RECORD +83 -0
- {scanoss-1.20.6.dist-info → scanoss-1.23.0.dist-info}/WHEEL +1 -1
- scanoss/api/provenance/v2/scanoss_provenance_pb2.py +0 -42
- scanoss/api/provenance/v2/scanoss_provenance_pb2_grpc.py +0 -108
- scanoss-1.20.6.dist-info/RECORD +0 -74
- /scanoss/api/{provenance → geoprovenance}/__init__.py +0 -0
- /scanoss/api/{provenance → geoprovenance}/v2/__init__.py +0 -0
- {scanoss-1.20.6.dist-info → scanoss-1.23.0.dist-info}/entry_points.txt +0 -0
- {scanoss-1.20.6.dist-info → scanoss-1.23.0.dist-info}/licenses/LICENSE +0 -0
- {scanoss-1.20.6.dist-info → scanoss-1.23.0.dist-info}/top_level.txt +0 -0
scanoss/components.py
CHANGED
|
@@ -39,7 +39,7 @@ class Components(ScanossBase):
|
|
|
39
39
|
Class for Component functionality
|
|
40
40
|
"""
|
|
41
41
|
|
|
42
|
-
def __init__(
|
|
42
|
+
def __init__( # noqa: PLR0913, PLR0915
|
|
43
43
|
self,
|
|
44
44
|
debug: bool = False,
|
|
45
45
|
trace: bool = False,
|
|
@@ -244,7 +244,7 @@ class Components(ScanossBase):
|
|
|
244
244
|
self._close_file(output_file, file)
|
|
245
245
|
return success
|
|
246
246
|
|
|
247
|
-
def search_components(
|
|
247
|
+
def search_components( # noqa: PLR0913, PLR0915
|
|
248
248
|
self,
|
|
249
249
|
output_file: str = None,
|
|
250
250
|
json_file: str = None,
|
|
@@ -330,14 +330,20 @@ class Components(ScanossBase):
|
|
|
330
330
|
self._close_file(output_file, file)
|
|
331
331
|
return success
|
|
332
332
|
|
|
333
|
-
def get_provenance_details(
|
|
333
|
+
def get_provenance_details(
|
|
334
|
+
self, json_file: str = None, purls: [] = None, output_file: str = None, origin: bool = False
|
|
335
|
+
) -> bool:
|
|
334
336
|
"""
|
|
335
|
-
Retrieve the
|
|
337
|
+
Retrieve the provenance details for the supplied PURLs
|
|
336
338
|
|
|
337
|
-
:
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
339
|
+
Args:
|
|
340
|
+
json_file (str, optional): Input JSON file. Defaults to None.
|
|
341
|
+
purls (None, optional): PURLs to retrieve provenance details for. Defaults to None.
|
|
342
|
+
output_file (str, optional): Output file. Defaults to None.
|
|
343
|
+
origin (bool, optional): Retrieve origin details. Defaults to False.
|
|
344
|
+
|
|
345
|
+
Returns:
|
|
346
|
+
bool: True on success, False otherwise
|
|
341
347
|
"""
|
|
342
348
|
success = False
|
|
343
349
|
purls_request = self.load_purls(json_file, purls)
|
|
@@ -346,12 +352,16 @@ class Components(ScanossBase):
|
|
|
346
352
|
file = self._open_file_or_sdtout(output_file)
|
|
347
353
|
if file is None:
|
|
348
354
|
return False
|
|
349
|
-
|
|
350
|
-
|
|
355
|
+
if origin:
|
|
356
|
+
self.print_msg('Sending PURLs to Geo Provenance Origin API for decoration...')
|
|
357
|
+
response = self.grpc_api.get_provenance_origin(purls_request)
|
|
358
|
+
else:
|
|
359
|
+
self.print_msg('Sending PURLs to Geo Provenance Declared API for decoration...')
|
|
360
|
+
response = self.grpc_api.get_provenance_json(purls_request)
|
|
351
361
|
if response:
|
|
352
362
|
print(json.dumps(response, indent=2, sort_keys=True), file=file)
|
|
353
363
|
success = True
|
|
354
364
|
if output_file:
|
|
355
365
|
self.print_msg(f'Results written to: {output_file}')
|
|
356
366
|
self._close_file(output_file, file)
|
|
357
|
-
return success
|
|
367
|
+
return success
|
scanoss/constants.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
DEFAULT_POST_SIZE = 32
|
|
2
|
+
DEFAULT_TIMEOUT = 180
|
|
3
|
+
DEFAULT_RETRY = 5
|
|
4
|
+
MIN_TIMEOUT = 5
|
|
5
|
+
|
|
6
|
+
PYTHON_MAJOR_VERSION = 3
|
|
7
|
+
|
|
8
|
+
DEFAULT_SC_TIMEOUT = 600
|
|
9
|
+
DEFAULT_NB_THREADS = 5
|
|
10
|
+
|
|
11
|
+
DEFAULT_URL = 'https://api.osskb.org' # default free service URL
|
|
12
|
+
DEFAULT_URL2 = 'https://api.scanoss.com' # default premium service URL
|
scanoss/data/build_date.txt
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
date:
|
|
1
|
+
date: 20250425091203, utime: 1745572323
|
scanoss/file_filters.py
CHANGED
|
@@ -29,7 +29,6 @@ from typing import List
|
|
|
29
29
|
|
|
30
30
|
from pathspec import GitIgnoreSpec
|
|
31
31
|
|
|
32
|
-
from .scanoss_settings import ScanossSettings
|
|
33
32
|
from .scanossbase import ScanossBase
|
|
34
33
|
|
|
35
34
|
# Files to skip
|
|
@@ -47,6 +46,19 @@ DEFAULT_SKIPPED_FILES = {
|
|
|
47
46
|
'copying.lib',
|
|
48
47
|
'makefile',
|
|
49
48
|
}
|
|
49
|
+
|
|
50
|
+
DEFAULT_SKIPPED_FILES_HFH = {
|
|
51
|
+
'gradlew',
|
|
52
|
+
'gradlew.bat',
|
|
53
|
+
'mvnw',
|
|
54
|
+
'mvnw.cmd',
|
|
55
|
+
'gradle-wrapper.jar',
|
|
56
|
+
'maven-wrapper.jar',
|
|
57
|
+
'thumbs.db',
|
|
58
|
+
'babel.config.js',
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
|
|
50
62
|
# Folders to skip
|
|
51
63
|
DEFAULT_SKIPPED_DIRS = {
|
|
52
64
|
'nbproject',
|
|
@@ -59,9 +71,34 @@ DEFAULT_SKIPPED_DIRS = {
|
|
|
59
71
|
'wheels',
|
|
60
72
|
'htmlcov',
|
|
61
73
|
'__pypackages__',
|
|
74
|
+
'example',
|
|
75
|
+
'examples',
|
|
76
|
+
'docs',
|
|
77
|
+
'tests',
|
|
78
|
+
'doc',
|
|
79
|
+
'test',
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
DEFAULT_SKIPPED_DIRS_HFH = {
|
|
83
|
+
'nbproject',
|
|
84
|
+
'nbbuild',
|
|
85
|
+
'nbdist',
|
|
86
|
+
'__pycache__',
|
|
87
|
+
'venv',
|
|
88
|
+
'_yardoc',
|
|
89
|
+
'eggs',
|
|
90
|
+
'wheels',
|
|
91
|
+
'htmlcov',
|
|
92
|
+
'__pypackages__',
|
|
93
|
+
'example',
|
|
94
|
+
'examples',
|
|
62
95
|
}
|
|
96
|
+
|
|
97
|
+
|
|
63
98
|
# Folder endings to skip
|
|
64
99
|
DEFAULT_SKIPPED_DIR_EXT = {'.egg-info'}
|
|
100
|
+
DEFAULT_SKIPPED_DIR_EXT_HFH = {'.egg-info'}
|
|
101
|
+
|
|
65
102
|
# File extensions to skip
|
|
66
103
|
DEFAULT_SKIPPED_EXT = {
|
|
67
104
|
'.1',
|
|
@@ -205,6 +242,16 @@ DEFAULT_SKIPPED_EXT = {
|
|
|
205
242
|
'.gml',
|
|
206
243
|
'.pot',
|
|
207
244
|
'.plt',
|
|
245
|
+
'.whml',
|
|
246
|
+
'.pom',
|
|
247
|
+
'.smtml',
|
|
248
|
+
'.min.js',
|
|
249
|
+
'.mf',
|
|
250
|
+
'.base64',
|
|
251
|
+
'.s',
|
|
252
|
+
'.diff',
|
|
253
|
+
'.patch',
|
|
254
|
+
'.rules',
|
|
208
255
|
# File endings
|
|
209
256
|
'-doc',
|
|
210
257
|
'changelog',
|
|
@@ -226,6 +273,162 @@ DEFAULT_SKIPPED_EXT = {
|
|
|
226
273
|
'sqlite3',
|
|
227
274
|
}
|
|
228
275
|
|
|
276
|
+
# TODO: For hfh add the .gitignore patterns
|
|
277
|
+
DEFAULT_SKIPPED_EXT_HFH = {
|
|
278
|
+
'.1',
|
|
279
|
+
'.2',
|
|
280
|
+
'.3',
|
|
281
|
+
'.4',
|
|
282
|
+
'.5',
|
|
283
|
+
'.6',
|
|
284
|
+
'.7',
|
|
285
|
+
'.8',
|
|
286
|
+
'.9',
|
|
287
|
+
'.ac',
|
|
288
|
+
'.adoc',
|
|
289
|
+
'.am',
|
|
290
|
+
'.asciidoc',
|
|
291
|
+
'.bmp',
|
|
292
|
+
'.build',
|
|
293
|
+
'.cfg',
|
|
294
|
+
'.chm',
|
|
295
|
+
'.class',
|
|
296
|
+
'.cmake',
|
|
297
|
+
'.cnf',
|
|
298
|
+
'.conf',
|
|
299
|
+
'.config',
|
|
300
|
+
'.contributors',
|
|
301
|
+
'.copying',
|
|
302
|
+
'.crt',
|
|
303
|
+
'.csproj',
|
|
304
|
+
'.css',
|
|
305
|
+
'.csv',
|
|
306
|
+
'.dat',
|
|
307
|
+
'.data',
|
|
308
|
+
'.dtd',
|
|
309
|
+
'.dts',
|
|
310
|
+
'.iws',
|
|
311
|
+
'.c9',
|
|
312
|
+
'.c9revisions',
|
|
313
|
+
'.dtsi',
|
|
314
|
+
'.dump',
|
|
315
|
+
'.eot',
|
|
316
|
+
'.eps',
|
|
317
|
+
'.geojson',
|
|
318
|
+
'.gif',
|
|
319
|
+
'.glif',
|
|
320
|
+
'.gmo',
|
|
321
|
+
'.guess',
|
|
322
|
+
'.hex',
|
|
323
|
+
'.htm',
|
|
324
|
+
'.html',
|
|
325
|
+
'.ico',
|
|
326
|
+
'.iml',
|
|
327
|
+
'.in',
|
|
328
|
+
'.inc',
|
|
329
|
+
'.info',
|
|
330
|
+
'.ini',
|
|
331
|
+
'.ipynb',
|
|
332
|
+
'.jpeg',
|
|
333
|
+
'.jpg',
|
|
334
|
+
'.json',
|
|
335
|
+
'.jsonld',
|
|
336
|
+
'.lock',
|
|
337
|
+
'.log',
|
|
338
|
+
'.m4',
|
|
339
|
+
'.map',
|
|
340
|
+
'.md5',
|
|
341
|
+
'.meta',
|
|
342
|
+
'.mk',
|
|
343
|
+
'.mxml',
|
|
344
|
+
'.o',
|
|
345
|
+
'.otf',
|
|
346
|
+
'.out',
|
|
347
|
+
'.pbtxt',
|
|
348
|
+
'.pdf',
|
|
349
|
+
'.pem',
|
|
350
|
+
'.phtml',
|
|
351
|
+
'.plist',
|
|
352
|
+
'.png',
|
|
353
|
+
'.prefs',
|
|
354
|
+
'.properties',
|
|
355
|
+
'.pyc',
|
|
356
|
+
'.qdoc',
|
|
357
|
+
'.result',
|
|
358
|
+
'.rgb',
|
|
359
|
+
'.rst',
|
|
360
|
+
'.scss',
|
|
361
|
+
'.sha',
|
|
362
|
+
'.sha1',
|
|
363
|
+
'.sha2',
|
|
364
|
+
'.sha256',
|
|
365
|
+
'.sln',
|
|
366
|
+
'.spec',
|
|
367
|
+
'.sub',
|
|
368
|
+
'.svg',
|
|
369
|
+
'.svn-base',
|
|
370
|
+
'.tab',
|
|
371
|
+
'.template',
|
|
372
|
+
'.test',
|
|
373
|
+
'.tex',
|
|
374
|
+
'.tiff',
|
|
375
|
+
'.ttf',
|
|
376
|
+
'.txt',
|
|
377
|
+
'.utf-8',
|
|
378
|
+
'.vim',
|
|
379
|
+
'.wav',
|
|
380
|
+
'.woff',
|
|
381
|
+
'.woff2',
|
|
382
|
+
'.xht',
|
|
383
|
+
'.xhtml',
|
|
384
|
+
'.xml',
|
|
385
|
+
'.xpm',
|
|
386
|
+
'.xsd',
|
|
387
|
+
'.xul',
|
|
388
|
+
'.yaml',
|
|
389
|
+
'.yml',
|
|
390
|
+
'.wfp',
|
|
391
|
+
'.editorconfig',
|
|
392
|
+
'.dotcover',
|
|
393
|
+
'.pid',
|
|
394
|
+
'.lcov',
|
|
395
|
+
'.egg',
|
|
396
|
+
'.manifest',
|
|
397
|
+
'.cache',
|
|
398
|
+
'.coverage',
|
|
399
|
+
'.cover',
|
|
400
|
+
'.gem',
|
|
401
|
+
'.lst',
|
|
402
|
+
'.pickle',
|
|
403
|
+
'.pdb',
|
|
404
|
+
'.gml',
|
|
405
|
+
'.pot',
|
|
406
|
+
'.plt',
|
|
407
|
+
'.whml',
|
|
408
|
+
'.pom',
|
|
409
|
+
'.smtml',
|
|
410
|
+
'.min.js',
|
|
411
|
+
'.mf',
|
|
412
|
+
'.base64',
|
|
413
|
+
'.s',
|
|
414
|
+
'.diff',
|
|
415
|
+
'.patch',
|
|
416
|
+
'.rules',
|
|
417
|
+
# File endings
|
|
418
|
+
'-doc',
|
|
419
|
+
'config',
|
|
420
|
+
'news',
|
|
421
|
+
'readme',
|
|
422
|
+
'swiftdoc',
|
|
423
|
+
'texidoc',
|
|
424
|
+
'todo',
|
|
425
|
+
'version',
|
|
426
|
+
'ignore',
|
|
427
|
+
'manifest',
|
|
428
|
+
'sqlite',
|
|
429
|
+
'sqlite3',
|
|
430
|
+
}
|
|
431
|
+
|
|
229
432
|
|
|
230
433
|
class FileFilters(ScanossBase):
|
|
231
434
|
"""
|
|
@@ -233,20 +436,7 @@ class FileFilters(ScanossBase):
|
|
|
233
436
|
Handles both inclusion and exclusion rules based on file paths, extensions, and sizes.
|
|
234
437
|
"""
|
|
235
438
|
|
|
236
|
-
def __init__(
|
|
237
|
-
self,
|
|
238
|
-
debug: bool = False,
|
|
239
|
-
trace: bool = False,
|
|
240
|
-
quiet: bool = False,
|
|
241
|
-
scanoss_settings: 'ScanossSettings | None' = None,
|
|
242
|
-
all_extensions: bool = False,
|
|
243
|
-
all_folders: bool = False,
|
|
244
|
-
hidden_files_folders: bool = False,
|
|
245
|
-
operation_type: str = 'scanning',
|
|
246
|
-
skip_size: int = 0,
|
|
247
|
-
skip_extensions=None,
|
|
248
|
-
skip_folders=None,
|
|
249
|
-
):
|
|
439
|
+
def __init__(self, debug: bool = False, trace: bool = False, quiet: bool = False, **kwargs):
|
|
250
440
|
"""
|
|
251
441
|
Initialize scan filters based on default settings. Optionally append custom settings.
|
|
252
442
|
|
|
@@ -254,27 +444,29 @@ class FileFilters(ScanossBase):
|
|
|
254
444
|
debug (bool): Enable debug output
|
|
255
445
|
trace (bool): Enable trace output
|
|
256
446
|
quiet (bool): Suppress output
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
447
|
+
**kwargs: Additional arguments including:
|
|
448
|
+
scanoss_settings (ScanossSettings): Custom settings to override defaults
|
|
449
|
+
all_extensions (bool): Include all file extensions
|
|
450
|
+
all_folders (bool): Include all folders
|
|
451
|
+
hidden_files_folders (bool): Include hidden files and folders
|
|
452
|
+
operation_type (str): Operation type ('scanning' or 'fingerprinting')
|
|
453
|
+
skip_size (int): Size to skip
|
|
454
|
+
skip_extensions (list): Extensions to skip
|
|
455
|
+
skip_folders (list): Folders to skip
|
|
456
|
+
is_folder_hashing_scan (bool): Whether the operation is a folder hashing scan
|
|
262
457
|
"""
|
|
263
458
|
super().__init__(debug, trace, quiet)
|
|
264
459
|
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
self.
|
|
270
|
-
self.
|
|
271
|
-
self.
|
|
272
|
-
self.
|
|
273
|
-
self.
|
|
274
|
-
self.
|
|
275
|
-
self.skip_extensions = skip_extensions
|
|
276
|
-
self.file_folder_pat_spec = self._get_file_folder_pattern_spec(operation_type)
|
|
277
|
-
self.size_pat_rules = self._get_size_limit_pattern_rules(operation_type)
|
|
460
|
+
self.hidden_files_folders = kwargs.get('hidden_files_folders', False)
|
|
461
|
+
self.scanoss_settings = kwargs.get('scanoss_settings')
|
|
462
|
+
self.all_extensions = kwargs.get('all_extensions', False)
|
|
463
|
+
self.all_folders = kwargs.get('all_folders', False)
|
|
464
|
+
self.skip_folders = kwargs.get('skip_folders', [])
|
|
465
|
+
self.skip_size = kwargs.get('skip_size', 0)
|
|
466
|
+
self.skip_extensions = kwargs.get('skip_extensions', [])
|
|
467
|
+
self.is_folder_hashing_scan = kwargs.get('is_folder_hashing_scan', False)
|
|
468
|
+
self.file_folder_pat_spec = self._get_file_folder_pattern_spec(kwargs.get('operation_type', 'scanning'))
|
|
469
|
+
self.size_pat_rules = self._get_size_limit_pattern_rules(kwargs.get('operation_type', 'scanning'))
|
|
278
470
|
|
|
279
471
|
def get_filtered_files_from_folder(self, root: str) -> List[str]:
|
|
280
472
|
"""
|
|
@@ -304,16 +496,16 @@ class FileFilters(ScanossBase):
|
|
|
304
496
|
return all_files
|
|
305
497
|
# Walk the tree looking for files to process. While taking into account files/folders to skip
|
|
306
498
|
for dirpath, dirnames, filenames in os.walk(root_path):
|
|
307
|
-
|
|
308
|
-
rel_path =
|
|
309
|
-
if
|
|
310
|
-
self.print_msg(f'WARNING: Found symbolic link folder: {
|
|
499
|
+
dir_path = Path(dirpath)
|
|
500
|
+
rel_path = dir_path.relative_to(root_path)
|
|
501
|
+
if dir_path.is_symlink(): # TODO should we skip symlink folders?
|
|
502
|
+
self.print_msg(f'WARNING: Found symbolic link folder: {dir_path}')
|
|
311
503
|
|
|
312
|
-
if self.
|
|
504
|
+
if self.should_skip_dir(str(rel_path)): # Current directory should be skipped
|
|
313
505
|
dirnames.clear()
|
|
314
506
|
continue
|
|
315
507
|
for filename in filenames:
|
|
316
|
-
file_path =
|
|
508
|
+
file_path = dir_path / filename
|
|
317
509
|
all_files.append(str(file_path))
|
|
318
510
|
# End os.walk loop
|
|
319
511
|
# Now filter the files and return the reduced list
|
|
@@ -332,30 +524,36 @@ class FileFilters(ScanossBase):
|
|
|
332
524
|
"""
|
|
333
525
|
filtered_files = []
|
|
334
526
|
for file_path in files:
|
|
335
|
-
|
|
336
|
-
self.print_debug(
|
|
337
|
-
f'WARNING: File {file_path} does not exist, is not a file, or is a symbolic link. Ignoring.'
|
|
338
|
-
)
|
|
339
|
-
continue
|
|
527
|
+
path_obj = Path(file_path)
|
|
340
528
|
try:
|
|
341
529
|
if scan_root:
|
|
342
|
-
rel_path =
|
|
530
|
+
rel_path = path_obj.relative_to(scan_root)
|
|
343
531
|
else:
|
|
344
|
-
rel_path =
|
|
532
|
+
rel_path = str(path_obj)
|
|
345
533
|
except ValueError:
|
|
346
|
-
# If file_path is broken, symlink ignore it
|
|
347
534
|
self.print_debug(f'Ignoring file: {file_path} (broken symlink)')
|
|
348
535
|
continue
|
|
536
|
+
|
|
537
|
+
if not path_obj.exists() or not path_obj.is_file() or path_obj.is_symlink():
|
|
538
|
+
self.print_debug(
|
|
539
|
+
f'WARNING: File {rel_path} does not exist, is not a file, or is a symbolic link. Ignoring.'
|
|
540
|
+
)
|
|
541
|
+
continue
|
|
542
|
+
|
|
543
|
+
if not self.hidden_files_folders and any(part.startswith('.') for part in path_obj.parts):
|
|
544
|
+
self.print_debug(f'Skipping file: {rel_path} (in hidden directory or is hidden file)')
|
|
545
|
+
continue
|
|
546
|
+
|
|
349
547
|
if self._should_skip_file(rel_path):
|
|
350
548
|
continue
|
|
351
549
|
try:
|
|
352
|
-
file_size =
|
|
550
|
+
file_size = path_obj.stat().st_size
|
|
353
551
|
if file_size == 0:
|
|
354
552
|
self.print_debug(f'Skipping file: {rel_path} (empty file)')
|
|
355
553
|
continue
|
|
356
554
|
min_size, max_size = self._get_operation_size_limits(file_path)
|
|
357
555
|
if min_size <= file_size <= max_size:
|
|
358
|
-
filtered_files.append(rel_path)
|
|
556
|
+
filtered_files.append(str(rel_path))
|
|
359
557
|
else:
|
|
360
558
|
self.print_debug(
|
|
361
559
|
f'Skipping file: {rel_path} (size {file_size} outside limits {min_size}-{max_size})'
|
|
@@ -369,8 +567,11 @@ class FileFilters(ScanossBase):
|
|
|
369
567
|
"""
|
|
370
568
|
Get file path pattern specification.
|
|
371
569
|
|
|
372
|
-
:
|
|
373
|
-
|
|
570
|
+
Args:
|
|
571
|
+
operation_type (str): Type of operation ('scanning' or 'fingerprinting')
|
|
572
|
+
|
|
573
|
+
Returns:
|
|
574
|
+
GitIgnoreSpec: GitIgnoreSpec object containing the file path patterns
|
|
374
575
|
"""
|
|
375
576
|
patterns = self._get_operation_patterns(operation_type)
|
|
376
577
|
if patterns:
|
|
@@ -381,8 +582,11 @@ class FileFilters(ScanossBase):
|
|
|
381
582
|
"""
|
|
382
583
|
Get size limit pattern rules.
|
|
383
584
|
|
|
384
|
-
:
|
|
385
|
-
|
|
585
|
+
Args:
|
|
586
|
+
operation_type (str): Type of operation ('scanning' or 'fingerprinting')
|
|
587
|
+
|
|
588
|
+
Returns:
|
|
589
|
+
List of size limit pattern rules
|
|
386
590
|
"""
|
|
387
591
|
if self.scanoss_settings:
|
|
388
592
|
size_rules = self.scanoss_settings.get_skip_sizes(operation_type)
|
|
@@ -407,6 +611,14 @@ class FileFilters(ScanossBase):
|
|
|
407
611
|
List[str]: Combined list of patterns to skip
|
|
408
612
|
"""
|
|
409
613
|
patterns = []
|
|
614
|
+
|
|
615
|
+
# Default patterns for skipping directories
|
|
616
|
+
if not self.all_folders:
|
|
617
|
+
DEFAULT_SKIPPED_DIR_LIST = DEFAULT_SKIPPED_DIRS_HFH if self.is_folder_hashing_scan else DEFAULT_SKIPPED_DIRS
|
|
618
|
+
for dir_name in DEFAULT_SKIPPED_DIR_LIST:
|
|
619
|
+
patterns.append(f'{dir_name}/')
|
|
620
|
+
|
|
621
|
+
# Custom patterns added in SCANOSS settings file
|
|
410
622
|
if self.scanoss_settings:
|
|
411
623
|
patterns.extend(self.scanoss_settings.get_skip_patterns(operation_type))
|
|
412
624
|
return patterns
|
|
@@ -445,7 +657,7 @@ class FileFilters(ScanossBase):
|
|
|
445
657
|
# End rules loop
|
|
446
658
|
return min_size, max_size
|
|
447
659
|
|
|
448
|
-
def
|
|
660
|
+
def should_skip_dir(self, dir_rel_path: str) -> bool: # noqa: PLR0911
|
|
449
661
|
"""
|
|
450
662
|
Check if a directory should be skipped based on operation type and default rules.
|
|
451
663
|
|
|
@@ -483,7 +695,7 @@ class FileFilters(ScanossBase):
|
|
|
483
695
|
return True
|
|
484
696
|
return False
|
|
485
697
|
|
|
486
|
-
def _should_skip_file(self, file_rel_path: str) -> bool:
|
|
698
|
+
def _should_skip_file(self, file_rel_path: str) -> bool: # noqa: PLR0911
|
|
487
699
|
"""
|
|
488
700
|
Check if a file should be skipped based on operation type and default rules.
|
|
489
701
|
|
|
@@ -495,6 +707,9 @@ class FileFilters(ScanossBase):
|
|
|
495
707
|
"""
|
|
496
708
|
file_name = os.path.basename(file_rel_path)
|
|
497
709
|
|
|
710
|
+
DEFAULT_SKIPPED_FILES_LIST = DEFAULT_SKIPPED_FILES_HFH if self.is_folder_hashing_scan else DEFAULT_SKIPPED_FILES
|
|
711
|
+
DEFAULT_SKIPPED_EXT_LIST = DEFAULT_SKIPPED_EXT_HFH if self.is_folder_hashing_scan else DEFAULT_SKIPPED_EXT
|
|
712
|
+
|
|
498
713
|
if not self.hidden_files_folders and file_name.startswith('.'):
|
|
499
714
|
self.print_debug(f'Skipping file: {file_rel_path} (hidden file)')
|
|
500
715
|
return True
|
|
@@ -502,11 +717,11 @@ class FileFilters(ScanossBase):
|
|
|
502
717
|
return False
|
|
503
718
|
file_name_lower = file_name.lower()
|
|
504
719
|
# Look for exact files
|
|
505
|
-
if file_name_lower in
|
|
720
|
+
if file_name_lower in DEFAULT_SKIPPED_FILES_LIST:
|
|
506
721
|
self.print_debug(f'Skipping file: {file_rel_path} (matches default skip file)')
|
|
507
722
|
return True
|
|
508
723
|
# Look for file endings
|
|
509
|
-
for ending in
|
|
724
|
+
for ending in DEFAULT_SKIPPED_EXT_LIST:
|
|
510
725
|
if file_name_lower.endswith(ending):
|
|
511
726
|
self.print_debug(f'Skipping file: {file_rel_path} (matches default skip ending: {ending})')
|
|
512
727
|
return True
|