scanoss-1.12.2-py3-none-any.whl → scanoss-1.43.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. protoc_gen_swagger/__init__.py +13 -13
  2. protoc_gen_swagger/options/__init__.py +13 -13
  3. protoc_gen_swagger/options/annotations_pb2.py +18 -12
  4. protoc_gen_swagger/options/annotations_pb2.pyi +48 -0
  5. protoc_gen_swagger/options/annotations_pb2_grpc.py +20 -0
  6. protoc_gen_swagger/options/openapiv2_pb2.py +110 -99
  7. protoc_gen_swagger/options/openapiv2_pb2.pyi +1317 -0
  8. protoc_gen_swagger/options/openapiv2_pb2_grpc.py +20 -0
  9. scanoss/__init__.py +18 -18
  10. scanoss/api/__init__.py +17 -17
  11. scanoss/api/common/__init__.py +17 -17
  12. scanoss/api/common/v2/__init__.py +17 -17
  13. scanoss/api/common/v2/scanoss_common_pb2.py +49 -20
  14. scanoss/api/common/v2/scanoss_common_pb2_grpc.py +25 -0
  15. scanoss/api/components/__init__.py +17 -17
  16. scanoss/api/components/v2/__init__.py +17 -17
  17. scanoss/api/components/v2/scanoss_components_pb2.py +68 -43
  18. scanoss/api/components/v2/scanoss_components_pb2_grpc.py +83 -22
  19. scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py +136 -21
  20. scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py +766 -13
  21. scanoss/api/dependencies/__init__.py +17 -17
  22. scanoss/api/dependencies/v2/__init__.py +17 -17
  23. scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py +56 -29
  24. scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py +94 -8
  25. scanoss/api/geoprovenance/__init__.py +23 -0
  26. scanoss/api/geoprovenance/v2/__init__.py +23 -0
  27. scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2.py +92 -0
  28. scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2_grpc.py +381 -0
  29. scanoss/api/licenses/__init__.py +23 -0
  30. scanoss/api/licenses/v2/__init__.py +23 -0
  31. scanoss/api/licenses/v2/scanoss_licenses_pb2.py +84 -0
  32. scanoss/api/licenses/v2/scanoss_licenses_pb2_grpc.py +302 -0
  33. scanoss/api/scanning/__init__.py +17 -17
  34. scanoss/api/scanning/v2/__init__.py +17 -17
  35. scanoss/api/scanning/v2/scanoss_scanning_pb2.py +42 -13
  36. scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py +86 -7
  37. scanoss/api/semgrep/__init__.py +17 -17
  38. scanoss/api/semgrep/v2/__init__.py +17 -17
  39. scanoss/api/semgrep/v2/scanoss_semgrep_pb2.py +50 -23
  40. scanoss/api/semgrep/v2/scanoss_semgrep_pb2_grpc.py +151 -16
  41. scanoss/api/vulnerabilities/__init__.py +17 -17
  42. scanoss/api/vulnerabilities/v2/__init__.py +17 -17
  43. scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py +78 -31
  44. scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py +282 -18
  45. scanoss/cli.py +2359 -370
  46. scanoss/components.py +187 -94
  47. scanoss/constants.py +22 -0
  48. scanoss/cryptography.py +308 -0
  49. scanoss/csvoutput.py +91 -58
  50. scanoss/cyclonedx.py +221 -63
  51. scanoss/data/build_date.txt +1 -1
  52. scanoss/data/osadl-copyleft.json +133 -0
  53. scanoss/data/scanoss-settings-schema.json +254 -0
  54. scanoss/delta.py +197 -0
  55. scanoss/export/__init__.py +23 -0
  56. scanoss/export/dependency_track.py +227 -0
  57. scanoss/file_filters.py +582 -0
  58. scanoss/filecount.py +75 -69
  59. scanoss/gitlabqualityreport.py +214 -0
  60. scanoss/header_filter.py +563 -0
  61. scanoss/inspection/__init__.py +23 -0
  62. scanoss/inspection/policy_check/__init__.py +0 -0
  63. scanoss/inspection/policy_check/dependency_track/__init__.py +0 -0
  64. scanoss/inspection/policy_check/dependency_track/project_violation.py +479 -0
  65. scanoss/inspection/policy_check/policy_check.py +222 -0
  66. scanoss/inspection/policy_check/scanoss/__init__.py +0 -0
  67. scanoss/inspection/policy_check/scanoss/copyleft.py +243 -0
  68. scanoss/inspection/policy_check/scanoss/undeclared_component.py +309 -0
  69. scanoss/inspection/summary/__init__.py +0 -0
  70. scanoss/inspection/summary/component_summary.py +170 -0
  71. scanoss/inspection/summary/license_summary.py +191 -0
  72. scanoss/inspection/summary/match_summary.py +341 -0
  73. scanoss/inspection/utils/file_utils.py +44 -0
  74. scanoss/inspection/utils/license_utils.py +123 -0
  75. scanoss/inspection/utils/markdown_utils.py +63 -0
  76. scanoss/inspection/utils/scan_result_processor.py +417 -0
  77. scanoss/osadl.py +125 -0
  78. scanoss/results.py +275 -0
  79. scanoss/scancodedeps.py +87 -38
  80. scanoss/scanner.py +431 -539
  81. scanoss/scanners/__init__.py +23 -0
  82. scanoss/scanners/container_scanner.py +476 -0
  83. scanoss/scanners/folder_hasher.py +358 -0
  84. scanoss/scanners/scanner_config.py +73 -0
  85. scanoss/scanners/scanner_hfh.py +252 -0
  86. scanoss/scanoss_settings.py +337 -0
  87. scanoss/scanossapi.py +140 -101
  88. scanoss/scanossbase.py +59 -22
  89. scanoss/scanossgrpc.py +799 -251
  90. scanoss/scanpostprocessor.py +294 -0
  91. scanoss/scantype.py +22 -21
  92. scanoss/services/dependency_track_service.py +132 -0
  93. scanoss/spdxlite.py +532 -174
  94. scanoss/threadeddependencies.py +148 -47
  95. scanoss/threadedscanning.py +53 -37
  96. scanoss/utils/__init__.py +23 -0
  97. scanoss/utils/abstract_presenter.py +103 -0
  98. scanoss/utils/crc64.py +96 -0
  99. scanoss/utils/file.py +84 -0
  100. scanoss/utils/scanoss_scan_results_utils.py +41 -0
  101. scanoss/utils/simhash.py +198 -0
  102. scanoss/winnowing.py +241 -63
  103. {scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info}/METADATA +18 -9
  104. scanoss-1.43.1.dist-info/RECORD +110 -0
  105. {scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info}/WHEEL +1 -1
  106. scanoss-1.12.2.dist-info/RECORD +0 -58
  107. {scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info}/entry_points.txt +0 -0
  108. {scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info/licenses}/LICENSE +0 -0
  109. {scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info}/top_level.txt +0 -0
scanoss/scanner.py CHANGED
@@ -1,89 +1,65 @@
1
1
  """
2
- SPDX-License-Identifier: MIT
3
-
4
- Copyright (c) 2021, SCANOSS
5
-
6
- Permission is hereby granted, free of charge, to any person obtaining a copy
7
- of this software and associated documentation files (the "Software"), to deal
8
- in the Software without restriction, including without limitation the rights
9
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
- copies of the Software, and to permit persons to whom the Software is
11
- furnished to do so, subject to the following conditions:
12
-
13
- The above copyright notice and this permission notice shall be included in
14
- all copies or substantial portions of the Software.
15
-
16
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
- THE SOFTWARE.
2
+ SPDX-License-Identifier: MIT
3
+
4
+ Copyright (c) 2021, SCANOSS
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
23
  """
24
+
25
+ import datetime
24
26
  import json
25
27
  import os
26
28
  import sys
27
- import datetime
28
- import pkg_resources
29
+ from contextlib import nullcontext
30
+ from pathlib import Path
31
+ from typing import Any, Dict, List, Optional
29
32
 
30
- from progress.bar import Bar
33
+ import importlib_resources
31
34
  from progress.spinner import Spinner
32
35
  from pypac.parser import PACFile
33
36
 
34
- from .scanossapi import ScanossApi
35
- from .cyclonedx import CycloneDx
36
- from .spdxlite import SpdxLite
37
+ from scanoss.file_filters import FileFilters
38
+
39
+ from . import __version__
37
40
  from .csvoutput import CsvOutput
38
- from .threadedscanning import ThreadedScanning
41
+ from .cyclonedx import CycloneDx
39
42
  from .scancodedeps import ScancodeDeps
40
- from .threadeddependencies import ThreadedDependencies
43
+ from .scanoss_settings import ScanossSettings
44
+ from .scanossapi import ScanossApi
45
+ from .scanossbase import ScanossBase
41
46
  from .scanossgrpc import ScanossGrpc
47
+ from .scanpostprocessor import ScanPostProcessor
42
48
  from .scantype import ScanType
43
- from .scanossbase import ScanossBase
44
- from . import __version__
49
+ from .spdxlite import SpdxLite
50
+ from .threadeddependencies import SCOPE, ThreadedDependencies
51
+ from .threadedscanning import ThreadedScanning
45
52
 
46
53
  FAST_WINNOWING = False
47
54
  try:
48
55
  from scanoss_winnowing.winnowing import Winnowing
49
56
 
50
57
  FAST_WINNOWING = True
51
- except ModuleNotFoundError or ImportError:
58
+ except (ModuleNotFoundError, ImportError):
52
59
  FAST_WINNOWING = False
53
60
  from .winnowing import Winnowing
54
61
 
55
- FILTERED_DIRS = { # Folders to skip
56
- "nbproject", "nbbuild", "nbdist", "__pycache__", "venv", "_yardoc", "eggs", "wheels", "htmlcov", "__pypackages__"
57
- }
58
- FILTERED_DIR_EXT = { # Folder endings to skip
59
- ".egg-info"
60
- }
61
- FILTERED_EXT = [ # File extensions to skip
62
- ".1", ".2", ".3", ".4", ".5", ".6", ".7", ".8", ".9", ".ac", ".adoc", ".am",
63
- ".asciidoc", ".bmp", ".build", ".cfg", ".chm", ".class", ".cmake", ".cnf",
64
- ".conf", ".config", ".contributors", ".copying", ".crt", ".csproj", ".css",
65
- ".csv", ".dat", ".data", ".doc", ".docx", ".dtd", ".dts", ".iws", ".c9", ".c9revisions",
66
- ".dtsi", ".dump", ".eot", ".eps", ".geojson", ".gdoc", ".gif",
67
- ".glif", ".gmo", ".gradle", ".guess", ".hex", ".htm", ".html", ".ico", ".iml",
68
- ".in", ".inc", ".info", ".ini", ".ipynb", ".jpeg", ".jpg", ".json", ".jsonld", ".lock",
69
- ".log", ".m4", ".map", ".markdown", ".md", ".md5", ".meta", ".mk", ".mxml",
70
- ".o", ".otf", ".out", ".pbtxt", ".pdf", ".pem", ".phtml", ".plist", ".png",
71
- ".po", ".ppt", ".prefs", ".properties", ".pyc", ".qdoc", ".result", ".rgb",
72
- ".rst", ".scss", ".sha", ".sha1", ".sha2", ".sha256", ".sln", ".spec", ".sql",
73
- ".sub", ".svg", ".svn-base", ".tab", ".template", ".test", ".tex", ".tiff",
74
- ".toml", ".ttf", ".txt", ".utf-8", ".vim", ".wav", ".woff", ".woff2", ".xht",
75
- ".xhtml", ".xls", ".xlsx", ".xml", ".xpm", ".xsd", ".xul", ".yaml", ".yml", ".wfp",
76
- ".editorconfig", ".dotcover", ".pid", ".lcov", ".egg", ".manifest", ".cache", ".coverage", ".cover",
77
- ".gem", ".lst", ".pickle", ".pdb", ".gml", ".pot", ".plt",
78
- # File endings
79
- "-doc", "changelog", "config", "copying", "license", "authors", "news", "licenses", "notice",
80
- "readme", "swiftdoc", "texidoc", "todo", "version", "ignore", "manifest", "sqlite", "sqlite3"
81
- ]
82
- FILTERED_FILES = { # Files to skip
83
- "gradlew", "gradlew.bat", "mvnw", "mvnw.cmd", "gradle-wrapper.jar", "maven-wrapper.jar",
84
- "thumbs.db", "babel.config.js", "license.txt", "license.md", "copying.lib", "makefile"
85
- }
86
- WFP_FILE_START = "file="
62
+ WFP_FILE_START = 'file='
87
63
  MAX_POST_SIZE = 64 * 1024 # 64k Max post size
88
64
 
89
65
 
@@ -93,29 +69,56 @@ class Scanner(ScanossBase):
93
69
  Handle the scanning of files, snippets and dependencies
94
70
  """
95
71
 
96
- def __init__(self, wfp: str = None, scan_output: str = None, output_format: str = 'plain',
97
- debug: bool = False, trace: bool = False, quiet: bool = False, api_key: str = None, url: str = None,
98
- sbom_path: str = None, scan_type: str = None, flags: str = None, nb_threads: int = 5,
99
- post_size: int = 32, timeout: int = 180, no_wfp_file: bool = False,
100
- all_extensions: bool = False, all_folders: bool = False, hidden_files_folders: bool = False,
101
- scan_options: int = 7, sc_timeout: int = 600, sc_command: str = None, grpc_url: str = None,
102
- obfuscate: bool = False, ignore_cert_errors: bool = False, proxy: str = None, grpc_proxy: str = None,
103
- ca_cert: str = None, pac: PACFile = None, retry: int = 5, hpsm: bool = False,
104
- skip_size: int = 0, skip_extensions=None, skip_folders=None,
105
- strip_hpsm_ids=None, strip_snippet_ids=None, skip_md5_ids=None
106
- ):
107
- """
108
- Initialise scanning class, including Winnowing, ScanossApi and ThreadedScanning
72
+ def __init__( # noqa: PLR0913, PLR0915
73
+ self,
74
+ scan_output: str = None,
75
+ output_format: str = 'plain',
76
+ debug: bool = False,
77
+ trace: bool = False,
78
+ quiet: bool = False,
79
+ api_key: str = None,
80
+ url: str = None,
81
+ flags: str = None,
82
+ nb_threads: int = 5,
83
+ post_size: int = 32,
84
+ timeout: int = 180,
85
+ all_extensions: bool = False,
86
+ all_folders: bool = False,
87
+ hidden_files_folders: bool = False,
88
+ scan_options: int = 7,
89
+ sc_timeout: int = 600,
90
+ sc_command: str = None,
91
+ grpc_url: str = None,
92
+ obfuscate: bool = False,
93
+ ignore_cert_errors: bool = False,
94
+ proxy: str = None,
95
+ grpc_proxy: str = None,
96
+ ca_cert: str = None,
97
+ pac: PACFile = None,
98
+ retry: int = 5,
99
+ hpsm: bool = False,
100
+ skip_size: int = 0,
101
+ skip_extensions=None,
102
+ skip_folders=None,
103
+ strip_hpsm_ids=None,
104
+ strip_snippet_ids=None,
105
+ skip_md5_ids=None,
106
+ scan_settings: 'ScanossSettings | None' = None,
107
+ req_headers: dict = None,
108
+ use_grpc: bool = False,
109
+ skip_headers: bool = False,
110
+ skip_headers_limit: int = 0,
111
+ ):
112
+ """
113
+ Initialise scanning class, including Winnowing, ScanossApi, ThreadedScanning
109
114
  """
110
115
  super().__init__(debug, trace, quiet)
111
116
  if skip_folders is None:
112
117
  skip_folders = []
113
118
  if skip_extensions is None:
114
119
  skip_extensions = []
115
- self.wfp = wfp if wfp else "scanner_output.wfp"
116
120
  self.scan_output = scan_output
117
121
  self.output_format = output_format
118
- self.no_wfp_file = no_wfp_file
119
122
  self.isatty = sys.stderr.isatty()
120
123
  self.all_extensions = all_extensions
121
124
  self.all_folders = all_folders
@@ -125,104 +128,81 @@ class Scanner(ScanossBase):
125
128
  self.hpsm = hpsm
126
129
  self.skip_folders = skip_folders
127
130
  self.skip_size = skip_size
131
+ self.skip_extensions = skip_extensions
132
+ self.req_headers = req_headers
128
133
  ver_details = Scanner.version_details()
129
134
 
130
- self.winnowing = Winnowing(debug=debug, quiet=quiet, skip_snippets=self._skip_snippets,
131
- all_extensions=all_extensions, obfuscate=obfuscate, hpsm=self.hpsm,
132
- strip_hpsm_ids=strip_hpsm_ids, strip_snippet_ids=strip_snippet_ids,
133
- skip_md5_ids=skip_md5_ids
134
- )
135
- self.scanoss_api = ScanossApi(debug=debug, trace=trace, quiet=quiet, api_key=api_key, url=url,
136
- sbom_path=sbom_path, scan_type=scan_type, flags=flags, timeout=timeout,
137
- ver_details=ver_details, ignore_cert_errors=ignore_cert_errors,
138
- proxy=proxy, ca_cert=ca_cert, pac=pac, retry=retry
139
- )
135
+ self.winnowing = Winnowing(
136
+ debug=debug,
137
+ trace=trace,
138
+ quiet=quiet,
139
+ skip_snippets=self._skip_snippets,
140
+ all_extensions=all_extensions,
141
+ obfuscate=obfuscate,
142
+ hpsm=self.hpsm,
143
+ strip_hpsm_ids=strip_hpsm_ids,
144
+ strip_snippet_ids=strip_snippet_ids,
145
+ skip_md5_ids=skip_md5_ids,
146
+ skip_headers=skip_headers,
147
+ skip_headers_limit=skip_headers_limit,
148
+ )
149
+ self.scanoss_api = ScanossApi(
150
+ debug=debug,
151
+ trace=trace,
152
+ quiet=quiet,
153
+ api_key=api_key,
154
+ url=url,
155
+ flags=flags,
156
+ timeout=timeout,
157
+ ver_details=ver_details,
158
+ ignore_cert_errors=ignore_cert_errors,
159
+ proxy=proxy,
160
+ ca_cert=ca_cert,
161
+ pac=pac,
162
+ retry=retry,
163
+ req_headers= self.req_headers,
164
+ )
140
165
  sc_deps = ScancodeDeps(debug=debug, quiet=quiet, trace=trace, timeout=sc_timeout, sc_command=sc_command)
141
- grpc_api = ScanossGrpc(url=grpc_url, debug=debug, quiet=quiet, trace=trace, api_key=api_key,
142
- ver_details=ver_details, ca_cert=ca_cert, proxy=proxy, pac=pac, grpc_proxy=grpc_proxy
143
- )
166
+ grpc_api = ScanossGrpc(
167
+ url=grpc_url,
168
+ debug=debug,
169
+ quiet=quiet,
170
+ trace=trace,
171
+ api_key=api_key,
172
+ ver_details=ver_details,
173
+ ca_cert=ca_cert,
174
+ proxy=proxy,
175
+ pac=pac,
176
+ grpc_proxy=grpc_proxy,
177
+ req_headers=self.req_headers,
178
+ ignore_cert_errors=ignore_cert_errors,
179
+ use_grpc=use_grpc
180
+ )
144
181
  self.threaded_deps = ThreadedDependencies(sc_deps, grpc_api, debug=debug, quiet=quiet, trace=trace)
145
182
  self.nb_threads = nb_threads
146
183
  if nb_threads and nb_threads > 0:
147
- self.threaded_scan = ThreadedScanning(self.scanoss_api, debug=debug, trace=trace, quiet=quiet,
148
- nb_threads=nb_threads
149
- )
184
+ self.threaded_scan = ThreadedScanning(
185
+ self.scanoss_api, debug=debug, trace=trace, quiet=quiet, nb_threads=nb_threads
186
+ )
150
187
  else:
151
188
  self.threaded_scan = None
152
189
  self.max_post_size = post_size * 1024 if post_size > 0 else MAX_POST_SIZE # Set the max post size (default 64k)
153
190
  self.post_file_count = post_size if post_size > 0 else 32 # Max number of files for any given POST (default 32)
154
191
  if self._skip_snippets:
155
192
  self.max_post_size = 8 * 1024 # 8k Max post size if we're skipping snippets
156
- self.skip_extensions = FILTERED_EXT
157
- if skip_extensions: # Append extra file extensions to skip
158
- self.skip_extensions.extend(skip_extensions)
159
193
 
160
- def __filter_files(self, files: list) -> list:
161
- """
162
- Filter which files should be considered for processing
163
- :param files: list of files to filter
164
- :return list of filtered files
165
- """
166
- file_list = []
167
- for f in files:
168
- ignore = False
169
- if f.startswith(".") and not self.hidden_files_folders: # Ignore all . files unless requested
170
- ignore = True
171
- if not ignore and not self.all_extensions: # Skip this check if we're allowing all extensions
172
- f_lower = f.lower()
173
- if f_lower in FILTERED_FILES: # Check for exact files to ignore
174
- ignore = True
175
- if not ignore:
176
- for ending in self.skip_extensions: # Check for file endings to ignore (static and user supplied)
177
- if ending and f_lower.endswith(ending):
178
- ignore = True
179
- break
180
- if not ignore:
181
- file_list.append(f)
182
- return file_list
183
-
184
- def __filter_dirs(self, dirs: list) -> list:
185
- """
186
- Filter which folders should be considered for processing
187
- :param dirs: list of directories to filter
188
- :return: list of filtered directories
189
- """
190
- dir_list = []
191
- for d in dirs:
192
- ignore = False
193
- if d.startswith(".") and not self.hidden_files_folders: # Ignore all . folders unless requested
194
- ignore = True
195
- if not ignore and not self.all_folders: # Skip this check if we're allowing all folders
196
- d_lower = d.lower()
197
- if d_lower in FILTERED_DIRS: # Ignore specific folders (case insensitive)
198
- ignore = True
199
- elif self.skip_folders and d in self.skip_folders: # Ignore user-supplied folders (case sensitive)
200
- ignore = True
201
- if not ignore:
202
- for de in FILTERED_DIR_EXT: # Ignore specific folder endings (case insensitive)
203
- if d_lower.endswith(de):
204
- ignore = True
205
- break
206
- if not ignore:
207
- dir_list.append(d)
208
- return dir_list
194
+ self.scan_settings = scan_settings
195
+ self.post_processor = (
196
+ ScanPostProcessor(scan_settings, debug=debug, trace=trace, quiet=quiet) if scan_settings else None
197
+ )
198
+ self._maybe_set_api_sbom()
209
199
 
210
- @staticmethod
211
- def __strip_dir(scan_dir: str, length: int, path: str) -> str:
212
- """
213
- Strip the leading string from the specified path
214
- Parameters
215
- ----------
216
- scan_dir: str
217
- Root path
218
- length: int
219
- length of the root path string
220
- path: str
221
- Path to strip
222
- """
223
- if length > 0 and path.startswith(scan_dir):
224
- path = path[length:]
225
- return path
200
+ def _maybe_set_api_sbom(self):
201
+ if not self.scan_settings:
202
+ return
203
+ sbom = self.scan_settings.get_sbom()
204
+ if sbom:
205
+ self.scanoss_api.set_sbom(sbom)
226
206
 
227
207
  @staticmethod
228
208
  def __count_files_in_wfp_file(wfp_file: str):
@@ -241,27 +221,6 @@ class Scanner(ScanossBase):
241
221
  count += 1
242
222
  return count
243
223
 
244
- @staticmethod
245
- def valid_json_file(json_file: str) -> bool:
246
- """
247
- Validate if the specified file is indeed valid JSON
248
- :param: str JSON file to load
249
- :return bool True if valid, False otherwise
250
- """
251
- if not json_file:
252
- Scanner.print_stderr('ERROR: No JSON file provided to parse.')
253
- return False
254
- if not os.path.isfile(json_file):
255
- Scanner.print_stderr(f'ERROR: JSON file does not exist or is not a file: {json_file}')
256
- return False
257
- try:
258
- with open(json_file) as f:
259
- json.load(f)
260
- except Exception as e:
261
- Scanner.print_stderr(f'Problem parsing JSON file "{json_file}": {e}')
262
- return False
263
- return True
264
-
265
224
  @staticmethod
266
225
  def version_details() -> str:
267
226
  """
@@ -270,9 +229,10 @@ class Scanner(ScanossBase):
270
229
  """
271
230
  data = None
272
231
  try:
273
- f_name = pkg_resources.resource_filename(__name__, 'data/build_date.txt')
274
- with open(f_name, 'r') as f:
275
- data = f.read().rstrip()
232
+ f_name = importlib_resources.files(__name__) / 'data/build_date.txt'
233
+ with importlib_resources.as_file(f_name) as f:
234
+ with open(f, 'r', encoding='utf-8') as file:
235
+ data = file.read().rstrip()
276
236
  except Exception as e:
277
237
  Scanner.print_stderr(f'Warning: Problem loading build time details: {e}')
278
238
  if not data or len(data) == 0:
@@ -287,7 +247,7 @@ class Scanner(ScanossBase):
287
247
  if not outfile and self.scan_output:
288
248
  outfile = self.scan_output
289
249
  if outfile:
290
- with open(outfile, "a") as rf:
250
+ with open(outfile, 'a') as rf:
291
251
  rf.write(string + '\n')
292
252
  else:
293
253
  print(string)
@@ -328,26 +288,45 @@ class Scanner(ScanossBase):
328
288
  return True
329
289
  return False
330
290
 
331
- def scan_folder_with_options(self, scan_dir: str, deps_file: str = None, file_map: dict = None) -> bool:
291
+ def scan_folder_with_options( # noqa: PLR0913
292
+ self,
293
+ scan_dir: str,
294
+ deps_file: str = None,
295
+ file_map: dict = None,
296
+ dep_scope: SCOPE = None,
297
+ dep_scope_include: str = None,
298
+ dep_scope_exclude: str = None,
299
+ ) -> bool:
332
300
  """
333
301
  Scan the given folder for whatever scaning options that have been configured
302
+ :param dep_scope_exclude: comma separated list of dependency scopes to exclude
303
+ :param dep_scope_include: comma separated list of dependency scopes to include
304
+ :param dep_scope: Enum dependency scope to use
334
305
  :param scan_dir: directory to scan
335
306
  :param deps_file: pre-parsed dependency file to decorate
336
307
  :param file_map: mapping of obfuscated files back into originals
337
308
  :return: True if successful, False otherwise
338
309
  """
310
+
339
311
  success = True
340
312
  if not scan_dir:
341
- raise Exception(f"ERROR: Please specify a folder to scan")
313
+ raise Exception('ERROR: Please specify a folder to scan')
342
314
  if not os.path.exists(scan_dir) or not os.path.isdir(scan_dir):
343
- raise Exception(f"ERROR: Specified folder does not exist or is not a folder: {scan_dir}")
315
+ raise Exception(f'ERROR: Specified folder does not exist or is not a folder: {scan_dir}')
344
316
  if not self.is_file_or_snippet_scan() and not self.is_dependency_scan():
345
- raise Exception(f"ERROR: No scan options defined to scan folder: {scan_dir}")
317
+ raise Exception(f'ERROR: No scan options defined to scan folder: {scan_dir}')
346
318
 
347
319
  if self.scan_output:
348
320
  self.print_msg(f'Writing results to {self.scan_output}...')
349
321
  if self.is_dependency_scan():
350
- if not self.threaded_deps.run(what_to_scan=scan_dir, deps_file=deps_file, wait=False): # Kick off a background dependency scan
322
+ if not self.threaded_deps.run(
323
+ what_to_scan=scan_dir,
324
+ deps_file=deps_file,
325
+ wait=False,
326
+ dep_scope=dep_scope,
327
+ dep_scope_include=dep_scope_include,
328
+ dep_scope_exclude=dep_scope_exclude,
329
+ ): # Kick off a background dependency scan
351
330
  success = False
352
331
  if self.is_file_or_snippet_scan():
353
332
  if not self.scan_folder(scan_dir):
@@ -357,7 +336,7 @@ class Scanner(ScanossBase):
357
336
  success = False
358
337
  return success
359
338
 
360
- def scan_folder(self, scan_dir: str) -> bool:
339
+ def scan_folder(self, scan_dir: str) -> bool: # noqa: PLR0912, PLR0915
361
340
  """
362
341
  Scan the specified folder producing fingerprints, send to the SCANOSS API and return results
363
342
 
@@ -367,87 +346,80 @@ class Scanner(ScanossBase):
367
346
  """
368
347
  success = True
369
348
  if not scan_dir:
370
- raise Exception(f"ERROR: Please specify a folder to scan")
349
+ raise Exception('ERROR: Please specify a folder to scan')
371
350
  if not os.path.exists(scan_dir) or not os.path.isdir(scan_dir):
372
- raise Exception(f"ERROR: Specified folder does not exist or is not a folder: {scan_dir}")
373
-
374
- scan_dir_len = len(scan_dir) if scan_dir.endswith(os.path.sep) else len(scan_dir) + 1
351
+ raise Exception(f'ERROR: Specified folder does not exist or is not a folder: {scan_dir}')
352
+
353
+ file_filters = FileFilters(
354
+ debug=self.debug,
355
+ trace=self.trace,
356
+ quiet=self.quiet,
357
+ scanoss_settings=self.scan_settings,
358
+ all_extensions=self.all_extensions,
359
+ all_folders=self.all_folders,
360
+ hidden_files_folders=self.hidden_files_folders,
361
+ skip_size=self.skip_size,
362
+ skip_folders=self.skip_folders,
363
+ skip_extensions=self.skip_extensions,
364
+ operation_type='scanning',
365
+ )
375
366
  self.print_msg(f'Searching {scan_dir} for files to fingerprint...')
376
- spinner = None
377
- if not self.quiet and self.isatty:
378
- spinner = Spinner('Fingerprinting ')
379
- save_wfps_for_print = not self.no_wfp_file or not self.threaded_scan
380
- wfp_list = []
381
- scan_block = ''
382
- scan_size = 0
383
- queue_size = 0
384
- file_count = 0 # count all files fingerprinted
385
- wfp_file_count = 0 # count number of files in each queue post
386
- scan_started = False
387
- for root, dirs, files in os.walk(scan_dir):
388
- self.print_trace(f'U Root: {root}, Dirs: {dirs}, Files {files}')
389
- if self.threaded_scan and self.threaded_scan.stop_scanning():
390
- self.print_stderr('Warning: Aborting fingerprinting as the scanning service is not available.')
391
- break
392
- dirs[:] = self.__filter_dirs(dirs) # Strip out unwanted directories
393
- filtered_files = self.__filter_files(files) # Strip out unwanted files
394
- self.print_debug(f'F Root: {root}, Dirs: {dirs}, Files {filtered_files}')
395
- for file in filtered_files: # Cycle through each filtered file
396
- path = os.path.join(root, file)
397
- f_size = 0
398
- try:
399
- f_size = os.stat(path).st_size
400
- except Exception as e:
401
- self.print_trace(
402
- f'Ignoring missing symlink file: {file} ({e})') # Can fail if there is a broken symlink
403
- # Ignore broken links and empty files or if a user-specified size limit is supplied
404
- if f_size > 0 and (self.skip_size <= 0 or f_size > self.skip_size):
405
- self.print_trace(f'Fingerprinting {path}...')
406
- if spinner:
407
- spinner.next()
408
- wfp = self.winnowing.wfp_for_file(path, Scanner.__strip_dir(scan_dir, scan_dir_len, path))
409
- if wfp is None or wfp == '':
410
- self.print_debug(f'No WFP returned for {path}. Skipping.')
411
- continue
412
- if save_wfps_for_print:
413
- wfp_list.append(wfp)
414
- file_count += 1
415
- if self.threaded_scan:
416
- wfp_size = len(wfp.encode("utf-8"))
417
- # If the WFP is bigger than the max post size and we already have something stored in the scan block, add it to the queue
418
- if scan_block != '' and (wfp_size + scan_size) >= self.max_post_size:
419
- self.threaded_scan.queue_add(scan_block)
420
- queue_size += 1
421
- scan_block = ''
422
- wfp_file_count = 0
423
- scan_block += wfp
424
- scan_size = len(scan_block.encode("utf-8"))
425
- wfp_file_count += 1
426
- # If the scan request block (group of WFPs) or larger than the POST size or we have reached the file limit, add it to the queue
427
- if wfp_file_count > self.post_file_count or scan_size >= self.max_post_size:
428
- self.threaded_scan.queue_add(scan_block)
429
- queue_size += 1
430
- scan_block = ''
431
- wfp_file_count = 0
432
- if not scan_started and queue_size > self.nb_threads: # Start scanning if we have something to do
433
- scan_started = True
434
- if not self.threaded_scan.run(wait=False):
435
- self.print_stderr(
436
- f'Warning: Some errors encounted while scanning. Results might be incomplete.')
437
- success = False
438
- # End for loop
439
- if self.threaded_scan and scan_block != '':
440
- self.threaded_scan.queue_add(scan_block) # Make sure all files have been submitted
441
- if spinner:
442
- spinner.finish()
367
+ spinner_ctx = Spinner('Fingerprinting ') if (not self.quiet and self.isatty) else nullcontext()
368
+
369
+ with spinner_ctx as spinner:
370
+ scan_block = ''
371
+ scan_size = 0
372
+ queue_size = 0
373
+ file_count = 0 # count all files fingerprinted
374
+ wfp_file_count = 0 # count number of files in each queue post
375
+ scan_started = False
376
+
377
+ to_scan_files = file_filters.get_filtered_files_from_folder(scan_dir)
378
+ for to_scan_file in to_scan_files:
379
+ if self.threaded_scan and self.threaded_scan.stop_scanning():
380
+ self.print_stderr('Warning: Aborting fingerprinting as the scanning service is not available.')
381
+ break
382
+ self.print_debug(f'Fingerprinting {to_scan_file}...')
383
+ if spinner:
384
+ spinner.next()
385
+ abs_path = Path(scan_dir, to_scan_file).resolve()
386
+ wfp = self.winnowing.wfp_for_file(str(abs_path), to_scan_file)
387
+ if wfp is None or wfp == '':
388
+ self.print_debug(f'No WFP returned for {to_scan_file}. Skipping.')
389
+ continue
390
+ file_count += 1
391
+ if self.threaded_scan:
392
+ wfp_size = len(wfp.encode('utf-8'))
393
+ # If the WFP is bigger than the max post size and we already have something
394
+ # stored in the scan block, add it to the queue
395
+ if scan_block != '' and (wfp_size + scan_size) >= self.max_post_size:
396
+ self.threaded_scan.queue_add(scan_block)
397
+ queue_size += 1
398
+ scan_block = ''
399
+ wfp_file_count = 0
400
+ scan_block += wfp
401
+ scan_size = len(scan_block.encode('utf-8'))
402
+ wfp_file_count += 1
403
+ # If the scan request block (group of WFPs) is larger than the POST size
404
+ # or we have reached the file limit, add it to the queue
405
+ if wfp_file_count > self.post_file_count or scan_size >= self.max_post_size:
406
+ self.threaded_scan.queue_add(scan_block)
407
+ queue_size += 1
408
+ scan_block = ''
409
+ wfp_file_count = 0
410
+ if not scan_started and queue_size > self.nb_threads: # Start scanning if we have something to do
411
+ scan_started = True
412
+ if not self.threaded_scan.run(wait=False):
413
+ self.print_stderr(
414
+ 'Warning: Some errors encountered while scanning. '
415
+ 'Results might be incomplete.'
416
+ )
417
+ success = False
418
+ # End for loop
419
+ if self.threaded_scan and scan_block != '':
420
+ self.threaded_scan.queue_add(scan_block) # Make sure all files have been submitted
443
421
 
444
422
  if file_count > 0:
445
- if save_wfps_for_print: # Write a WFP file if no threading is requested
446
- self.print_debug(f'Writing fingerprints to {self.wfp}')
447
- with open(self.wfp, 'w') as f:
448
- f.write(''.join(wfp_list))
449
- else:
450
- self.print_debug(f'Skipping writing WFP file {self.wfp}')
451
423
  if self.threaded_scan:
452
424
  success = self.__run_scan_threaded(scan_started, file_count)
453
425
  else:
@@ -465,103 +437,109 @@ class Scanner(ScanossBase):
465
437
  self.threaded_scan.update_bar(create=True, file_count=file_count)
466
438
  if not scan_started:
467
439
  if not self.threaded_scan.run(wait=False): # Run the scan but do not wait for it to complete
468
- self.print_stderr(f'Warning: Some errors encounted while scanning. Results might be incomplete.')
440
+ self.print_stderr('Warning: Some errors encounted while scanning. Results might be incomplete.')
469
441
  success = False
470
442
  return success
471
443
 
472
- def __finish_scan_threaded(self, file_map: dict = None) -> bool:
473
- """
474
- Wait for the threaded scans to complete
475
- :param file_map: mapping of obfuscated files back into originals
476
- :return: True if successful, False otherwise
444
+ def __finish_scan_threaded(self, file_map: Optional[Dict[Any, Any]] = None) -> bool:
445
+ """Wait for the threaded scan to complete and process the results
446
+
447
+ Args:
448
+ file_map: Mapping of obfuscated files back to originals
449
+
450
+ Returns:
451
+ bool: True if successful, False otherwise
452
+
453
+ Raises:
454
+ ValueError: If output format is invalid
477
455
  """
478
- success = True
479
- responses = None
456
+ success: bool = True
457
+ scan_responses = None
480
458
  dep_responses = None
481
459
  if self.is_file_or_snippet_scan():
482
460
  if not self.threaded_scan.complete(): # Wait for the scans to complete
483
- self.print_stderr(f'Warning: Scanning analysis ran into some trouble.')
461
+ self.print_stderr('Warning: Scanning analysis ran into some trouble.')
484
462
  success = False
485
463
  self.threaded_scan.complete_bar()
486
- responses = self.threaded_scan.responses
464
+ scan_responses = self.threaded_scan.responses
487
465
  if self.is_dependency_scan():
488
466
  self.print_msg('Retrieving dependency data...')
489
467
  if not self.threaded_deps.complete():
490
- self.print_stderr(f'Warning: Dependency analysis ran into some trouble.')
468
+ self.print_stderr('Warning: Dependency analysis ran into some trouble.')
491
469
  success = False
492
470
  dep_responses = self.threaded_deps.responses
493
- # self.print_stderr(f'Dep Data: {dep_responses}')
494
- # TODO change to dictionary
495
- raw_output = "{\n"
496
- # TODO look into merging the two dictionaries. See https://favtutor.com/blogs/merge-dictionaries-python
497
- if responses or dep_responses:
498
- first = True
499
- if responses:
500
- for scan_resp in responses:
501
- if scan_resp is not None:
502
- for key, value in scan_resp.items():
503
- if file_map: # We have a map for obfuscated files. Check if we can revert it
504
- fm = file_map.get(key)
505
- if fm:
506
- key = fm # Replace the obfuscated filename
507
- if first:
508
- raw_output += " \"%s\":%s" % (key, json.dumps(value, indent=2))
509
- first = False
510
- else:
511
- raw_output += ",\n \"%s\":%s" % (key, json.dumps(value, indent=2))
512
- # End for loop
513
- if dep_responses:
514
- dep_files = dep_responses.get("files")
515
- if dep_files and len(dep_files) > 0:
516
- for dep_file in dep_files:
517
- file = dep_file.pop("file", None)
518
- if file is not None:
519
- if first:
520
- raw_output += " \"%s\":[%s]" % (file, json.dumps(dep_file, indent=2))
521
- first = False
522
- else:
523
- raw_output += ",\n \"%s\":[%s]" % (file, json.dumps(dep_file, indent=2))
524
- # End for loop
471
+
472
+ raw_scan_results = self._merge_scan_results(scan_responses, dep_responses, file_map)
473
+
474
+ if self.post_processor:
475
+ results = self.post_processor.load_results(raw_scan_results).post_process()
525
476
  else:
526
- success = False
527
- raw_output += "\n}"
528
- parsed_json = None
529
- try:
530
- parsed_json = json.loads(raw_output)
531
- except Exception as e:
532
- self.print_stderr(f'Warning: Problem decoding parsed json: {e}')
477
+ results = raw_scan_results
533
478
 
534
479
  if self.output_format == 'plain':
535
- if parsed_json:
536
- self.__log_result(json.dumps(parsed_json, indent=2, sort_keys=True))
537
- else:
538
- self.__log_result(raw_output)
480
+ self.__log_result(json.dumps(results, indent=2, sort_keys=True))
539
481
  elif self.output_format == 'cyclonedx':
540
482
  cdx = CycloneDx(self.debug, self.scan_output)
541
- if parsed_json:
542
- success = cdx.produce_from_json(parsed_json)
543
- else:
544
- success = cdx.produce_from_str(raw_output)
483
+ success, _ = cdx.produce_from_json(results)
545
484
  elif self.output_format == 'spdxlite':
546
485
  spdxlite = SpdxLite(self.debug, self.scan_output)
547
- if parsed_json:
548
- success = spdxlite.produce_from_json(parsed_json)
549
- else:
550
- success = spdxlite.produce_from_str(raw_output)
486
+ success = spdxlite.produce_from_json(results)
551
487
  elif self.output_format == 'csv':
552
488
  csvo = CsvOutput(self.debug, self.scan_output)
553
- if parsed_json:
554
- success = csvo.produce_from_json(parsed_json)
555
- else:
556
- success = csvo.produce_from_str(raw_output)
489
+ success = csvo.produce_from_json(results)
557
490
  else:
558
491
  self.print_stderr(f'ERROR: Unknown output format: {self.output_format}')
559
492
  success = False
560
493
  return success
561
494
 
562
- def scan_file_with_options(self, file: str, deps_file: str = None, file_map: dict = None) -> bool:
495
+ def _merge_scan_results(
496
+ self,
497
+ scan_responses: Optional[List],
498
+ dep_responses: Optional[Dict[str, Any]],
499
+ file_map: Optional[Dict[str, Any]],
500
+ ) -> Dict[str, Any]:
501
+ """Merge scan and dependency responses into a single dictionary"""
502
+ results: Dict[str, Any] = {}
503
+
504
+ if scan_responses:
505
+ for response in scan_responses:
506
+ if response is not None:
507
+ if file_map:
508
+ response = self._deobfuscate_filenames(response, file_map) # noqa: PLW2901
509
+ results.update(response)
510
+
511
+ dep_files = dep_responses.get('files', None) if dep_responses else None
512
+ if dep_files:
513
+ for dep_file in dep_files:
514
+ file = dep_file.pop('file', None)
515
+ if file:
516
+ results[file] = [dep_file]
517
+
518
+ return results
519
+
520
+ def _deobfuscate_filenames(self, response: dict, file_map: dict) -> dict:
521
+ """Convert obfuscated filenames back to original names"""
522
+ deobfuscated = {}
523
+ for key, value in response.items():
524
+ deobfuscated_name = file_map.get(key, None)
525
+ if deobfuscated_name:
526
+ deobfuscated[deobfuscated_name] = value
527
+ else:
528
+ deobfuscated[key] = value
529
+ return deobfuscated
530
+
531
+ def scan_file_with_options( # noqa: PLR0913
532
+ self,
533
+ file: str,
534
+ deps_file: str = None,
535
+ file_map: dict = None,
536
+ dep_scope: SCOPE = None,
537
+ dep_scope_include: str = None,
538
+ dep_scope_exclude: str = None,
539
+ ) -> bool:
563
540
  """
564
541
  Scan the given file for whatever scaning options that have been configured
542
+ :param dep_scope:
565
543
  :param file: file to scan
566
544
  :param deps_file: pre-parsed dependency file to decorate
567
545
  :param file_map: mapping of obfuscated files back into originals
@@ -569,16 +547,23 @@ class Scanner(ScanossBase):
569
547
  """
570
548
  success = True
571
549
  if not file:
572
- raise Exception(f"ERROR: Please specify a file to scan")
550
+ raise Exception('ERROR: Please specify a file to scan')
573
551
  if not os.path.exists(file) or not os.path.isfile(file):
574
- raise Exception(f"ERROR: Specified file does not exist or is not a file: {file}")
552
+ raise Exception(f'ERROR: Specified file does not exist or is not a file: {file}')
575
553
  if not self.is_file_or_snippet_scan() and not self.is_dependency_scan():
576
- raise Exception(f"ERROR: No scan options defined to scan file: {file}")
554
+ raise Exception(f'ERROR: No scan options defined to scan file: {file}')
577
555
 
578
556
  if self.scan_output:
579
557
  self.print_msg(f'Writing results to {self.scan_output}...')
580
558
  if self.is_dependency_scan():
581
- if not self.threaded_deps.run(what_to_scan=file, deps_file=deps_file, wait=False): # Kick off a background dependency scan
559
+ if not self.threaded_deps.run(
560
+ what_to_scan=file,
561
+ deps_file=deps_file,
562
+ wait=False,
563
+ dep_scope=dep_scope,
564
+ dep_scope_include=dep_scope_include,
565
+ dep_scope_exclude=dep_scope_exclude,
566
+ ): # Kick off a background dependency scan
582
567
  success = False
583
568
  if self.is_file_or_snippet_scan():
584
569
  if not self.scan_file(file):
@@ -599,9 +584,9 @@ class Scanner(ScanossBase):
599
584
  """
600
585
  success = True
601
586
  if not file:
602
- raise Exception(f"ERROR: Please specify a file to scan")
587
+ raise Exception('ERROR: Please specify a file to scan')
603
588
  if not os.path.exists(file) or not os.path.isfile(file):
604
- raise Exception(f"ERROR: Specified files does not exist or is not a file: {file}")
589
+ raise Exception(f'ERROR: Specified files does not exist or is not a file: {file}')
605
590
  self.print_debug(f'Fingerprinting {file}...')
606
591
  wfp = self.winnowing.wfp_for_file(file, file)
607
592
  if wfp is not None and wfp != '':
@@ -614,7 +599,7 @@ class Scanner(ScanossBase):
614
599
  success = False
615
600
  return success
616
601
 
617
- def scan_files(self, files: []) -> bool:
602
+ def scan_files(self, files: []) -> bool: # noqa: PLR0912, PLR0915
618
603
  """
619
604
  Scan the specified list of files, producing fingerprints, send to the SCANOSS API and return results
620
605
  Please note that by providing an explicit list you bypass any exclusions that may be defined on the scanner
@@ -624,52 +609,58 @@ class Scanner(ScanossBase):
624
609
  """
625
610
  success = True
626
611
  if not files:
627
- raise Exception(f"ERROR: Please provide a non-empty list of filenames to scan")
628
- self.print_msg(f'Scanning {len(files)} files...')
629
- spinner = None
630
- if not self.quiet and self.isatty:
631
- spinner = Spinner('Fingerprinting ')
632
- save_wfps_for_print = not self.no_wfp_file or not self.threaded_scan
633
- wfp_list = []
634
- scan_block = ''
635
- scan_size = 0
636
- queue_size = 0
637
- file_count = 0 # count all files fingerprinted
638
- wfp_file_count = 0 # count number of files in each queue post
639
- scan_started = False
640
- for file in files:
641
- if self.threaded_scan and self.threaded_scan.stop_scanning():
642
- self.print_stderr('Warning: Aborting fingerprinting as the scanning service is not available.')
643
- break
644
- f_size = 0
645
- try:
646
- f_size = os.stat(file).st_size
647
- except Exception as e:
648
- self.print_trace(
649
- f'Ignoring missing symlink file: {file} ({e})') # Can fail if there is a broken symlink
650
- if f_size > 0: # Ignore broken links and empty files
651
- self.print_trace(f'Fingerprinting {file}...')
612
+ raise Exception('ERROR: Please provide a non-empty list of filenames to scan')
613
+
614
+ file_filters = FileFilters(
615
+ debug=self.debug,
616
+ trace=self.trace,
617
+ quiet=self.quiet,
618
+ scanoss_settings=self.scan_settings,
619
+ all_extensions=self.all_extensions,
620
+ all_folders=self.all_folders,
621
+ hidden_files_folders=self.hidden_files_folders,
622
+ skip_size=self.skip_size,
623
+ skip_folders=self.skip_folders,
624
+ skip_extensions=self.skip_extensions,
625
+ operation_type='scanning',
626
+ )
627
+ spinner_ctx = Spinner('Fingerprinting ') if (not self.quiet and self.isatty) else nullcontext()
628
+
629
+ with spinner_ctx as spinner:
630
+ scan_block = ''
631
+ scan_size = 0
632
+ queue_size = 0
633
+ file_count = 0 # count all files fingerprinted
634
+ wfp_file_count = 0 # count number of files in each queue post
635
+ scan_started = False
636
+
637
+ to_scan_files = file_filters.get_filtered_files_from_files(files)
638
+ for file in to_scan_files:
639
+ if self.threaded_scan and self.threaded_scan.stop_scanning():
640
+ self.print_stderr('Warning: Aborting fingerprinting as the scanning service is not available.')
641
+ break
642
+ self.print_debug(f'Fingerprinting {file}...')
652
643
  if spinner:
653
644
  spinner.next()
654
645
  wfp = self.winnowing.wfp_for_file(file, file)
655
646
  if wfp is None or wfp == '':
656
647
  self.print_debug(f'No WFP returned for {file}. Skipping.')
657
648
  continue
658
- if save_wfps_for_print:
659
- wfp_list.append(wfp)
660
649
  file_count += 1
661
650
  if self.threaded_scan:
662
- wfp_size = len(wfp.encode("utf-8"))
663
- # If the WFP is bigger than the max post size and we already have something stored in the scan block, add it to the queue
651
+ wfp_size = len(wfp.encode('utf-8'))
652
+ # If the WFP is bigger than the max post size and we already have something
653
+ # stored in the scan block, add it to the queue
664
654
  if scan_block != '' and (wfp_size + scan_size) >= self.max_post_size:
665
655
  self.threaded_scan.queue_add(scan_block)
666
656
  queue_size += 1
667
657
  scan_block = ''
668
658
  wfp_file_count = 0
669
659
  scan_block += wfp
670
- scan_size = len(scan_block.encode("utf-8"))
660
+ scan_size = len(scan_block.encode('utf-8'))
671
661
  wfp_file_count += 1
672
- # If the scan request block (group of WFPs) or larger than the POST size or we have reached the file limit, add it to the queue
662
+ # If the scan request block (group of WFPs) is larger than the POST size
663
+ # or we have reached the file limit, add it to the queue
673
664
  if wfp_file_count > self.post_file_count or scan_size >= self.max_post_size:
674
665
  self.threaded_scan.queue_add(scan_block)
675
666
  queue_size += 1
@@ -679,27 +670,22 @@ class Scanner(ScanossBase):
679
670
  scan_started = True
680
671
  if not self.threaded_scan.run(wait=False):
681
672
  self.print_stderr(
682
- f'Warning: Some errors encounted while scanning. Results might be incomplete.')
673
+ 'Warning: Some errors encountered while scanning. '
674
+ 'Results might be incomplete.'
675
+ )
683
676
  success = False
684
- # End for loop
685
- if self.threaded_scan and scan_block != '':
686
- self.threaded_scan.queue_add(scan_block) # Make sure all files have been submitted
687
- if spinner:
688
- spinner.finish()
677
+
678
+ # End for loop
679
+ if self.threaded_scan and scan_block != '':
680
+ self.threaded_scan.queue_add(scan_block) # Make sure all files have been submitted
689
681
 
690
682
  if file_count > 0:
691
- if save_wfps_for_print: # Write a WFP file if no threading is requested
692
- self.print_debug(f'Writing fingerprints to {self.wfp}')
693
- with open(self.wfp, 'w') as f:
694
- f.write(''.join(wfp_list))
695
- else:
696
- self.print_debug(f'Skipping writing WFP file {self.wfp}')
697
683
  if self.threaded_scan:
698
684
  success = self.__run_scan_threaded(scan_started, file_count)
699
685
  else:
700
- Scanner.print_stderr(f'Warning: No files found to scan from: {files}')
686
+ Scanner.print_stderr(f'Warning: No files found to scan from: {to_scan_files}')
701
687
  return success
702
-
688
+
703
689
  def scan_files_with_options(self, files: [], deps_file: str = None, file_map: dict = None) -> bool:
704
690
  """
705
691
  Scan the given list of files for whatever scaning options that have been configured
@@ -710,11 +696,13 @@ class Scanner(ScanossBase):
710
696
  """
711
697
  success = True
712
698
  if not files:
713
- raise Exception(f"ERROR: Please specify a list of files to scan")
699
+ raise Exception('ERROR: Please specify a list of files to scan')
714
700
  if not self.is_file_or_snippet_scan():
715
- raise Exception(f"ERROR: file or snippet scan options have to be set to scan files: {files}")
701
+ raise Exception(f'ERROR: file or snippet scan options have to be set to scan files: {files}')
716
702
  if self.is_dependency_scan() or deps_file:
717
- raise Exception(f"ERROR: The dependency scan option is currently not supported when scanning a list of files")
703
+ raise Exception(
704
+ 'ERROR: The dependency scan option is currently not supported when scanning a list of files'
705
+ )
718
706
  if self.scan_output:
719
707
  self.print_msg(f'Writing results to {self.scan_output}...')
720
708
  if self.is_file_or_snippet_scan():
@@ -735,9 +723,9 @@ class Scanner(ScanossBase):
735
723
  """
736
724
  success = True
737
725
  if not filename:
738
- raise Exception(f"ERROR: Please specify a filename to scan")
726
+ raise Exception('ERROR: Please specify a filename to scan')
739
727
  if not contents:
740
- raise Exception(f"ERROR: Please specify a file contents to scan")
728
+ raise Exception('ERROR: Please specify a file contents to scan')
741
729
 
742
730
  self.print_debug(f'Fingerprinting {filename}...')
743
731
  wfp = self.winnowing.wfp_for_contents(filename, False, contents)
@@ -754,121 +742,22 @@ class Scanner(ScanossBase):
754
742
  success = False
755
743
  return success
756
744
 
757
- def scan_wfp_file(self, file: str = None) -> bool:
758
- """
759
- Scan the contents of the specified WFP file (in the current process)
760
- :param file: Scan the contents of the specified WFP file (in the current process)
761
- :return: True if successful, False otherwise
762
- """
763
- success = True
764
- wfp_file = file if file else self.wfp # If a WFP file is specified, use it, otherwise us the default
765
- if not os.path.exists(wfp_file) or not os.path.isfile(wfp_file):
766
- raise Exception(f"ERROR: Specified WFP file does not exist or is not a file: {wfp_file}")
767
- file_count = Scanner.__count_files_in_wfp_file(wfp_file)
768
- cur_files = 0
769
- cur_size = 0
770
- batch_files = 0
771
- wfp = ''
772
- max_component = {'name': '', 'hits': 0}
773
- components = {}
774
- self.print_debug(f'Found {file_count} files to process.')
775
- raw_output = "{\n"
776
- file_print = ''
777
- bar = None
778
- if not self.quiet and self.isatty:
779
- bar = Bar('Scanning', max=file_count)
780
- bar.next(0)
781
- with open(wfp_file) as f:
782
- for line in f:
783
- if line.startswith(WFP_FILE_START):
784
- if file_print:
785
- wfp += file_print # Store the WFP for the current file
786
- cur_size = len(wfp.encode("utf-8"))
787
- file_print = line # Start storing the next file
788
- cur_files += 1
789
- batch_files += 1
790
- else:
791
- file_print += line # Store the rest of the WFP for this file
792
- l_size = cur_size + len(file_print.encode('utf-8'))
793
- # Hit the max post size, so sending the current batch and continue processing
794
- if l_size >= self.max_post_size and wfp:
795
- self.print_debug(f'Sending {batch_files} ({cur_files}) of'
796
- f' {file_count} ({len(wfp.encode("utf-8"))} bytes) files to the ScanOSS API.')
797
- if self.debug and cur_size > self.max_post_size:
798
- Scanner.print_stderr(f'Warning: Post size {cur_size} greater than limit {self.max_post_size}')
799
- scan_resp = self.scanoss_api.scan(wfp, max_component['name']) # Scan current WFP and store
800
- if bar:
801
- bar.next(batch_files)
802
- if scan_resp is not None:
803
- for key, value in scan_resp.items():
804
- raw_output += " \"%s\":%s," % (key, json.dumps(value, indent=2))
805
- for v in value:
806
- if hasattr(v, 'get'):
807
- if v.get('id') != 'none':
808
- vcv = '%s:%s:%s' % (v.get('vendor'), v.get('component'), v.get('version'))
809
- components[vcv] = components[vcv] + 1 if vcv in components else 1
810
- if max_component['hits'] < components[vcv]:
811
- max_component['name'] = v.get('component')
812
- max_component['hits'] = components[vcv]
813
- else:
814
- Scanner.print_stderr(f'Warning: Unknown value: {v}')
815
- else:
816
- success = False
817
- batch_files = 0
818
- wfp = ''
819
- if file_print:
820
- wfp += file_print # Store the WFP for the current file
821
- if wfp:
822
- self.print_debug(f'Sending {batch_files} ({cur_files}) of'
823
- f' {file_count} ({len(wfp.encode("utf-8"))} bytes) files to the ScanOSS API.')
824
- scan_resp = self.scanoss_api.scan(wfp, max_component['name']) # Scan current WFP and store
825
- if bar:
826
- bar.next(batch_files)
827
- first = True
828
- if scan_resp is not None:
829
- for key, value in scan_resp.items():
830
- if first:
831
- raw_output += " \"%s\":%s" % (key, json.dumps(value, indent=2))
832
- first = False
833
- else:
834
- raw_output += ",\n \"%s\":%s" % (key, json.dumps(value, indent=2))
835
- else:
836
- success = False
837
- raw_output += "\n}"
838
- if bar:
839
- bar.finish()
840
- if self.output_format == 'plain':
841
- self.__log_result(raw_output)
842
- elif self.output_format == 'cyclonedx':
843
- cdx = CycloneDx(self.debug, self.scan_output)
844
- cdx.produce_from_str(raw_output)
845
- elif self.output_format == 'spdxlite':
846
- spdxlite = SpdxLite(self.debug, self.scan_output)
847
- success = spdxlite.produce_from_str(raw_output)
848
- elif self.output_format == 'csv':
849
- csvo = CsvOutput(self.debug, self.scan_output)
850
- csvo.produce_from_str(raw_output)
851
- else:
852
- self.print_stderr(f'ERROR: Unknown output format: {self.output_format}')
853
- success = False
854
-
855
- return success
856
-
857
- def scan_wfp_with_options(self, wfp: str, deps_file: str, file_map: dict = None) -> bool:
745
+ def scan_wfp_with_options(self, wfp_file: str, deps_file: str, file_map: dict = None) -> bool:
858
746
  """
859
747
  Scan the given WFP file for whatever scaning options that have been configured
860
- :param wfp: WFP file to scan
748
+ :param wfp_file: WFP file to scan
861
749
  :param deps_file: pre-parsed dependency file to decorate
862
750
  :param file_map: mapping of obfuscated files back into originals
863
751
  :return: True if successful, False otherwise
864
752
  """
865
753
  success = True
866
- wfp_file = wfp if wfp else self.wfp # If a WFP file is specified, use it, otherwise us the default
754
+ if not wfp_file:
755
+ raise Exception('ERROR: Please specify a WFP file to scan')
867
756
  if not os.path.exists(wfp_file) or not os.path.isfile(wfp_file):
868
- raise Exception(f"ERROR: Specified WFP file does not exist or is not a file: {wfp_file}")
757
+ raise Exception(f'ERROR: Specified WFP file does not exist or is not a file: {wfp_file}')
869
758
 
870
759
  if not self.is_file_or_snippet_scan() and not self.is_dependency_scan():
871
- raise Exception(f"ERROR: No scan options defined to scan WFP: {wfp}")
760
+ raise Exception(f'ERROR: No scan options defined to scan WFP: {wfp_file}')
872
761
 
873
762
  if self.scan_output:
874
763
  self.print_msg(f'Writing results to {self.scan_output}...')
@@ -883,16 +772,17 @@ class Scanner(ScanossBase):
883
772
  success = False
884
773
  return success
885
774
 
886
- def scan_wfp_file_threaded(self, file: str = None) -> bool:
775
+ def scan_wfp_file_threaded(self, wfp_file: str) -> bool: # noqa: PLR0912
887
776
  """
888
777
  Scan the contents of the specified WFP file (threaded)
889
- :param file: WFP file to scan (optional)
778
+ :param wfp_file: WFP file to scan
890
779
  return: True if successful, False otherwise
891
780
  """
892
781
  success = True
893
- wfp_file = file if file else self.wfp # If a WFP file is specified, use it, otherwise us the default
782
+ if not wfp_file:
783
+ raise Exception('ERROR: Please specify a WFP file to scan')
894
784
  if not os.path.exists(wfp_file) or not os.path.isfile(wfp_file):
895
- raise Exception(f"ERROR: Specified WFP file does not exist or is not a file: {wfp_file}")
785
+ raise Exception(f'ERROR: Specified WFP file does not exist or is not a file: {wfp_file}')
896
786
  cur_size = 0
897
787
  queue_size = 0
898
788
  file_count = 0 # count all files fingerprinted
@@ -905,7 +795,7 @@ class Scanner(ScanossBase):
905
795
  if line.startswith(WFP_FILE_START):
906
796
  if scan_block:
907
797
  wfp += scan_block # Store the WFP for the current file
908
- cur_size = len(wfp.encode("utf-8"))
798
+ cur_size = len(wfp.encode('utf-8'))
909
799
  scan_block = line # Start storing the next file
910
800
  file_count += 1
911
801
  wfp_file_count += 1
@@ -924,7 +814,8 @@ class Scanner(ScanossBase):
924
814
  scan_started = True
925
815
  if not self.threaded_scan.run(wait=False):
926
816
  self.print_stderr(
927
- f'Warning: Some errors encounted while scanning. Results might be incomplete.')
817
+ 'Warning: Some errors uncounted while scanning. Results might be incomplete.'
818
+ )
928
819
  success = False
929
820
  # End for loop
930
821
  if scan_block:
@@ -947,15 +838,15 @@ class Scanner(ScanossBase):
947
838
  """
948
839
  success = True
949
840
  if not wfp:
950
- raise Exception(f"ERROR: Please specify a WFP to scan")
951
- raw_output = "{\n"
841
+ raise Exception('ERROR: Please specify a WFP to scan')
842
+ raw_output = '{\n'
952
843
  scan_resp = self.scanoss_api.scan(wfp)
953
844
  if scan_resp is not None:
954
845
  for key, value in scan_resp.items():
955
- raw_output += " \"%s\":%s" % (key, json.dumps(value, indent=2))
846
+ raw_output += ' "%s":%s' % (key, json.dumps(value, indent=2))
956
847
  else:
957
848
  success = False
958
- raw_output += "\n}"
849
+ raw_output += '\n}'
959
850
  if self.output_format == 'plain':
960
851
  self.__log_result(raw_output)
961
852
  elif self.output_format == 'cyclonedx':
@@ -983,9 +874,9 @@ class Scanner(ScanossBase):
983
874
  :return:
984
875
  """
985
876
  if not filename:
986
- raise Exception(f"ERROR: Please specify a filename to scan")
877
+ raise Exception('ERROR: Please specify a filename to scan')
987
878
  if not contents:
988
- raise Exception(f"ERROR: Please specify a file contents to scan")
879
+ raise Exception('ERROR: Please specify a file contents to scan')
989
880
 
990
881
  self.print_debug(f'Fingerprinting {filename}...')
991
882
  wfp = self.winnowing.wfp_for_contents(filename, False, contents)
@@ -1004,9 +895,9 @@ class Scanner(ScanossBase):
1004
895
  Fingerprint the specified file
1005
896
  """
1006
897
  if not scan_file:
1007
- raise Exception(f"ERROR: Please specify a file to fingerprint")
898
+ raise Exception('ERROR: Please specify a file to fingerprint')
1008
899
  if not os.path.exists(scan_file) or not os.path.isfile(scan_file):
1009
- raise Exception(f"ERROR: Specified file does not exist or is not a file: {scan_file}")
900
+ raise Exception(f'ERROR: Specified file does not exist or is not a file: {scan_file}')
1010
901
 
1011
902
  self.print_debug(f'Fingerprinting {scan_file}...')
1012
903
  wfp = self.winnowing.wfp_for_file(scan_file, scan_file)
@@ -1025,34 +916,34 @@ class Scanner(ScanossBase):
1025
916
  Fingerprint the specified folder producing fingerprints
1026
917
  """
1027
918
  if not scan_dir:
1028
- raise Exception(f"ERROR: Please specify a folder to fingerprint")
919
+ raise Exception('ERROR: Please specify a folder to fingerprint')
1029
920
  if not os.path.exists(scan_dir) or not os.path.isdir(scan_dir):
1030
- raise Exception(f"ERROR: Specified folder does not exist or is not a folder: {scan_dir}")
921
+ raise Exception(f'ERROR: Specified folder does not exist or is not a folder: {scan_dir}')
922
+ file_filters = FileFilters(
923
+ debug=self.debug,
924
+ trace=self.trace,
925
+ quiet=self.quiet,
926
+ scanoss_settings=self.scan_settings,
927
+ all_extensions=self.all_extensions,
928
+ all_folders=self.all_folders,
929
+ hidden_files_folders=self.hidden_files_folders,
930
+ skip_size=self.skip_size,
931
+ skip_folders=self.skip_folders,
932
+ skip_extensions=self.skip_extensions,
933
+ operation_type='scanning',
934
+ )
1031
935
  wfps = ''
1032
- scan_dir_len = len(scan_dir) if scan_dir.endswith(os.path.sep) else len(scan_dir) + 1
1033
936
  self.print_msg(f'Searching {scan_dir} for files to fingerprint...')
1034
- spinner = None
1035
- if not self.quiet and self.isatty:
1036
- spinner = Spinner('Fingerprinting ')
1037
- for root, dirs, files in os.walk(scan_dir):
1038
- dirs[:] = self.__filter_dirs(dirs) # Strip out unwanted directories
1039
- filtered_files = self.__filter_files(files) # Strip out unwanted files
1040
- self.print_trace(f'Root: {root}, Dirs: {dirs}, Files {filtered_files}')
1041
- for file in filtered_files:
1042
- path = os.path.join(root, file)
1043
- f_size = 0
1044
- try:
1045
- f_size = os.stat(path).st_size
1046
- except Exception as e:
1047
- self.print_trace(
1048
- f'Ignoring missing symlink file: {file} ({e})') # Can fail if there is a broken symlink
1049
- if f_size > 0: # Ignore empty files
1050
- self.print_debug(f'Fingerprinting {path}...')
1051
- if spinner:
1052
- spinner.next()
1053
- wfps += self.winnowing.wfp_for_file(path, Scanner.__strip_dir(scan_dir, scan_dir_len, path))
1054
- if spinner:
1055
- spinner.finish()
937
+ spinner_ctx = Spinner('Fingerprinting ') if (not self.quiet and self.isatty) else nullcontext()
938
+
939
+ with spinner_ctx as spinner:
940
+ to_fingerprint_files = file_filters.get_filtered_files_from_folder(scan_dir)
941
+ for file in to_fingerprint_files:
942
+ if spinner:
943
+ spinner.next()
944
+ abs_path = Path(scan_dir, file).resolve()
945
+ self.print_debug(f'Fingerprinting {file}...')
946
+ wfps += self.winnowing.wfp_for_file(str(abs_path), file)
1056
947
  if wfps:
1057
948
  if wfp_file:
1058
949
  self.print_stderr(f'Writing fingerprints to {wfp_file}')
@@ -1063,6 +954,7 @@ class Scanner(ScanossBase):
1063
954
  else:
1064
955
  Scanner.print_stderr(f'Warning: No files found to fingerprint in folder: {scan_dir}')
1065
956
 
957
+
1066
958
  #
1067
959
  # End of ScanOSS Class
1068
960
  #