scanoss 1.20.0__py3-none-any.whl → 1.20.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- protoc_gen_swagger/__init__.py +13 -13
- protoc_gen_swagger/options/__init__.py +13 -13
- protoc_gen_swagger/options/annotations_pb2.py +12 -9
- protoc_gen_swagger/options/annotations_pb2_grpc.py +1 -1
- protoc_gen_swagger/options/openapiv2_pb2.py +98 -96
- protoc_gen_swagger/options/openapiv2_pb2_grpc.py +1 -1
- scanoss/__init__.py +18 -18
- scanoss/api/__init__.py +17 -17
- scanoss/api/common/__init__.py +17 -17
- scanoss/api/common/v2/__init__.py +17 -17
- scanoss/api/common/v2/scanoss_common_pb2.py +18 -18
- scanoss/api/common/v2/scanoss_common_pb2_grpc.py +1 -1
- scanoss/api/components/__init__.py +17 -17
- scanoss/api/components/v2/__init__.py +17 -17
- scanoss/api/components/v2/scanoss_components_pb2.py +48 -38
- scanoss/api/components/v2/scanoss_components_pb2_grpc.py +142 -96
- scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py +22 -16
- scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py +75 -49
- scanoss/api/dependencies/__init__.py +17 -17
- scanoss/api/dependencies/v2/__init__.py +17 -17
- scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py +30 -24
- scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py +75 -49
- scanoss/api/scanning/__init__.py +17 -17
- scanoss/api/scanning/v2/__init__.py +17 -17
- scanoss/api/scanning/v2/scanoss_scanning_pb2.py +10 -8
- scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py +40 -32
- scanoss/api/semgrep/__init__.py +17 -17
- scanoss/api/semgrep/v2/__init__.py +17 -17
- scanoss/api/semgrep/v2/scanoss_semgrep_pb2.py +22 -18
- scanoss/api/semgrep/v2/scanoss_semgrep_pb2_grpc.py +71 -49
- scanoss/api/vulnerabilities/__init__.py +17 -17
- scanoss/api/vulnerabilities/v2/__init__.py +17 -17
- scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py +37 -27
- scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py +109 -72
- scanoss/cli.py +576 -293
- scanoss/components.py +67 -45
- scanoss/csvoutput.py +83 -56
- scanoss/cyclonedx.py +48 -46
- scanoss/data/build_date.txt +1 -1
- scanoss/file_filters.py +13 -15
- scanoss/filecount.py +43 -36
- scanoss/inspection/__init__.py +17 -17
- scanoss/inspection/copyleft.py +71 -58
- scanoss/inspection/policy_check.py +76 -53
- scanoss/inspection/undeclared_component.py +98 -75
- scanoss/inspection/utils/license_utils.py +66 -44
- scanoss/results.py +51 -60
- scanoss/scancodedeps.py +61 -38
- scanoss/scanner.py +203 -135
- scanoss/scanoss_settings.py +5 -3
- scanoss/scanossapi.py +98 -69
- scanoss/scanossbase.py +19 -19
- scanoss/scanossgrpc.py +73 -51
- scanoss/scanpostprocessor.py +9 -6
- scanoss/scantype.py +22 -21
- scanoss/spdxlite.py +265 -171
- scanoss/threadeddependencies.py +91 -61
- scanoss/threadedscanning.py +37 -31
- scanoss/utils/file.py +4 -4
- scanoss/winnowing.py +111 -47
- {scanoss-1.20.0.dist-info → scanoss-1.20.1.dist-info}/METADATA +1 -1
- scanoss-1.20.1.dist-info/RECORD +74 -0
- scanoss-1.20.0.dist-info/RECORD +0 -74
- {scanoss-1.20.0.dist-info → scanoss-1.20.1.dist-info}/LICENSE +0 -0
- {scanoss-1.20.0.dist-info → scanoss-1.20.1.dist-info}/WHEEL +0 -0
- {scanoss-1.20.0.dist-info → scanoss-1.20.1.dist-info}/entry_points.txt +0 -0
- {scanoss-1.20.0.dist-info → scanoss-1.20.1.dist-info}/top_level.txt +0 -0
scanoss/threadeddependencies.py
CHANGED
|
@@ -1,25 +1,25 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
2
|
+
SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
Copyright (c) 2021, SCANOSS
|
|
5
|
+
|
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
in the Software without restriction, including without limitation the rights
|
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
furnished to do so, subject to the following conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be included in
|
|
14
|
+
all copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
22
|
+
THE SOFTWARE.
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
25
|
import threading
|
|
@@ -33,9 +33,21 @@ from .scancodedeps import ScancodeDeps
|
|
|
33
33
|
from .scanossbase import ScanossBase
|
|
34
34
|
from .scanossgrpc import ScanossGrpc
|
|
35
35
|
|
|
36
|
-
DEP_FILE_PREFIX =
|
|
36
|
+
DEP_FILE_PREFIX = 'file=' # Default prefix to signify an existing parsed dependency file
|
|
37
37
|
|
|
38
|
-
DEV_DEPENDENCIES = {
|
|
38
|
+
DEV_DEPENDENCIES = {
|
|
39
|
+
'dev',
|
|
40
|
+
'test',
|
|
41
|
+
'development',
|
|
42
|
+
'provided',
|
|
43
|
+
'runtime',
|
|
44
|
+
'devDependencies',
|
|
45
|
+
'dev-dependencies',
|
|
46
|
+
'testImplementation',
|
|
47
|
+
'testCompile',
|
|
48
|
+
'Test',
|
|
49
|
+
'require-dev',
|
|
50
|
+
}
|
|
39
51
|
|
|
40
52
|
|
|
41
53
|
# Define an enum class
|
|
@@ -46,17 +58,21 @@ class SCOPE(Enum):
|
|
|
46
58
|
|
|
47
59
|
@dataclass
|
|
48
60
|
class ThreadedDependencies(ScanossBase):
|
|
49
|
-
"""
|
|
61
|
+
""" """
|
|
50
62
|
|
|
51
|
-
"""
|
|
52
63
|
inputs: queue.Queue = queue.Queue()
|
|
53
64
|
output: queue.Queue = queue.Queue()
|
|
54
65
|
|
|
55
|
-
def __init__(
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
66
|
+
def __init__(
|
|
67
|
+
self,
|
|
68
|
+
sc_deps: ScancodeDeps,
|
|
69
|
+
grpc_api: ScanossGrpc,
|
|
70
|
+
what_to_scan: str = None,
|
|
71
|
+
debug: bool = False,
|
|
72
|
+
trace: bool = False,
|
|
73
|
+
quiet: bool = False,
|
|
74
|
+
) -> None:
|
|
75
|
+
""" """
|
|
60
76
|
super().__init__(debug, trace, quiet)
|
|
61
77
|
self.sc_deps = sc_deps
|
|
62
78
|
self.grpc_api = grpc_api
|
|
@@ -76,8 +92,15 @@ class ThreadedDependencies(ScanossBase):
|
|
|
76
92
|
return resp
|
|
77
93
|
return None
|
|
78
94
|
|
|
79
|
-
def run(
|
|
80
|
-
|
|
95
|
+
def run(
|
|
96
|
+
self,
|
|
97
|
+
what_to_scan: str = None,
|
|
98
|
+
deps_file: str = None,
|
|
99
|
+
wait: bool = True,
|
|
100
|
+
dep_scope: SCOPE = None,
|
|
101
|
+
dep_scope_include: str = None,
|
|
102
|
+
dep_scope_exclude: str = None,
|
|
103
|
+
) -> bool:
|
|
81
104
|
"""
|
|
82
105
|
Initiate a background scan for the specified file/dir
|
|
83
106
|
:param dep_scope_exclude: comma separated list of dependency scopes to exclude
|
|
@@ -91,23 +114,25 @@ class ThreadedDependencies(ScanossBase):
|
|
|
91
114
|
what_to_scan = what_to_scan if what_to_scan else self.what_to_scan
|
|
92
115
|
self._errors = False
|
|
93
116
|
try:
|
|
94
|
-
if deps_file:
|
|
117
|
+
if deps_file: # Decorate the given dependencies file
|
|
95
118
|
self.print_msg(f'Decorating {deps_file} dependencies...')
|
|
96
|
-
self.inputs.put(f'{DEP_FILE_PREFIX}{deps_file}')
|
|
97
|
-
else:
|
|
119
|
+
self.inputs.put(f'{DEP_FILE_PREFIX}{deps_file}') # Add to queue and have parent wait on it
|
|
120
|
+
else: # Search for dependencies to decorate
|
|
98
121
|
self.print_msg(f'Searching {what_to_scan} for dependencies...')
|
|
99
122
|
self.inputs.put(what_to_scan)
|
|
100
123
|
# Add to queue and have parent wait on it
|
|
101
|
-
self._thread = threading.Thread(
|
|
124
|
+
self._thread = threading.Thread(
|
|
125
|
+
target=self.scan_dependencies(dep_scope, dep_scope_include, dep_scope_exclude), daemon=True
|
|
126
|
+
)
|
|
102
127
|
self._thread.start()
|
|
103
128
|
except Exception as e:
|
|
104
129
|
self.print_stderr(f'ERROR: Problem running threaded dependencies: {e}')
|
|
105
130
|
self._errors = True
|
|
106
|
-
if wait and not self._errors:
|
|
131
|
+
if wait and not self._errors: # Wait for all inputs to complete
|
|
107
132
|
self.complete()
|
|
108
133
|
return False if self._errors else True
|
|
109
134
|
|
|
110
|
-
def filter_dependencies(self,deps
|
|
135
|
+
def filter_dependencies(self, deps, filter_dep) -> json:
|
|
111
136
|
files = deps.get('files', [])
|
|
112
137
|
# Iterate over files and their purls
|
|
113
138
|
for file in files:
|
|
@@ -120,52 +145,56 @@ class ThreadedDependencies(ScanossBase):
|
|
|
120
145
|
]
|
|
121
146
|
# End of for loop
|
|
122
147
|
|
|
123
|
-
return {
|
|
124
|
-
'files': [
|
|
125
|
-
file for file in deps.get('files', [])
|
|
126
|
-
if file.get('purls')
|
|
127
|
-
]
|
|
128
|
-
}
|
|
148
|
+
return {'files': [file for file in deps.get('files', []) if file.get('purls')]}
|
|
129
149
|
|
|
130
|
-
def filter_dependencies_by_scopes(
|
|
131
|
-
|
|
150
|
+
def filter_dependencies_by_scopes(
|
|
151
|
+
self, deps: json, dep_scope: SCOPE = None, dep_scope_include: str = None, dep_scope_exclude: str = None
|
|
152
|
+
) -> json:
|
|
132
153
|
# Predefined set of scopes to filter
|
|
133
154
|
|
|
134
155
|
# Include all scopes
|
|
135
|
-
include_all = (dep_scope is None or dep_scope ==
|
|
156
|
+
include_all = (dep_scope is None or dep_scope == '') and dep_scope_include is None and dep_scope_exclude is None
|
|
136
157
|
## All dependencies, remove scope key
|
|
137
158
|
if include_all:
|
|
138
|
-
return self.filter_dependencies(deps, lambda purl:True)
|
|
159
|
+
return self.filter_dependencies(deps, lambda purl: True)
|
|
139
160
|
|
|
140
161
|
# Use default list of scopes if a custom list is not set
|
|
141
|
-
if (dep_scope is not None and dep_scope !=
|
|
142
|
-
return self.filter_dependencies(
|
|
143
|
-
|
|
162
|
+
if (dep_scope is not None and dep_scope != '') and dep_scope_include is None and dep_scope_exclude is None:
|
|
163
|
+
return self.filter_dependencies(
|
|
164
|
+
deps,
|
|
165
|
+
lambda purl: (dep_scope == SCOPE.PRODUCTION and purl not in DEV_DEPENDENCIES)
|
|
166
|
+
or dep_scope == SCOPE.DEVELOPMENT
|
|
167
|
+
and purl in DEV_DEPENDENCIES,
|
|
168
|
+
)
|
|
144
169
|
|
|
145
|
-
if (
|
|
146
|
-
|
|
170
|
+
if (
|
|
171
|
+
(dep_scope_include is not None and dep_scope_include != '')
|
|
172
|
+
or dep_scope_exclude is not None
|
|
173
|
+
and dep_scope_exclude != ''
|
|
174
|
+
):
|
|
147
175
|
# Create sets from comma-separated strings, if provided
|
|
148
176
|
exclude = set(dep_scope_exclude.split(',')) if dep_scope_exclude else set()
|
|
149
177
|
include = set(dep_scope_include.split(',')) if dep_scope_include else set()
|
|
150
178
|
|
|
151
179
|
# Define a lambda function that checks the inclusion/exclusion logic
|
|
152
180
|
return self.filter_dependencies(
|
|
153
|
-
deps,
|
|
154
|
-
lambda purl: (exclude and purl not in exclude) or (not exclude and purl in include)
|
|
181
|
+
deps, lambda purl: (exclude and purl not in exclude) or (not exclude and purl in include)
|
|
155
182
|
)
|
|
156
183
|
|
|
157
|
-
def scan_dependencies(
|
|
184
|
+
def scan_dependencies(
|
|
185
|
+
self, dep_scope: SCOPE = None, dep_scope_include: str = None, dep_scope_exclude: str = None
|
|
186
|
+
) -> None:
|
|
158
187
|
"""
|
|
159
188
|
Scan for dependencies from the given file/dir or from an input file (from the input queue).
|
|
160
189
|
"""
|
|
161
190
|
current_thread = threading.get_ident()
|
|
162
191
|
self.print_trace(f'Starting dependency worker {current_thread}...')
|
|
163
192
|
try:
|
|
164
|
-
what_to_scan = self.inputs.get(timeout=5)
|
|
193
|
+
what_to_scan = self.inputs.get(timeout=5) # Begin processing the dependency request
|
|
165
194
|
deps = None
|
|
166
|
-
if what_to_scan.startswith(DEP_FILE_PREFIX):
|
|
195
|
+
if what_to_scan.startswith(DEP_FILE_PREFIX): # We have a pre-parsed dependency file, load it
|
|
167
196
|
deps = self.sc_deps.load_from_file(what_to_scan.strip(DEP_FILE_PREFIX))
|
|
168
|
-
else:
|
|
197
|
+
else: # Search the file/folder for dependency files to parse
|
|
169
198
|
if not self.sc_deps.run_scan(what_to_scan=what_to_scan):
|
|
170
199
|
self._errors = True
|
|
171
200
|
else:
|
|
@@ -176,13 +205,13 @@ class ThreadedDependencies(ScanossBase):
|
|
|
176
205
|
self.print_debug(f"Including dependencies with '{dep_scope_include.split(',')}' scopes")
|
|
177
206
|
if dep_scope_exclude is not None:
|
|
178
207
|
self.print_debug(f"Excluding dependencies with '{dep_scope_exclude.split(',')}' scopes")
|
|
179
|
-
deps = self.filter_dependencies_by_scopes(deps, dep_scope,dep_scope_include, dep_scope_exclude)
|
|
208
|
+
deps = self.filter_dependencies_by_scopes(deps, dep_scope, dep_scope_include, dep_scope_exclude)
|
|
180
209
|
|
|
181
210
|
if not self._errors:
|
|
182
211
|
if deps is None:
|
|
183
212
|
self.print_stderr(f'Problem searching for dependencies for: {what_to_scan}')
|
|
184
213
|
self._errors = True
|
|
185
|
-
elif not deps or len(deps.get(
|
|
214
|
+
elif not deps or len(deps.get('files', [])) == 0:
|
|
186
215
|
self.print_debug(f'No dependencies found to decorate for: {what_to_scan}')
|
|
187
216
|
else:
|
|
188
217
|
decorated_deps = self.grpc_api.get_dependencies(deps)
|
|
@@ -210,6 +239,7 @@ class ThreadedDependencies(ScanossBase):
|
|
|
210
239
|
self._errors = True
|
|
211
240
|
return True if not self._errors else False
|
|
212
241
|
|
|
242
|
+
|
|
213
243
|
#
|
|
214
244
|
# End of ThreadedDependencies Class
|
|
215
245
|
#
|
scanoss/threadedscanning.py
CHANGED
|
@@ -1,26 +1,27 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
2
|
+
SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
Copyright (c) 2021, SCANOSS
|
|
5
|
+
|
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
in the Software without restriction, including without limitation the rights
|
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
furnished to do so, subject to the following conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be included in
|
|
14
|
+
all copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
22
|
+
THE SOFTWARE.
|
|
23
23
|
"""
|
|
24
|
+
|
|
24
25
|
import os
|
|
25
26
|
import sys
|
|
26
27
|
import threading
|
|
@@ -34,8 +35,10 @@ from progress.bar import Bar
|
|
|
34
35
|
from .scanossapi import ScanossApi
|
|
35
36
|
from .scanossbase import ScanossBase
|
|
36
37
|
|
|
37
|
-
WFP_FILE_START =
|
|
38
|
-
MAX_ALLOWED_THREADS =
|
|
38
|
+
WFP_FILE_START = 'file='
|
|
39
|
+
MAX_ALLOWED_THREADS = (
|
|
40
|
+
int(os.environ.get('SCANOSS_MAX_ALLOWED_THREADS')) if os.environ.get('SCANOSS_MAX_ALLOWED_THREADS') else 30
|
|
41
|
+
)
|
|
39
42
|
|
|
40
43
|
|
|
41
44
|
@dataclass
|
|
@@ -45,13 +48,14 @@ class ThreadedScanning(ScanossBase):
|
|
|
45
48
|
WFP scan requests are loaded into the input queue.
|
|
46
49
|
Multiple threads pull messages off this queue, process the request and put the results into an output queue
|
|
47
50
|
"""
|
|
51
|
+
|
|
48
52
|
inputs: queue.Queue = queue.Queue()
|
|
49
53
|
output: queue.Queue = queue.Queue()
|
|
50
54
|
bar: Bar = None
|
|
51
55
|
|
|
52
|
-
def __init__(
|
|
53
|
-
|
|
54
|
-
|
|
56
|
+
def __init__(
|
|
57
|
+
self, scanapi: ScanossApi, debug: bool = False, trace: bool = False, quiet: bool = False, nb_threads: int = 5
|
|
58
|
+
) -> None:
|
|
55
59
|
"""
|
|
56
60
|
Initialise the ThreadedScanning class
|
|
57
61
|
:param scanapi: SCANOSS API to send scan requests to
|
|
@@ -158,8 +162,9 @@ class ThreadedScanning(ScanossBase):
|
|
|
158
162
|
"""
|
|
159
163
|
qsize = self.inputs.qsize()
|
|
160
164
|
if qsize < self.nb_threads:
|
|
161
|
-
self.print_debug(
|
|
162
|
-
|
|
165
|
+
self.print_debug(
|
|
166
|
+
f'Input queue ({qsize}) smaller than requested threads: {self.nb_threads}. Reducing to queue size.'
|
|
167
|
+
)
|
|
163
168
|
self.nb_threads = qsize
|
|
164
169
|
else:
|
|
165
170
|
self.print_debug(f'Starting {self.nb_threads} threads to process {qsize} requests...')
|
|
@@ -171,7 +176,7 @@ class ThreadedScanning(ScanossBase):
|
|
|
171
176
|
except Exception as e:
|
|
172
177
|
self.print_stderr(f'ERROR: Problem running threaded scanning: {e}')
|
|
173
178
|
self._errors = True
|
|
174
|
-
if wait:
|
|
179
|
+
if wait: # Wait for all inputs to complete
|
|
175
180
|
self.complete()
|
|
176
181
|
return False if self._errors else True
|
|
177
182
|
|
|
@@ -180,7 +185,7 @@ class ThreadedScanning(ScanossBase):
|
|
|
180
185
|
Wait for input queue to complete processing and complete the worker threads
|
|
181
186
|
"""
|
|
182
187
|
self.inputs.join()
|
|
183
|
-
self._stop_event.set()
|
|
188
|
+
self._stop_event.set() # Tell the worker threads to stop
|
|
184
189
|
try:
|
|
185
190
|
for t in self._threads: # Complete the threads
|
|
186
191
|
t.join(timeout=5)
|
|
@@ -199,7 +204,7 @@ class ThreadedScanning(ScanossBase):
|
|
|
199
204
|
api_error = False
|
|
200
205
|
while not self._stop_event.is_set():
|
|
201
206
|
wfp = None
|
|
202
|
-
if not self.inputs.empty():
|
|
207
|
+
if not self.inputs.empty(): # Only try to get a message if there is one on the queue
|
|
203
208
|
try:
|
|
204
209
|
wfp = self.inputs.get(timeout=5)
|
|
205
210
|
if api_error: # API error encountered, so stop processing anymore requests
|
|
@@ -228,6 +233,7 @@ class ThreadedScanning(ScanossBase):
|
|
|
228
233
|
time.sleep(1) # Sleep while waiting for the queue depth to build up
|
|
229
234
|
self.print_trace(f'Thread complete ({current_thread}).')
|
|
230
235
|
|
|
236
|
+
|
|
231
237
|
#
|
|
232
238
|
# End of ThreadedScanning Class
|
|
233
239
|
#
|
scanoss/utils/file.py
CHANGED
|
@@ -52,24 +52,24 @@ def validate_json_file(json_file_path: str) -> JsonValidation:
|
|
|
52
52
|
Tuple[bool, str]: A tuple containing a boolean indicating if the file is valid and a message
|
|
53
53
|
"""
|
|
54
54
|
if not json_file_path:
|
|
55
|
-
return JsonValidation(is_valid=False, error=
|
|
55
|
+
return JsonValidation(is_valid=False, error='No JSON file specified')
|
|
56
56
|
if not os.path.isfile(json_file_path):
|
|
57
57
|
return JsonValidation(
|
|
58
58
|
is_valid=False,
|
|
59
|
-
error=f
|
|
59
|
+
error=f'File not found: {json_file_path}',
|
|
60
60
|
error_code=JSON_ERROR_FILE_NOT_FOUND,
|
|
61
61
|
)
|
|
62
62
|
try:
|
|
63
63
|
if os.stat(json_file_path).st_size == 0:
|
|
64
64
|
return JsonValidation(
|
|
65
65
|
is_valid=False,
|
|
66
|
-
error=f
|
|
66
|
+
error=f'File is empty: {json_file_path}',
|
|
67
67
|
error_code=JSON_ERROR_FILE_EMPTY,
|
|
68
68
|
)
|
|
69
69
|
except OSError as e:
|
|
70
70
|
return JsonValidation(
|
|
71
71
|
is_valid=False,
|
|
72
|
-
error=f
|
|
72
|
+
error=f'Problem checking file size: {json_file_path}: {e}',
|
|
73
73
|
error_code=JSON_ERROR_FILE_SIZE,
|
|
74
74
|
)
|
|
75
75
|
try:
|
scanoss/winnowing.py
CHANGED
|
@@ -1,32 +1,33 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
2
|
+
SPDX-License-Identifier: MIT
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
Copyright (c) 2021, SCANOSS
|
|
5
5
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
in the Software without restriction, including without limitation the rights
|
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
furnished to do so, subject to the following conditions:
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
|
|
13
|
+
The above copyright notice and this permission notice shall be included in
|
|
14
|
+
all copies or substantial portions of the Software.
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
22
|
+
THE SOFTWARE.
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
Winnowing Algorithm implementation for SCANOSS.
|
|
25
25
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
26
|
+
This module implements an adaptation of the original winnowing algorithm by S. Schleimer, D. S. Wilkerson and
|
|
27
|
+
A. Aiken as described in their seminal article which can be found here:
|
|
28
|
+
https://theory.stanford.edu/~aiken/publications/papers/sigmod03.pdf
|
|
29
29
|
"""
|
|
30
|
+
|
|
30
31
|
import hashlib
|
|
31
32
|
import pathlib
|
|
32
33
|
import platform
|
|
@@ -55,11 +56,56 @@ MAX_POST_SIZE = 64 * 1024 # 64k Max post size
|
|
|
55
56
|
MIN_FILE_SIZE = 256
|
|
56
57
|
|
|
57
58
|
SKIP_SNIPPET_EXT = { # File extensions to ignore snippets for
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
59
|
+
'.exe',
|
|
60
|
+
'.zip',
|
|
61
|
+
'.tar',
|
|
62
|
+
'.tgz',
|
|
63
|
+
'.gz',
|
|
64
|
+
'.7z',
|
|
65
|
+
'.rar',
|
|
66
|
+
'.jar',
|
|
67
|
+
'.war',
|
|
68
|
+
'.ear',
|
|
69
|
+
'.class',
|
|
70
|
+
'.pyc',
|
|
71
|
+
'.o',
|
|
72
|
+
'.a',
|
|
73
|
+
'.so',
|
|
74
|
+
'.obj',
|
|
75
|
+
'.dll',
|
|
76
|
+
'.lib',
|
|
77
|
+
'.out',
|
|
78
|
+
'.app',
|
|
79
|
+
'.bin',
|
|
80
|
+
'.lst',
|
|
81
|
+
'.dat',
|
|
82
|
+
'.json',
|
|
83
|
+
'.htm',
|
|
84
|
+
'.html',
|
|
85
|
+
'.xml',
|
|
86
|
+
'.md',
|
|
87
|
+
'.txt',
|
|
88
|
+
'.doc',
|
|
89
|
+
'.docx',
|
|
90
|
+
'.xls',
|
|
91
|
+
'.xlsx',
|
|
92
|
+
'.ppt',
|
|
93
|
+
'.pptx',
|
|
94
|
+
'.odt',
|
|
95
|
+
'.ods',
|
|
96
|
+
'.odp',
|
|
97
|
+
'.pages',
|
|
98
|
+
'.key',
|
|
99
|
+
'.numbers',
|
|
100
|
+
'.pdf',
|
|
101
|
+
'.min.js',
|
|
102
|
+
'.mf',
|
|
103
|
+
'.sum',
|
|
104
|
+
'.woff',
|
|
105
|
+
'.woff2',
|
|
106
|
+
'.xsd',
|
|
107
|
+
'.pom',
|
|
108
|
+
'.whl',
|
|
63
109
|
}
|
|
64
110
|
|
|
65
111
|
CRC8_MAXIM_DOW_TABLE_SIZE = 0x100
|
|
@@ -111,11 +157,21 @@ class Winnowing(ScanossBase):
|
|
|
111
157
|
a list of WFP fingerprints with their corresponding line numbers.
|
|
112
158
|
"""
|
|
113
159
|
|
|
114
|
-
def __init__(
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
160
|
+
def __init__(
|
|
161
|
+
self,
|
|
162
|
+
size_limit: bool = False,
|
|
163
|
+
debug: bool = False,
|
|
164
|
+
trace: bool = False,
|
|
165
|
+
quiet: bool = False,
|
|
166
|
+
skip_snippets: bool = False,
|
|
167
|
+
post_size: int = 32,
|
|
168
|
+
all_extensions: bool = False,
|
|
169
|
+
obfuscate: bool = False,
|
|
170
|
+
hpsm: bool = False,
|
|
171
|
+
strip_hpsm_ids=None,
|
|
172
|
+
strip_snippet_ids=None,
|
|
173
|
+
skip_md5_ids=None,
|
|
174
|
+
):
|
|
119
175
|
"""
|
|
120
176
|
Instantiate Winnowing class
|
|
121
177
|
Parameters
|
|
@@ -190,12 +246,16 @@ class Winnowing(ScanossBase):
|
|
|
190
246
|
if src_len == 0 or src_len <= MIN_FILE_SIZE: # Ignore empty or files that are too small
|
|
191
247
|
self.print_trace(f'Skipping snippets as the file is too small: {file} - {src_len}')
|
|
192
248
|
return True
|
|
193
|
-
prefix = src[0:(MIN_FILE_SIZE - 1)].lower().strip()
|
|
194
|
-
if len(prefix) > 0 and (prefix[0] ==
|
|
249
|
+
prefix = src[0 : (MIN_FILE_SIZE - 1)].lower().strip()
|
|
250
|
+
if len(prefix) > 0 and (prefix[0] == '{' or prefix[0] == '['): # Ignore json
|
|
195
251
|
self.print_trace(f'Skipping snippets as the file appears to be JSON: {file}')
|
|
196
252
|
return True
|
|
197
|
-
if
|
|
198
|
-
|
|
253
|
+
if (
|
|
254
|
+
prefix.startswith('<?xml')
|
|
255
|
+
or prefix.startswith('<html')
|
|
256
|
+
or prefix.startswith('<ac3d')
|
|
257
|
+
or prefix.startswith('<!doc')
|
|
258
|
+
):
|
|
199
259
|
self.print_trace(f'Skipping snippets as the file appears to be xml/html/binary: {file}')
|
|
200
260
|
return True # Ignore xml & html & ac3d
|
|
201
261
|
index = src.index('\n') if '\n' in src else (src_len - 1) # TODO still necessary if we have a binary check?
|
|
@@ -258,11 +318,12 @@ class Winnowing(ScanossBase):
|
|
|
258
318
|
elif hpsm_id_len % 2 == 1:
|
|
259
319
|
hpsm_id_len = hpsm_id_len + 1
|
|
260
320
|
|
|
261
|
-
to_remove = hpsm[hpsm_id_index:hpsm_id_index + hpsm_id_len]
|
|
321
|
+
to_remove = hpsm[hpsm_id_index : hpsm_id_index + hpsm_id_len]
|
|
262
322
|
self.print_debug(f'HPSM ID {to_remove} to replace')
|
|
263
323
|
# Calculate the XOR of each byte to produce the correct ignore sequence.
|
|
264
324
|
replacement = ''.join(
|
|
265
|
-
[format(int(to_remove[i:i + 2], 16) ^ 0xFF, '02x') for i in range(0, len(to_remove), 2)]
|
|
325
|
+
[format(int(to_remove[i : i + 2], 16) ^ 0xFF, '02x') for i in range(0, len(to_remove), 2)]
|
|
326
|
+
)
|
|
266
327
|
|
|
267
328
|
self.print_debug(f'HPSM ID replacement {replacement}')
|
|
268
329
|
# Overwrite HPSM bytes to be removed.
|
|
@@ -309,7 +370,7 @@ class Winnowing(ScanossBase):
|
|
|
309
370
|
# Print file line
|
|
310
371
|
content_length = len(contents)
|
|
311
372
|
original_filename = file
|
|
312
|
-
|
|
373
|
+
|
|
313
374
|
if platform.system() == 'Windows':
|
|
314
375
|
original_filename = file.replace('\\', '/')
|
|
315
376
|
wfp_filename = repr(original_filename).strip("'") # return a utf-8 compatible version of the filename
|
|
@@ -361,14 +422,16 @@ class Winnowing(ScanossBase):
|
|
|
361
422
|
crc_hex = '{:08x}'.format(crc)
|
|
362
423
|
if last_line != line:
|
|
363
424
|
if output != '':
|
|
364
|
-
if
|
|
365
|
-
|
|
366
|
-
|
|
425
|
+
if (
|
|
426
|
+
self.size_limit
|
|
427
|
+
and (len(wfp.encode('utf-8')) + len(output.encode('utf-8')))
|
|
428
|
+
> self.max_post_size
|
|
429
|
+
):
|
|
367
430
|
self.print_debug(f'Truncating WFP ({self.max_post_size} limit) for: {file}')
|
|
368
431
|
output = ''
|
|
369
432
|
break # Stop collecting snippets as it's over 64k
|
|
370
433
|
wfp += output + '\n'
|
|
371
|
-
output =
|
|
434
|
+
output = '%d=%s' % (line, crc_hex)
|
|
372
435
|
else:
|
|
373
436
|
output += ',' + crc_hex
|
|
374
437
|
|
|
@@ -379,7 +442,7 @@ class Winnowing(ScanossBase):
|
|
|
379
442
|
# Shift gram
|
|
380
443
|
gram = gram[1:]
|
|
381
444
|
if output != '':
|
|
382
|
-
if not self.size_limit or (len(wfp.encode(
|
|
445
|
+
if not self.size_limit or (len(wfp.encode('utf-8')) + len(output.encode('utf-8'))) < self.max_post_size:
|
|
383
446
|
wfp += output + '\n'
|
|
384
447
|
else:
|
|
385
448
|
self.print_debug(f'Warning: skipping output in WFP for {file} - "{output}"')
|
|
@@ -403,13 +466,13 @@ class Winnowing(ScanossBase):
|
|
|
403
466
|
last_line = 0
|
|
404
467
|
for i, byte in enumerate(content):
|
|
405
468
|
c = byte
|
|
406
|
-
if c == ASCII_LF:
|
|
407
|
-
if len(list_normalized):
|
|
469
|
+
if c == ASCII_LF: # When there is a new line
|
|
470
|
+
if len(list_normalized):
|
|
408
471
|
crc_lines.append(self.crc8_buffer(list_normalized))
|
|
409
472
|
list_normalized = []
|
|
410
|
-
elif last_line+1 == i:
|
|
473
|
+
elif last_line + 1 == i:
|
|
411
474
|
crc_lines.append(0xFF)
|
|
412
|
-
elif i-last_line > 1:
|
|
475
|
+
elif i - last_line > 1:
|
|
413
476
|
crc_lines.append(0x00)
|
|
414
477
|
last_line = i
|
|
415
478
|
else:
|
|
@@ -470,6 +533,7 @@ class Winnowing(ScanossBase):
|
|
|
470
533
|
crc ^= CRC8_MAXIM_DOW_FINAL # Bitwise OR (XOR) of crc in Maxim Dow Final
|
|
471
534
|
return crc
|
|
472
535
|
|
|
536
|
+
|
|
473
537
|
#
|
|
474
538
|
# End of Winnowing Class
|
|
475
539
|
#
|