scanoss 1.20.0__py3-none-any.whl → 1.20.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. protoc_gen_swagger/__init__.py +13 -13
  2. protoc_gen_swagger/options/__init__.py +13 -13
  3. protoc_gen_swagger/options/annotations_pb2.py +12 -9
  4. protoc_gen_swagger/options/annotations_pb2_grpc.py +1 -1
  5. protoc_gen_swagger/options/openapiv2_pb2.py +98 -96
  6. protoc_gen_swagger/options/openapiv2_pb2_grpc.py +1 -1
  7. scanoss/__init__.py +18 -18
  8. scanoss/api/__init__.py +17 -17
  9. scanoss/api/common/__init__.py +17 -17
  10. scanoss/api/common/v2/__init__.py +17 -17
  11. scanoss/api/common/v2/scanoss_common_pb2.py +18 -18
  12. scanoss/api/common/v2/scanoss_common_pb2_grpc.py +1 -1
  13. scanoss/api/components/__init__.py +17 -17
  14. scanoss/api/components/v2/__init__.py +17 -17
  15. scanoss/api/components/v2/scanoss_components_pb2.py +48 -38
  16. scanoss/api/components/v2/scanoss_components_pb2_grpc.py +142 -96
  17. scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py +22 -16
  18. scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py +75 -49
  19. scanoss/api/dependencies/__init__.py +17 -17
  20. scanoss/api/dependencies/v2/__init__.py +17 -17
  21. scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py +30 -24
  22. scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py +75 -49
  23. scanoss/api/scanning/__init__.py +17 -17
  24. scanoss/api/scanning/v2/__init__.py +17 -17
  25. scanoss/api/scanning/v2/scanoss_scanning_pb2.py +10 -8
  26. scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py +40 -32
  27. scanoss/api/semgrep/__init__.py +17 -17
  28. scanoss/api/semgrep/v2/__init__.py +17 -17
  29. scanoss/api/semgrep/v2/scanoss_semgrep_pb2.py +22 -18
  30. scanoss/api/semgrep/v2/scanoss_semgrep_pb2_grpc.py +71 -49
  31. scanoss/api/vulnerabilities/__init__.py +17 -17
  32. scanoss/api/vulnerabilities/v2/__init__.py +17 -17
  33. scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py +37 -27
  34. scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py +109 -72
  35. scanoss/cli.py +576 -293
  36. scanoss/components.py +67 -45
  37. scanoss/csvoutput.py +83 -56
  38. scanoss/cyclonedx.py +48 -46
  39. scanoss/data/build_date.txt +1 -1
  40. scanoss/file_filters.py +13 -15
  41. scanoss/filecount.py +43 -36
  42. scanoss/inspection/__init__.py +17 -17
  43. scanoss/inspection/copyleft.py +71 -58
  44. scanoss/inspection/policy_check.py +76 -53
  45. scanoss/inspection/undeclared_component.py +98 -75
  46. scanoss/inspection/utils/license_utils.py +66 -44
  47. scanoss/results.py +51 -60
  48. scanoss/scancodedeps.py +61 -38
  49. scanoss/scanner.py +203 -135
  50. scanoss/scanoss_settings.py +5 -3
  51. scanoss/scanossapi.py +98 -69
  52. scanoss/scanossbase.py +19 -19
  53. scanoss/scanossgrpc.py +73 -51
  54. scanoss/scanpostprocessor.py +9 -6
  55. scanoss/scantype.py +22 -21
  56. scanoss/spdxlite.py +265 -171
  57. scanoss/threadeddependencies.py +91 -61
  58. scanoss/threadedscanning.py +37 -31
  59. scanoss/utils/file.py +4 -4
  60. scanoss/winnowing.py +111 -47
  61. {scanoss-1.20.0.dist-info → scanoss-1.20.1.dist-info}/METADATA +1 -1
  62. scanoss-1.20.1.dist-info/RECORD +74 -0
  63. scanoss-1.20.0.dist-info/RECORD +0 -74
  64. {scanoss-1.20.0.dist-info → scanoss-1.20.1.dist-info}/LICENSE +0 -0
  65. {scanoss-1.20.0.dist-info → scanoss-1.20.1.dist-info}/WHEEL +0 -0
  66. {scanoss-1.20.0.dist-info → scanoss-1.20.1.dist-info}/entry_points.txt +0 -0
  67. {scanoss-1.20.0.dist-info → scanoss-1.20.1.dist-info}/top_level.txt +0 -0
@@ -1,25 +1,25 @@
1
1
  """
2
- SPDX-License-Identifier: MIT
3
-
4
- Copyright (c) 2021, SCANOSS
5
-
6
- Permission is hereby granted, free of charge, to any person obtaining a copy
7
- of this software and associated documentation files (the "Software"), to deal
8
- in the Software without restriction, including without limitation the rights
9
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
- copies of the Software, and to permit persons to whom the Software is
11
- furnished to do so, subject to the following conditions:
12
-
13
- The above copyright notice and this permission notice shall be included in
14
- all copies or substantial portions of the Software.
15
-
16
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
- THE SOFTWARE.
2
+ SPDX-License-Identifier: MIT
3
+
4
+ Copyright (c) 2021, SCANOSS
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
23
  """
24
24
 
25
25
  import threading
@@ -33,9 +33,21 @@ from .scancodedeps import ScancodeDeps
33
33
  from .scanossbase import ScanossBase
34
34
  from .scanossgrpc import ScanossGrpc
35
35
 
36
- DEP_FILE_PREFIX = "file=" # Default prefix to signify an existing parsed dependency file
36
+ DEP_FILE_PREFIX = 'file=' # Default prefix to signify an existing parsed dependency file
37
37
 
38
- DEV_DEPENDENCIES = { "dev", "test", "development", "provided", "runtime", "devDependencies", "dev-dependencies", "testImplementation", "testCompile", "Test", "require-dev" }
38
+ DEV_DEPENDENCIES = {
39
+ 'dev',
40
+ 'test',
41
+ 'development',
42
+ 'provided',
43
+ 'runtime',
44
+ 'devDependencies',
45
+ 'dev-dependencies',
46
+ 'testImplementation',
47
+ 'testCompile',
48
+ 'Test',
49
+ 'require-dev',
50
+ }
39
51
 
40
52
 
41
53
  # Define an enum class
@@ -46,17 +58,21 @@ class SCOPE(Enum):
46
58
 
47
59
  @dataclass
48
60
  class ThreadedDependencies(ScanossBase):
49
- """
61
+ """ """
50
62
 
51
- """
52
63
  inputs: queue.Queue = queue.Queue()
53
64
  output: queue.Queue = queue.Queue()
54
65
 
55
- def __init__(self, sc_deps: ScancodeDeps, grpc_api: ScanossGrpc, what_to_scan: str = None, debug: bool = False,
56
- trace: bool = False, quiet: bool = False) -> None:
57
- """
58
-
59
- """
66
+ def __init__(
67
+ self,
68
+ sc_deps: ScancodeDeps,
69
+ grpc_api: ScanossGrpc,
70
+ what_to_scan: str = None,
71
+ debug: bool = False,
72
+ trace: bool = False,
73
+ quiet: bool = False,
74
+ ) -> None:
75
+ """ """
60
76
  super().__init__(debug, trace, quiet)
61
77
  self.sc_deps = sc_deps
62
78
  self.grpc_api = grpc_api
@@ -76,8 +92,15 @@ class ThreadedDependencies(ScanossBase):
76
92
  return resp
77
93
  return None
78
94
 
79
- def run(self, what_to_scan: str = None, deps_file: str = None, wait: bool = True, dep_scope: SCOPE = None,
80
- dep_scope_include: str = None, dep_scope_exclude: str = None) -> bool:
95
+ def run(
96
+ self,
97
+ what_to_scan: str = None,
98
+ deps_file: str = None,
99
+ wait: bool = True,
100
+ dep_scope: SCOPE = None,
101
+ dep_scope_include: str = None,
102
+ dep_scope_exclude: str = None,
103
+ ) -> bool:
81
104
  """
82
105
  Initiate a background scan for the specified file/dir
83
106
  :param dep_scope_exclude: comma separated list of dependency scopes to exclude
@@ -91,23 +114,25 @@ class ThreadedDependencies(ScanossBase):
91
114
  what_to_scan = what_to_scan if what_to_scan else self.what_to_scan
92
115
  self._errors = False
93
116
  try:
94
- if deps_file: # Decorate the given dependencies file
117
+ if deps_file: # Decorate the given dependencies file
95
118
  self.print_msg(f'Decorating {deps_file} dependencies...')
96
- self.inputs.put(f'{DEP_FILE_PREFIX}{deps_file}') # Add to queue and have parent wait on it
97
- else: # Search for dependencies to decorate
119
+ self.inputs.put(f'{DEP_FILE_PREFIX}{deps_file}') # Add to queue and have parent wait on it
120
+ else: # Search for dependencies to decorate
98
121
  self.print_msg(f'Searching {what_to_scan} for dependencies...')
99
122
  self.inputs.put(what_to_scan)
100
123
  # Add to queue and have parent wait on it
101
- self._thread = threading.Thread(target=self.scan_dependencies(dep_scope, dep_scope_include, dep_scope_exclude), daemon=True)
124
+ self._thread = threading.Thread(
125
+ target=self.scan_dependencies(dep_scope, dep_scope_include, dep_scope_exclude), daemon=True
126
+ )
102
127
  self._thread.start()
103
128
  except Exception as e:
104
129
  self.print_stderr(f'ERROR: Problem running threaded dependencies: {e}')
105
130
  self._errors = True
106
- if wait and not self._errors: # Wait for all inputs to complete
131
+ if wait and not self._errors: # Wait for all inputs to complete
107
132
  self.complete()
108
133
  return False if self._errors else True
109
134
 
110
- def filter_dependencies(self,deps ,filter_dep)-> json:
135
+ def filter_dependencies(self, deps, filter_dep) -> json:
111
136
  files = deps.get('files', [])
112
137
  # Iterate over files and their purls
113
138
  for file in files:
@@ -120,52 +145,56 @@ class ThreadedDependencies(ScanossBase):
120
145
  ]
121
146
  # End of for loop
122
147
 
123
- return {
124
- 'files': [
125
- file for file in deps.get('files', [])
126
- if file.get('purls')
127
- ]
128
- }
148
+ return {'files': [file for file in deps.get('files', []) if file.get('purls')]}
129
149
 
130
- def filter_dependencies_by_scopes(self,deps: json, dep_scope: SCOPE = None, dep_scope_include: str = None,
131
- dep_scope_exclude: str = None) -> json:
150
+ def filter_dependencies_by_scopes(
151
+ self, deps: json, dep_scope: SCOPE = None, dep_scope_include: str = None, dep_scope_exclude: str = None
152
+ ) -> json:
132
153
  # Predefined set of scopes to filter
133
154
 
134
155
  # Include all scopes
135
- include_all = (dep_scope is None or dep_scope == "") and dep_scope_include is None and dep_scope_exclude is None
156
+ include_all = (dep_scope is None or dep_scope == '') and dep_scope_include is None and dep_scope_exclude is None
136
157
  ## All dependencies, remove scope key
137
158
  if include_all:
138
- return self.filter_dependencies(deps, lambda purl:True)
159
+ return self.filter_dependencies(deps, lambda purl: True)
139
160
 
140
161
  # Use default list of scopes if a custom list is not set
141
- if (dep_scope is not None and dep_scope != "") and dep_scope_include is None and dep_scope_exclude is None:
142
- return self.filter_dependencies(deps, lambda purl: (dep_scope == SCOPE.PRODUCTION and purl not in DEV_DEPENDENCIES) or
143
- dep_scope == SCOPE.DEVELOPMENT and purl in DEV_DEPENDENCIES)
162
+ if (dep_scope is not None and dep_scope != '') and dep_scope_include is None and dep_scope_exclude is None:
163
+ return self.filter_dependencies(
164
+ deps,
165
+ lambda purl: (dep_scope == SCOPE.PRODUCTION and purl not in DEV_DEPENDENCIES)
166
+ or dep_scope == SCOPE.DEVELOPMENT
167
+ and purl in DEV_DEPENDENCIES,
168
+ )
144
169
 
145
- if ((dep_scope_include is not None and dep_scope_include != "")
146
- or dep_scope_exclude is not None and dep_scope_exclude != ""):
170
+ if (
171
+ (dep_scope_include is not None and dep_scope_include != '')
172
+ or dep_scope_exclude is not None
173
+ and dep_scope_exclude != ''
174
+ ):
147
175
  # Create sets from comma-separated strings, if provided
148
176
  exclude = set(dep_scope_exclude.split(',')) if dep_scope_exclude else set()
149
177
  include = set(dep_scope_include.split(',')) if dep_scope_include else set()
150
178
 
151
179
  # Define a lambda function that checks the inclusion/exclusion logic
152
180
  return self.filter_dependencies(
153
- deps,
154
- lambda purl: (exclude and purl not in exclude) or (not exclude and purl in include)
181
+ deps, lambda purl: (exclude and purl not in exclude) or (not exclude and purl in include)
155
182
  )
156
183
 
157
- def scan_dependencies(self, dep_scope: SCOPE = None, dep_scope_include: str = None, dep_scope_exclude: str = None) -> None:
184
+ def scan_dependencies(
185
+ self, dep_scope: SCOPE = None, dep_scope_include: str = None, dep_scope_exclude: str = None
186
+ ) -> None:
158
187
  """
159
188
  Scan for dependencies from the given file/dir or from an input file (from the input queue).
160
189
  """
161
190
  current_thread = threading.get_ident()
162
191
  self.print_trace(f'Starting dependency worker {current_thread}...')
163
192
  try:
164
- what_to_scan = self.inputs.get(timeout=5) # Begin processing the dependency request
193
+ what_to_scan = self.inputs.get(timeout=5) # Begin processing the dependency request
165
194
  deps = None
166
- if what_to_scan.startswith(DEP_FILE_PREFIX): # We have a pre-parsed dependency file, load it
195
+ if what_to_scan.startswith(DEP_FILE_PREFIX): # We have a pre-parsed dependency file, load it
167
196
  deps = self.sc_deps.load_from_file(what_to_scan.strip(DEP_FILE_PREFIX))
168
- else: # Search the file/folder for dependency files to parse
197
+ else: # Search the file/folder for dependency files to parse
169
198
  if not self.sc_deps.run_scan(what_to_scan=what_to_scan):
170
199
  self._errors = True
171
200
  else:
@@ -176,13 +205,13 @@ class ThreadedDependencies(ScanossBase):
176
205
  self.print_debug(f"Including dependencies with '{dep_scope_include.split(',')}' scopes")
177
206
  if dep_scope_exclude is not None:
178
207
  self.print_debug(f"Excluding dependencies with '{dep_scope_exclude.split(',')}' scopes")
179
- deps = self.filter_dependencies_by_scopes(deps, dep_scope,dep_scope_include, dep_scope_exclude)
208
+ deps = self.filter_dependencies_by_scopes(deps, dep_scope, dep_scope_include, dep_scope_exclude)
180
209
 
181
210
  if not self._errors:
182
211
  if deps is None:
183
212
  self.print_stderr(f'Problem searching for dependencies for: {what_to_scan}')
184
213
  self._errors = True
185
- elif not deps or len(deps.get("files", [])) == 0:
214
+ elif not deps or len(deps.get('files', [])) == 0:
186
215
  self.print_debug(f'No dependencies found to decorate for: {what_to_scan}')
187
216
  else:
188
217
  decorated_deps = self.grpc_api.get_dependencies(deps)
@@ -210,6 +239,7 @@ class ThreadedDependencies(ScanossBase):
210
239
  self._errors = True
211
240
  return True if not self._errors else False
212
241
 
242
+
213
243
  #
214
244
  # End of ThreadedDependencies Class
215
245
  #
@@ -1,26 +1,27 @@
1
1
  """
2
- SPDX-License-Identifier: MIT
3
-
4
- Copyright (c) 2021, SCANOSS
5
-
6
- Permission is hereby granted, free of charge, to any person obtaining a copy
7
- of this software and associated documentation files (the "Software"), to deal
8
- in the Software without restriction, including without limitation the rights
9
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
- copies of the Software, and to permit persons to whom the Software is
11
- furnished to do so, subject to the following conditions:
12
-
13
- The above copyright notice and this permission notice shall be included in
14
- all copies or substantial portions of the Software.
15
-
16
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
- THE SOFTWARE.
2
+ SPDX-License-Identifier: MIT
3
+
4
+ Copyright (c) 2021, SCANOSS
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
23
  """
24
+
24
25
  import os
25
26
  import sys
26
27
  import threading
@@ -34,8 +35,10 @@ from progress.bar import Bar
34
35
  from .scanossapi import ScanossApi
35
36
  from .scanossbase import ScanossBase
36
37
 
37
- WFP_FILE_START = "file="
38
- MAX_ALLOWED_THREADS = int(os.environ.get("SCANOSS_MAX_ALLOWED_THREADS")) if os.environ.get("SCANOSS_MAX_ALLOWED_THREADS") else 30
38
+ WFP_FILE_START = 'file='
39
+ MAX_ALLOWED_THREADS = (
40
+ int(os.environ.get('SCANOSS_MAX_ALLOWED_THREADS')) if os.environ.get('SCANOSS_MAX_ALLOWED_THREADS') else 30
41
+ )
39
42
 
40
43
 
41
44
  @dataclass
@@ -45,13 +48,14 @@ class ThreadedScanning(ScanossBase):
45
48
  WFP scan requests are loaded into the input queue.
46
49
  Multiple threads pull messages off this queue, process the request and put the results into an output queue
47
50
  """
51
+
48
52
  inputs: queue.Queue = queue.Queue()
49
53
  output: queue.Queue = queue.Queue()
50
54
  bar: Bar = None
51
55
 
52
- def __init__(self, scanapi: ScanossApi, debug: bool = False, trace: bool = False, quiet: bool = False,
53
- nb_threads: int = 5
54
- ) -> None:
56
+ def __init__(
57
+ self, scanapi: ScanossApi, debug: bool = False, trace: bool = False, quiet: bool = False, nb_threads: int = 5
58
+ ) -> None:
55
59
  """
56
60
  Initialise the ThreadedScanning class
57
61
  :param scanapi: SCANOSS API to send scan requests to
@@ -158,8 +162,9 @@ class ThreadedScanning(ScanossBase):
158
162
  """
159
163
  qsize = self.inputs.qsize()
160
164
  if qsize < self.nb_threads:
161
- self.print_debug(f'Input queue ({qsize}) smaller than requested threads: {self.nb_threads}. '
162
- f'Reducing to queue size.')
165
+ self.print_debug(
166
+ f'Input queue ({qsize}) smaller than requested threads: {self.nb_threads}. Reducing to queue size.'
167
+ )
163
168
  self.nb_threads = qsize
164
169
  else:
165
170
  self.print_debug(f'Starting {self.nb_threads} threads to process {qsize} requests...')
@@ -171,7 +176,7 @@ class ThreadedScanning(ScanossBase):
171
176
  except Exception as e:
172
177
  self.print_stderr(f'ERROR: Problem running threaded scanning: {e}')
173
178
  self._errors = True
174
- if wait: # Wait for all inputs to complete
179
+ if wait: # Wait for all inputs to complete
175
180
  self.complete()
176
181
  return False if self._errors else True
177
182
 
@@ -180,7 +185,7 @@ class ThreadedScanning(ScanossBase):
180
185
  Wait for input queue to complete processing and complete the worker threads
181
186
  """
182
187
  self.inputs.join()
183
- self._stop_event.set() # Tell the worker threads to stop
188
+ self._stop_event.set() # Tell the worker threads to stop
184
189
  try:
185
190
  for t in self._threads: # Complete the threads
186
191
  t.join(timeout=5)
@@ -199,7 +204,7 @@ class ThreadedScanning(ScanossBase):
199
204
  api_error = False
200
205
  while not self._stop_event.is_set():
201
206
  wfp = None
202
- if not self.inputs.empty(): # Only try to get a message if there is one on the queue
207
+ if not self.inputs.empty(): # Only try to get a message if there is one on the queue
203
208
  try:
204
209
  wfp = self.inputs.get(timeout=5)
205
210
  if api_error: # API error encountered, so stop processing anymore requests
@@ -228,6 +233,7 @@ class ThreadedScanning(ScanossBase):
228
233
  time.sleep(1) # Sleep while waiting for the queue depth to build up
229
234
  self.print_trace(f'Thread complete ({current_thread}).')
230
235
 
236
+
231
237
  #
232
238
  # End of ThreadedScanning Class
233
239
  #
scanoss/utils/file.py CHANGED
@@ -52,24 +52,24 @@ def validate_json_file(json_file_path: str) -> JsonValidation:
52
52
  Tuple[bool, str]: A tuple containing a boolean indicating if the file is valid and a message
53
53
  """
54
54
  if not json_file_path:
55
- return JsonValidation(is_valid=False, error="No JSON file specified")
55
+ return JsonValidation(is_valid=False, error='No JSON file specified')
56
56
  if not os.path.isfile(json_file_path):
57
57
  return JsonValidation(
58
58
  is_valid=False,
59
- error=f"File not found: {json_file_path}",
59
+ error=f'File not found: {json_file_path}',
60
60
  error_code=JSON_ERROR_FILE_NOT_FOUND,
61
61
  )
62
62
  try:
63
63
  if os.stat(json_file_path).st_size == 0:
64
64
  return JsonValidation(
65
65
  is_valid=False,
66
- error=f"File is empty: {json_file_path}",
66
+ error=f'File is empty: {json_file_path}',
67
67
  error_code=JSON_ERROR_FILE_EMPTY,
68
68
  )
69
69
  except OSError as e:
70
70
  return JsonValidation(
71
71
  is_valid=False,
72
- error=f"Problem checking file size: {json_file_path}: {e}",
72
+ error=f'Problem checking file size: {json_file_path}: {e}',
73
73
  error_code=JSON_ERROR_FILE_SIZE,
74
74
  )
75
75
  try:
scanoss/winnowing.py CHANGED
@@ -1,32 +1,33 @@
1
1
  """
2
- SPDX-License-Identifier: MIT
2
+ SPDX-License-Identifier: MIT
3
3
 
4
- Copyright (c) 2021, SCANOSS
4
+ Copyright (c) 2021, SCANOSS
5
5
 
6
- Permission is hereby granted, free of charge, to any person obtaining a copy
7
- of this software and associated documentation files (the "Software"), to deal
8
- in the Software without restriction, including without limitation the rights
9
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
- copies of the Software, and to permit persons to whom the Software is
11
- furnished to do so, subject to the following conditions:
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
12
 
13
- The above copyright notice and this permission notice shall be included in
14
- all copies or substantial portions of the Software.
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
15
 
16
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
- THE SOFTWARE.
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
23
 
24
- Winnowing Algorithm implementation for SCANOSS.
24
+ Winnowing Algorithm implementation for SCANOSS.
25
25
 
26
- This module implements an adaptation of the original winnowing algorithm by S. Schleimer, D. S. Wilkerson and
27
- A. Aiken as described in their seminal article which can be found here:
28
- https://theory.stanford.edu/~aiken/publications/papers/sigmod03.pdf
26
+ This module implements an adaptation of the original winnowing algorithm by S. Schleimer, D. S. Wilkerson and
27
+ A. Aiken as described in their seminal article which can be found here:
28
+ https://theory.stanford.edu/~aiken/publications/papers/sigmod03.pdf
29
29
  """
30
+
30
31
  import hashlib
31
32
  import pathlib
32
33
  import platform
@@ -55,11 +56,56 @@ MAX_POST_SIZE = 64 * 1024 # 64k Max post size
55
56
  MIN_FILE_SIZE = 256
56
57
 
57
58
  SKIP_SNIPPET_EXT = { # File extensions to ignore snippets for
58
- ".exe", ".zip", ".tar", ".tgz", ".gz", ".7z", ".rar", ".jar", ".war", ".ear", ".class", ".pyc",
59
- ".o", ".a", ".so", ".obj", ".dll", ".lib", ".out", ".app", ".bin",
60
- ".lst", ".dat", ".json", ".htm", ".html", ".xml", ".md", ".txt",
61
- ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", ".odt", ".ods", ".odp", ".pages", ".key", ".numbers",
62
- ".pdf", ".min.js", ".mf", ".sum", ".woff", ".woff2", '.xsd', ".pom", ".whl",
59
+ '.exe',
60
+ '.zip',
61
+ '.tar',
62
+ '.tgz',
63
+ '.gz',
64
+ '.7z',
65
+ '.rar',
66
+ '.jar',
67
+ '.war',
68
+ '.ear',
69
+ '.class',
70
+ '.pyc',
71
+ '.o',
72
+ '.a',
73
+ '.so',
74
+ '.obj',
75
+ '.dll',
76
+ '.lib',
77
+ '.out',
78
+ '.app',
79
+ '.bin',
80
+ '.lst',
81
+ '.dat',
82
+ '.json',
83
+ '.htm',
84
+ '.html',
85
+ '.xml',
86
+ '.md',
87
+ '.txt',
88
+ '.doc',
89
+ '.docx',
90
+ '.xls',
91
+ '.xlsx',
92
+ '.ppt',
93
+ '.pptx',
94
+ '.odt',
95
+ '.ods',
96
+ '.odp',
97
+ '.pages',
98
+ '.key',
99
+ '.numbers',
100
+ '.pdf',
101
+ '.min.js',
102
+ '.mf',
103
+ '.sum',
104
+ '.woff',
105
+ '.woff2',
106
+ '.xsd',
107
+ '.pom',
108
+ '.whl',
63
109
  }
64
110
 
65
111
  CRC8_MAXIM_DOW_TABLE_SIZE = 0x100
@@ -111,11 +157,21 @@ class Winnowing(ScanossBase):
111
157
  a list of WFP fingerprints with their corresponding line numbers.
112
158
  """
113
159
 
114
- def __init__(self, size_limit: bool = False, debug: bool = False, trace: bool = False, quiet: bool = False,
115
- skip_snippets: bool = False, post_size: int = 32, all_extensions: bool = False,
116
- obfuscate: bool = False, hpsm: bool = False,
117
- strip_hpsm_ids=None, strip_snippet_ids=None, skip_md5_ids=None
118
- ):
160
+ def __init__(
161
+ self,
162
+ size_limit: bool = False,
163
+ debug: bool = False,
164
+ trace: bool = False,
165
+ quiet: bool = False,
166
+ skip_snippets: bool = False,
167
+ post_size: int = 32,
168
+ all_extensions: bool = False,
169
+ obfuscate: bool = False,
170
+ hpsm: bool = False,
171
+ strip_hpsm_ids=None,
172
+ strip_snippet_ids=None,
173
+ skip_md5_ids=None,
174
+ ):
119
175
  """
120
176
  Instantiate Winnowing class
121
177
  Parameters
@@ -190,12 +246,16 @@ class Winnowing(ScanossBase):
190
246
  if src_len == 0 or src_len <= MIN_FILE_SIZE: # Ignore empty or files that are too small
191
247
  self.print_trace(f'Skipping snippets as the file is too small: {file} - {src_len}')
192
248
  return True
193
- prefix = src[0:(MIN_FILE_SIZE - 1)].lower().strip()
194
- if len(prefix) > 0 and (prefix[0] == "{" or prefix[0] == "["): # Ignore json
249
+ prefix = src[0 : (MIN_FILE_SIZE - 1)].lower().strip()
250
+ if len(prefix) > 0 and (prefix[0] == '{' or prefix[0] == '['): # Ignore json
195
251
  self.print_trace(f'Skipping snippets as the file appears to be JSON: {file}')
196
252
  return True
197
- if prefix.startswith("<?xml") or prefix.startswith("<html") or prefix.startswith("<ac3d") or prefix.startswith(
198
- "<!doc"):
253
+ if (
254
+ prefix.startswith('<?xml')
255
+ or prefix.startswith('<html')
256
+ or prefix.startswith('<ac3d')
257
+ or prefix.startswith('<!doc')
258
+ ):
199
259
  self.print_trace(f'Skipping snippets as the file appears to be xml/html/binary: {file}')
200
260
  return True # Ignore xml & html & ac3d
201
261
  index = src.index('\n') if '\n' in src else (src_len - 1) # TODO still necessary if we have a binary check?
@@ -258,11 +318,12 @@ class Winnowing(ScanossBase):
258
318
  elif hpsm_id_len % 2 == 1:
259
319
  hpsm_id_len = hpsm_id_len + 1
260
320
 
261
- to_remove = hpsm[hpsm_id_index:hpsm_id_index + hpsm_id_len]
321
+ to_remove = hpsm[hpsm_id_index : hpsm_id_index + hpsm_id_len]
262
322
  self.print_debug(f'HPSM ID {to_remove} to replace')
263
323
  # Calculate the XOR of each byte to produce the correct ignore sequence.
264
324
  replacement = ''.join(
265
- [format(int(to_remove[i:i + 2], 16) ^ 0xFF, '02x') for i in range(0, len(to_remove), 2)])
325
+ [format(int(to_remove[i : i + 2], 16) ^ 0xFF, '02x') for i in range(0, len(to_remove), 2)]
326
+ )
266
327
 
267
328
  self.print_debug(f'HPSM ID replacement {replacement}')
268
329
  # Overwrite HPSM bytes to be removed.
@@ -309,7 +370,7 @@ class Winnowing(ScanossBase):
309
370
  # Print file line
310
371
  content_length = len(contents)
311
372
  original_filename = file
312
-
373
+
313
374
  if platform.system() == 'Windows':
314
375
  original_filename = file.replace('\\', '/')
315
376
  wfp_filename = repr(original_filename).strip("'") # return a utf-8 compatible version of the filename
@@ -361,14 +422,16 @@ class Winnowing(ScanossBase):
361
422
  crc_hex = '{:08x}'.format(crc)
362
423
  if last_line != line:
363
424
  if output != '':
364
- if self.size_limit and \
365
- (len(wfp.encode("utf-8")) + len(
366
- output.encode("utf-8"))) > self.max_post_size:
425
+ if (
426
+ self.size_limit
427
+ and (len(wfp.encode('utf-8')) + len(output.encode('utf-8')))
428
+ > self.max_post_size
429
+ ):
367
430
  self.print_debug(f'Truncating WFP ({self.max_post_size} limit) for: {file}')
368
431
  output = ''
369
432
  break # Stop collecting snippets as it's over 64k
370
433
  wfp += output + '\n'
371
- output = "%d=%s" % (line, crc_hex)
434
+ output = '%d=%s' % (line, crc_hex)
372
435
  else:
373
436
  output += ',' + crc_hex
374
437
 
@@ -379,7 +442,7 @@ class Winnowing(ScanossBase):
379
442
  # Shift gram
380
443
  gram = gram[1:]
381
444
  if output != '':
382
- if not self.size_limit or (len(wfp.encode("utf-8")) + len(output.encode("utf-8"))) < self.max_post_size:
445
+ if not self.size_limit or (len(wfp.encode('utf-8')) + len(output.encode('utf-8'))) < self.max_post_size:
383
446
  wfp += output + '\n'
384
447
  else:
385
448
  self.print_debug(f'Warning: skipping output in WFP for {file} - "{output}"')
@@ -403,13 +466,13 @@ class Winnowing(ScanossBase):
403
466
  last_line = 0
404
467
  for i, byte in enumerate(content):
405
468
  c = byte
406
- if c == ASCII_LF: # When there is a new line
407
- if len(list_normalized):
469
+ if c == ASCII_LF: # When there is a new line
470
+ if len(list_normalized):
408
471
  crc_lines.append(self.crc8_buffer(list_normalized))
409
472
  list_normalized = []
410
- elif last_line+1 == i:
473
+ elif last_line + 1 == i:
411
474
  crc_lines.append(0xFF)
412
- elif i-last_line > 1:
475
+ elif i - last_line > 1:
413
476
  crc_lines.append(0x00)
414
477
  last_line = i
415
478
  else:
@@ -470,6 +533,7 @@ class Winnowing(ScanossBase):
470
533
  crc ^= CRC8_MAXIM_DOW_FINAL # Bitwise OR (XOR) of crc in Maxim Dow Final
471
534
  return crc
472
535
 
536
+
473
537
  #
474
538
  # End of Winnowing Class
475
539
  #
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: scanoss
3
- Version: 1.20.0
3
+ Version: 1.20.1
4
4
  Summary: Simple Python library to leverage the SCANOSS APIs
5
5
  Home-page: https://scanoss.com
6
6
  Author: SCANOSS