scanoss 1.27.1__py3-none-any.whl → 1.43.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- protoc_gen_swagger/options/annotations_pb2.py +18 -12
- protoc_gen_swagger/options/annotations_pb2.pyi +48 -0
- protoc_gen_swagger/options/annotations_pb2_grpc.py +20 -0
- protoc_gen_swagger/options/openapiv2_pb2.py +110 -99
- protoc_gen_swagger/options/openapiv2_pb2.pyi +1317 -0
- protoc_gen_swagger/options/openapiv2_pb2_grpc.py +20 -0
- scanoss/__init__.py +1 -1
- scanoss/api/common/v2/scanoss_common_pb2.py +49 -22
- scanoss/api/common/v2/scanoss_common_pb2_grpc.py +25 -0
- scanoss/api/components/v2/scanoss_components_pb2.py +68 -43
- scanoss/api/components/v2/scanoss_components_pb2_grpc.py +83 -22
- scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py +136 -47
- scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py +650 -33
- scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py +56 -37
- scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py +64 -12
- scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2.py +74 -31
- scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2_grpc.py +252 -13
- scanoss/api/licenses/__init__.py +23 -0
- scanoss/api/licenses/v2/__init__.py +23 -0
- scanoss/api/licenses/v2/scanoss_licenses_pb2.py +84 -0
- scanoss/api/licenses/v2/scanoss_licenses_pb2_grpc.py +302 -0
- scanoss/api/scanning/v2/scanoss_scanning_pb2.py +32 -21
- scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py +49 -8
- scanoss/api/semgrep/v2/scanoss_semgrep_pb2.py +50 -23
- scanoss/api/semgrep/v2/scanoss_semgrep_pb2_grpc.py +151 -16
- scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py +78 -31
- scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py +282 -18
- scanoss/cli.py +1000 -186
- scanoss/components.py +80 -50
- scanoss/constants.py +7 -1
- scanoss/cryptography.py +89 -55
- scanoss/csvoutput.py +13 -7
- scanoss/cyclonedx.py +141 -9
- scanoss/data/build_date.txt +1 -1
- scanoss/data/osadl-copyleft.json +133 -0
- scanoss/delta.py +197 -0
- scanoss/export/__init__.py +23 -0
- scanoss/export/dependency_track.py +227 -0
- scanoss/file_filters.py +2 -163
- scanoss/filecount.py +37 -38
- scanoss/gitlabqualityreport.py +214 -0
- scanoss/header_filter.py +563 -0
- scanoss/inspection/policy_check/__init__.py +0 -0
- scanoss/inspection/policy_check/dependency_track/__init__.py +0 -0
- scanoss/inspection/policy_check/dependency_track/project_violation.py +479 -0
- scanoss/inspection/{policy_check.py → policy_check/policy_check.py} +65 -72
- scanoss/inspection/policy_check/scanoss/__init__.py +0 -0
- scanoss/inspection/{copyleft.py → policy_check/scanoss/copyleft.py} +89 -73
- scanoss/inspection/{undeclared_component.py → policy_check/scanoss/undeclared_component.py} +52 -46
- scanoss/inspection/summary/__init__.py +0 -0
- scanoss/inspection/summary/component_summary.py +170 -0
- scanoss/inspection/{license_summary.py → summary/license_summary.py} +62 -12
- scanoss/inspection/summary/match_summary.py +341 -0
- scanoss/inspection/utils/file_utils.py +44 -0
- scanoss/inspection/utils/license_utils.py +57 -71
- scanoss/inspection/utils/markdown_utils.py +63 -0
- scanoss/inspection/{inspect_base.py → utils/scan_result_processor.py} +53 -67
- scanoss/osadl.py +125 -0
- scanoss/scanner.py +135 -253
- scanoss/scanners/folder_hasher.py +47 -32
- scanoss/scanners/scanner_hfh.py +50 -18
- scanoss/scanoss_settings.py +33 -3
- scanoss/scanossapi.py +23 -25
- scanoss/scanossbase.py +1 -1
- scanoss/scanossgrpc.py +543 -289
- scanoss/services/dependency_track_service.py +132 -0
- scanoss/spdxlite.py +11 -4
- scanoss/threadeddependencies.py +19 -18
- scanoss/threadedscanning.py +10 -0
- scanoss/utils/scanoss_scan_results_utils.py +41 -0
- scanoss/winnowing.py +71 -19
- {scanoss-1.27.1.dist-info → scanoss-1.43.1.dist-info}/METADATA +8 -5
- scanoss-1.43.1.dist-info/RECORD +110 -0
- scanoss/inspection/component_summary.py +0 -94
- scanoss-1.27.1.dist-info/RECORD +0 -87
- {scanoss-1.27.1.dist-info → scanoss-1.43.1.dist-info}/WHEEL +0 -0
- {scanoss-1.27.1.dist-info → scanoss-1.43.1.dist-info}/entry_points.txt +0 -0
- {scanoss-1.27.1.dist-info → scanoss-1.43.1.dist-info}/licenses/LICENSE +0 -0
- {scanoss-1.27.1.dist-info → scanoss-1.43.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
Copyright (c) 2025, SCANOSS
|
|
5
|
+
|
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
in the Software without restriction, including without limitation the rights
|
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
furnished to do so, subject to the following conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be included in
|
|
14
|
+
all copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
22
|
+
THE SOFTWARE.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
import requests
|
|
26
|
+
|
|
27
|
+
from ..scanossbase import ScanossBase
|
|
28
|
+
|
|
29
|
+
HTTP_OK = 200
|
|
30
|
+
|
|
31
|
+
class DependencyTrackService(ScanossBase):
    """
    Thin read-only client for the Dependency Track REST API.

    Every public getter builds a URL under ``<url>/api/v1`` and delegates the
    HTTP work to :meth:`get_dep_track_data`, which returns the decoded JSON
    body or ``None`` when the request fails.
    """

    def __init__(
        self,
        api_key: str,
        url: str,
        debug: bool = False,
        trace: bool = False,
        quiet: bool = False,
        timeout: float = 180,
    ):
        """
        Create a Dependency Track client.

        Args:
            api_key: API key sent in the ``X-Api-Key`` header of every request
            url: Base URL of the Dependency Track server (trailing '/' is removed)
            debug: Enable debug output
            trace: Enable trace output
            quiet: Enable quiet mode
            timeout: Seconds to wait for the server before giving up on a request.
                     Pass None to wait indefinitely (the previous behaviour).

        Raises:
            ValueError: if the URL (after trimming) or the API key is empty
        """
        super().__init__(debug=debug, trace=trace, quiet=quiet)
        # Normalise before validating so that values such as '   ' or '/' are
        # rejected instead of silently producing an empty base URL.
        base_url = url.strip().rstrip('/') if url else ''
        if not base_url:
            raise ValueError("Error: Dependency Track URL is required")
        self.url = base_url
        if not api_key:
            raise ValueError("Error: Dependency Track API key is required")
        self.api_key = api_key
        self.timeout = timeout

    def get_project_by_name_version(self, name, version):
        """
        Get project information by name and version from Dependency Track

        Args:
            name: Project name to search for
            version: Project version to search for

        Returns:
            dict: Project data if found, None otherwise
        """
        if not name or not version:
            self.print_stderr('Error: Missing name or version.')
            return None
        # Use the project lookup endpoint; name/version travel as query parameters
        params = {
            'name': name,
            'version': version
        }
        self.print_debug(f'Searching for project by: {params}')
        return self.get_dep_track_data(f'{self.url}/api/v1/project/lookup', params)

    def get_project_status(self, upload_token):
        """
        Get Dependency Track project processing status.

        Queries the event token endpoint for the given upload token.

        Args:
            upload_token: Token identifying the upload to check

        Returns:
            dict: Project status information or None if request fails
        """
        if not upload_token:
            self.print_stderr('Error: Missing upload token. Cannot search for project status.')
            return None
        self.print_trace(f'URL: {self.url} Upload token: {upload_token}')
        return self.get_dep_track_data(f'{self.url}/api/v1/event/token/{upload_token}')

    def get_project_violations(self, project_id: str):
        """
        Get project violations from Dependency Track.

        Issues a single request for the policy violations of the given project.
        This method does not wait for project processing to finish; callers
        that need that should check get_project_status() first.

        Args:
            project_id: Identifier of the project to query

        Returns:
            List of policy violations or None if the request fails
        """
        if not project_id:
            self.print_stderr('Error: Missing project id. Cannot search for project violations.')
            return None
        # Return the result as-is - None indicates API failure, empty list means no violations
        return self.get_dep_track_data(f'{self.url}/api/v1/violation/project/{project_id}')

    def get_project_by_id(self, project_id: str):
        """
        Get a Dependency Track project by id.

        Args:
            project_id: Identifier of the project to fetch

        Returns:
            dict: Project data, or None if the id is missing or the request fails
        """
        if not project_id:
            self.print_stderr('Error: Missing project id. Cannot search for project.')
            return None
        self.print_trace(f'URL: {self.url}, UUID: {project_id}')
        return self.get_dep_track_data(f'{self.url}/api/v1/project/{project_id}')

    def get_dep_track_data(self, uri, params=None):
        """
        Perform an authenticated GET request and decode the JSON response.

        Args:
            uri: Full URL to request
            params: Optional mapping of query-string parameters

        Returns:
            Decoded JSON body, or None if the URI is missing, the request fails
            (connection error, timeout, HTTP error status) or the body is not valid JSON
        """
        if not uri:
            self.print_stderr('Error: Missing URI. Cannot search for project.')
            return None
        req_headers = {'X-Api-Key': self.api_key, 'Content-Type': 'application/json'}
        try:
            # requests never times out on its own, so always pass an explicit timeout.
            # An empty/None params value adds nothing to the query string.
            response = requests.get(uri, headers=req_headers, params=params or None, timeout=self.timeout)
            response.raise_for_status()  # Raises an HTTPError for bad responses
            return response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers JSON decoding failures on requests releases where
            # the decode error is not a RequestException subclass.
            self.print_stderr(f"Error: Problem getting project data: {e}")
        return None
|
scanoss/spdxlite.py
CHANGED
|
@@ -71,9 +71,12 @@ class SpdxLite:
|
|
|
71
71
|
:param data: json - JSON object
|
|
72
72
|
:return: summary dictionary
|
|
73
73
|
"""
|
|
74
|
-
if
|
|
74
|
+
if data is None:
|
|
75
75
|
self.print_stderr('ERROR: No JSON data provided to parse.')
|
|
76
76
|
return None
|
|
77
|
+
if len(data) == 0:
|
|
78
|
+
self.print_debug('Warning: Empty scan results provided. Returning empty summary.')
|
|
79
|
+
return {}
|
|
77
80
|
|
|
78
81
|
self.print_debug('Processing raw results into summary format...')
|
|
79
82
|
return self._process_files(data)
|
|
@@ -223,7 +226,9 @@ class SpdxLite:
|
|
|
223
226
|
Process license information and remove duplicates.
|
|
224
227
|
|
|
225
228
|
This method filters license information to include only licenses from trusted sources
|
|
226
|
-
('component_declared'
|
|
229
|
+
('component_declared', 'license_file', 'file_header'). Licenses with an unspecified
|
|
230
|
+
source (None or '') are allowed. Non-empty, non-allowed sources are excluded. It also
|
|
231
|
+
removes any duplicate license names.
|
|
227
232
|
The result is a simplified list of license dictionaries containing only the 'id' field.
|
|
228
233
|
|
|
229
234
|
Args:
|
|
@@ -244,7 +249,7 @@ class SpdxLite:
|
|
|
244
249
|
for license_info in licenses:
|
|
245
250
|
name = license_info.get('name')
|
|
246
251
|
source = license_info.get('source')
|
|
247
|
-
if source not in ("component_declared", "license_file", "file_header"):
|
|
252
|
+
if source not in (None, '') and source not in ("component_declared", "license_file", "file_header"):
|
|
248
253
|
continue
|
|
249
254
|
if name and name not in seen_names:
|
|
250
255
|
processed_licenses.append({'id': name})
|
|
@@ -277,9 +282,11 @@ class SpdxLite:
|
|
|
277
282
|
:return: True if successful, False otherwise
|
|
278
283
|
"""
|
|
279
284
|
raw_data = self.parse(data)
|
|
280
|
-
if
|
|
285
|
+
if raw_data is None:
|
|
281
286
|
self.print_stderr('ERROR: No SPDX data returned for the JSON string provided.')
|
|
282
287
|
return False
|
|
288
|
+
if len(raw_data) == 0:
|
|
289
|
+
self.print_debug('Warning: Empty scan results - generating minimal SPDX Lite document with no packages.')
|
|
283
290
|
|
|
284
291
|
self.load_license_data()
|
|
285
292
|
spdx_document = self._create_base_document(raw_data)
|
scanoss/threadeddependencies.py
CHANGED
|
@@ -22,12 +22,12 @@ SPDX-License-Identifier: MIT
|
|
|
22
22
|
THE SOFTWARE.
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
|
-
import threading
|
|
26
|
-
import queue
|
|
27
25
|
import json
|
|
28
|
-
|
|
29
|
-
|
|
26
|
+
import queue
|
|
27
|
+
import threading
|
|
30
28
|
from dataclasses import dataclass
|
|
29
|
+
from enum import Enum
|
|
30
|
+
from typing import Dict
|
|
31
31
|
|
|
32
32
|
from .scancodedeps import ScancodeDeps
|
|
33
33
|
from .scanossbase import ScanossBase
|
|
@@ -63,7 +63,7 @@ class ThreadedDependencies(ScanossBase):
|
|
|
63
63
|
inputs: queue.Queue = queue.Queue()
|
|
64
64
|
output: queue.Queue = queue.Queue()
|
|
65
65
|
|
|
66
|
-
def __init__(
|
|
66
|
+
def __init__( # noqa: PLR0913
|
|
67
67
|
self,
|
|
68
68
|
sc_deps: ScancodeDeps,
|
|
69
69
|
grpc_api: ScanossGrpc,
|
|
@@ -180,13 +180,15 @@ class ThreadedDependencies(ScanossBase):
|
|
|
180
180
|
return self.filter_dependencies(
|
|
181
181
|
deps, lambda purl: (exclude and purl not in exclude) or (not exclude and purl in include)
|
|
182
182
|
)
|
|
183
|
+
return None
|
|
183
184
|
|
|
184
|
-
def scan_dependencies(
|
|
185
|
+
def scan_dependencies( # noqa: PLR0912
|
|
185
186
|
self, dep_scope: SCOPE = None, dep_scope_include: str = None, dep_scope_exclude: str = None
|
|
186
187
|
) -> None:
|
|
187
188
|
"""
|
|
188
189
|
Scan for dependencies from the given file/dir or from an input file (from the input queue).
|
|
189
190
|
"""
|
|
191
|
+
# TODO refactor to simplify branches based on PLR0912
|
|
190
192
|
current_thread = threading.get_ident()
|
|
191
193
|
self.print_trace(f'Starting dependency worker {current_thread}...')
|
|
192
194
|
try:
|
|
@@ -194,18 +196,17 @@ class ThreadedDependencies(ScanossBase):
|
|
|
194
196
|
deps = None
|
|
195
197
|
if what_to_scan.startswith(DEP_FILE_PREFIX): # We have a pre-parsed dependency file, load it
|
|
196
198
|
deps = self.sc_deps.load_from_file(what_to_scan.strip(DEP_FILE_PREFIX))
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
deps = self.filter_dependencies_by_scopes(deps, dep_scope, dep_scope_include, dep_scope_exclude)
|
|
199
|
+
elif not self.sc_deps.run_scan(what_to_scan=what_to_scan):
|
|
200
|
+
self._errors = True
|
|
201
|
+
else:
|
|
202
|
+
deps = self.sc_deps.produce_from_file()
|
|
203
|
+
if dep_scope is not None:
|
|
204
|
+
self.print_debug(f'Filtering {dep_scope.name} dependencies')
|
|
205
|
+
if dep_scope_include is not None:
|
|
206
|
+
self.print_debug(f"Including dependencies with '{dep_scope_include.split(',')}' scopes")
|
|
207
|
+
if dep_scope_exclude is not None:
|
|
208
|
+
self.print_debug(f"Excluding dependencies with '{dep_scope_exclude.split(',')}' scopes")
|
|
209
|
+
deps = self.filter_dependencies_by_scopes(deps, dep_scope, dep_scope_include, dep_scope_exclude)
|
|
209
210
|
|
|
210
211
|
if not self._errors:
|
|
211
212
|
if deps is None:
|
scanoss/threadedscanning.py
CHANGED
|
@@ -22,6 +22,7 @@ SPDX-License-Identifier: MIT
|
|
|
22
22
|
THE SOFTWARE.
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
|
+
import atexit
|
|
25
26
|
import os
|
|
26
27
|
import queue
|
|
27
28
|
import sys
|
|
@@ -77,6 +78,8 @@ class ThreadedScanning(ScanossBase):
|
|
|
77
78
|
if nb_threads > MAX_ALLOWED_THREADS:
|
|
78
79
|
self.print_msg(f'Warning: Requested threads too large: {nb_threads}. Reducing to {MAX_ALLOWED_THREADS}')
|
|
79
80
|
self.nb_threads = MAX_ALLOWED_THREADS
|
|
81
|
+
# Register cleanup to ensure progress bar is finished on exit
|
|
82
|
+
atexit.register(self.complete_bar)
|
|
80
83
|
|
|
81
84
|
@staticmethod
|
|
82
85
|
def __count_files_in_wfp(wfp: str):
|
|
@@ -101,6 +104,13 @@ class ThreadedScanning(ScanossBase):
|
|
|
101
104
|
if self.bar:
|
|
102
105
|
self.bar.finish()
|
|
103
106
|
|
|
107
|
+
def __del__(self):
    """
    Finaliser: make a best-effort attempt to finish the progress bar.

    Delegates to complete_bar() and deliberately discards any Exception it
    raises, because nothing useful can be done about a failure at this point.
    """
    try:
        self.complete_bar()
    except Exception:
        # Cleanup is best-effort only - never let it surface an error
        return
|
|
113
|
+
|
|
104
114
|
def set_bar(self, bar: Bar) -> None:
|
|
105
115
|
"""
|
|
106
116
|
Set the Progress Bar to display progress while scanning
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
Copyright (c) 2025, SCANOSS
|
|
5
|
+
|
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
in the Software without restriction, including without limitation the rights
|
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
furnished to do so, subject to the following conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be included in
|
|
14
|
+
all copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
22
|
+
THE SOFTWARE.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
def get_lines(lines: str) -> list:
    """
    Extract the numeric range boundaries from a SCANOSS line-range string.

    The string is split on ',' and each entry on '-', and every resulting piece
    is converted to int. Ranges are NOT expanded: '10-20,25-30' produces
    [10, 20, 25, 30] (start/end pairs in order), not every line in between.

    :param lines: Comma-separated line ranges in SCANOSS format (e.g., '10-20,25-30')
    :return: List of ints in the order they appear; empty list for blank input
    :raises ValueError: if any piece is not an integer
    """
    # Blank input has no boundaries to report; avoid failing on int('')
    if not lines or not lines.strip():
        return []
    boundaries = []
    for line_range in lines.split(','):
        boundaries.extend(int(part) for part in line_range.split('-'))
    return boundaries
|
scanoss/winnowing.py
CHANGED
|
@@ -37,6 +37,7 @@ from typing import Tuple
|
|
|
37
37
|
from binaryornot.check import is_binary
|
|
38
38
|
from crc32c import crc32c
|
|
39
39
|
|
|
40
|
+
from .header_filter import HeaderFilter
|
|
40
41
|
from .scanossbase import ScanossBase
|
|
41
42
|
|
|
42
43
|
# Winnowing configuration. DO NOT CHANGE.
|
|
@@ -172,6 +173,8 @@ class Winnowing(ScanossBase):
|
|
|
172
173
|
strip_hpsm_ids=None,
|
|
173
174
|
strip_snippet_ids=None,
|
|
174
175
|
skip_md5_ids=None,
|
|
176
|
+
skip_headers: bool = False,
|
|
177
|
+
skip_headers_limit: int = 0,
|
|
175
178
|
):
|
|
176
179
|
"""
|
|
177
180
|
Instantiate Winnowing class
|
|
@@ -198,7 +201,9 @@ class Winnowing(ScanossBase):
|
|
|
198
201
|
self.strip_hpsm_ids = strip_hpsm_ids
|
|
199
202
|
self.strip_snippet_ids = strip_snippet_ids
|
|
200
203
|
self.hpsm = hpsm
|
|
204
|
+
self.skip_headers = skip_headers
|
|
201
205
|
self.is_windows = platform.system() == 'Windows'
|
|
206
|
+
self.header_filter = HeaderFilter(debug=debug, trace=trace, quiet=quiet, skip_limit=skip_headers_limit)
|
|
202
207
|
if hpsm:
|
|
203
208
|
self.crc8_maxim_dow_table = []
|
|
204
209
|
self.crc8_generate_table()
|
|
@@ -353,6 +358,48 @@ class Winnowing(ScanossBase):
|
|
|
353
358
|
self.print_debug(f'Stripped snippet ids from {file}')
|
|
354
359
|
return wfp
|
|
355
360
|
|
|
361
|
+
def __strip_lines_until_offset(self, file: str, wfp: str, line_offset: int) -> str:
    """
    Remove snippet entries at or before line_offset from a WFP.

    Entries whose text before the first '=' is a number are treated as snippet
    lines; those numbered above line_offset are kept, the rest are dropped.
    A 'start_line=<line_offset>' entry is inserted ahead of the first snippet
    line that is kept. All other entries are passed through unchanged.

    :param file: name of fingerprinted file (used in messages only)
    :param wfp: WFP to clean
    :param line_offset: last line number to strip; values <= 0 leave the WFP untouched
    :return: updated WFP
    """
    if line_offset <= 0:
        return wfp  # Nothing requested, hand back the original WFP
    kept = []
    tag_emitted = False
    for line in wfp.split('\n'):
        prefix = line.split('=')[0]
        if not prefix.isdigit():
            # Not snippet data (file=, hpsm=, etc.) - always retained
            kept.append(line)
            continue
        try:
            snippet_line = int(prefix)
        except (ValueError, IndexError) as e:
            self.print_stderr(f'Error decoding line number from line {line} in {file}: {e}')
            kept.append(line)  # Could not decode it, so keep the entry as-is
            continue
        if snippet_line <= line_offset:
            continue  # At or before the offset - drop it
        if not tag_emitted:
            # Emit the start_line tag once, just before the first surviving snippet line
            kept.append(f'start_line={line_offset}')
            tag_emitted = True
        kept.append(line)
    wfp = '\n'.join(kept)
    if tag_emitted:
        self.print_debug(f'Stripped lines up to offset {line_offset} from {file}')
    return wfp
|
|
402
|
+
|
|
356
403
|
def __detect_line_endings(self, contents: bytes) -> Tuple[bool, bool, bool]:
|
|
357
404
|
"""Detect the types of line endings present in file contents.
|
|
358
405
|
|
|
@@ -362,13 +409,14 @@ class Winnowing(ScanossBase):
|
|
|
362
409
|
Returns:
|
|
363
410
|
Tuple of (has_crlf, has_lf_only, has_cr_only, has_mixed) indicating which line ending types are present.
|
|
364
411
|
"""
|
|
412
|
+
if not contents:
|
|
413
|
+
self.print_debug('Warning: No file contents provided')
|
|
365
414
|
has_crlf = b'\r\n' in contents
|
|
366
415
|
# For LF detection, we need to find LF that's not part of CRLF
|
|
367
416
|
content_without_crlf = contents.replace(b'\r\n', b'')
|
|
368
417
|
has_standalone_lf = b'\n' in content_without_crlf
|
|
369
418
|
# For CR detection, we need to find CR that's not part of CRLF
|
|
370
419
|
has_standalone_cr = b'\r' in content_without_crlf
|
|
371
|
-
|
|
372
420
|
return has_crlf, has_standalone_lf, has_standalone_cr
|
|
373
421
|
|
|
374
422
|
def __calculate_opposite_line_ending_hash(self, contents: bytes):
|
|
@@ -384,13 +432,11 @@ class Winnowing(ScanossBase):
|
|
|
384
432
|
Hash with opposite line endings as hex string, or None if no line endings detected.
|
|
385
433
|
"""
|
|
386
434
|
has_crlf, has_standalone_lf, has_standalone_cr = self.__detect_line_endings(contents)
|
|
387
|
-
|
|
388
435
|
if not has_crlf and not has_standalone_lf and not has_standalone_cr:
|
|
436
|
+
self.print_debug('No line endings detected in file contents')
|
|
389
437
|
return None
|
|
390
|
-
|
|
391
|
-
# Normalize all line endings to LF first
|
|
438
|
+
# Normalise all line endings to LF first
|
|
392
439
|
normalized = contents.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
|
|
393
|
-
|
|
394
440
|
# Determine the dominant line ending type
|
|
395
441
|
if has_crlf and not has_standalone_lf and not has_standalone_cr:
|
|
396
442
|
# File is Windows (CRLF) - produce Unix (LF) hash
|
|
@@ -398,7 +444,7 @@ class Winnowing(ScanossBase):
|
|
|
398
444
|
else:
|
|
399
445
|
# File is Unix (LF/CR) or mixed - produce Windows (CRLF) hash
|
|
400
446
|
opposite_contents = normalized.replace(b'\n', b'\r\n')
|
|
401
|
-
|
|
447
|
+
# Return the MD5 hash of the opposite contents
|
|
402
448
|
return hashlib.md5(opposite_contents).hexdigest()
|
|
403
449
|
|
|
404
450
|
def wfp_for_contents(self, file: str, bin_file: bool, contents: bytes) -> str: # noqa: PLR0912, PLR0915
|
|
@@ -420,27 +466,26 @@ class Winnowing(ScanossBase):
|
|
|
420
466
|
# Print file line
|
|
421
467
|
content_length = len(contents)
|
|
422
468
|
original_filename = file
|
|
423
|
-
|
|
424
469
|
if self.is_windows:
|
|
425
470
|
original_filename = file.replace('\\', '/')
|
|
426
471
|
wfp_filename = repr(original_filename).strip("'") # return a utf-8 compatible version of the filename
|
|
427
|
-
|
|
472
|
+
# hide the real size of the file and its name but keep the suffix
|
|
473
|
+
if self.obfuscate:
|
|
428
474
|
wfp_filename = f'{self.ob_count}{pathlib.Path(original_filename).suffix}'
|
|
429
475
|
self.ob_count = self.ob_count + 1
|
|
430
476
|
self.file_map[wfp_filename] = original_filename # Save the file name map for later (reverse lookup)
|
|
431
|
-
|
|
477
|
+
# Construct the WFP header
|
|
432
478
|
wfp = 'file={0},{1},{2}\n'.format(file_md5, content_length, wfp_filename)
|
|
433
|
-
|
|
434
|
-
# Add opposite line ending hash based on line ending analysis
|
|
479
|
+
# Add the opposite line ending hash based on line ending analysis
|
|
435
480
|
if not bin_file:
|
|
436
481
|
opposite_hash = self.__calculate_opposite_line_ending_hash(contents)
|
|
437
482
|
if opposite_hash is not None:
|
|
438
483
|
wfp += f'fh2={opposite_hash}\n'
|
|
439
|
-
|
|
440
484
|
# We don't process snippets for binaries, or other uninteresting files, or if we're requested to skip
|
|
441
|
-
|
|
485
|
+
decoded_contents = contents.decode('utf-8', 'ignore')
|
|
486
|
+
if bin_file or self.skip_snippets or self.__skip_snippets(file, decoded_contents):
|
|
442
487
|
return wfp
|
|
443
|
-
# Add HPSM
|
|
488
|
+
# Add HPSM (calculated from original contents, not filtered)
|
|
444
489
|
if self.hpsm:
|
|
445
490
|
hpsm = self.__strip_hpsm(file, self.calc_hpsm(contents))
|
|
446
491
|
if len(hpsm) > 0:
|
|
@@ -448,7 +493,7 @@ class Winnowing(ScanossBase):
|
|
|
448
493
|
# Initialize variables
|
|
449
494
|
gram = ''
|
|
450
495
|
window = []
|
|
451
|
-
line = 1
|
|
496
|
+
line = 1 # Line counter for WFP generation
|
|
452
497
|
last_hash = MAX_CRC32
|
|
453
498
|
last_line = 0
|
|
454
499
|
output = ''
|
|
@@ -503,12 +548,19 @@ class Winnowing(ScanossBase):
|
|
|
503
548
|
wfp += output + '\n'
|
|
504
549
|
else:
|
|
505
550
|
self.print_debug(f'Warning: skipping output in WFP for {file} - "{output}"')
|
|
506
|
-
|
|
551
|
+
# Warn if we don't have any WFP content
|
|
507
552
|
if wfp is None or wfp == '':
|
|
508
553
|
self.print_stderr(f'Warning: No WFP content data for {file}')
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
554
|
+
else:
|
|
555
|
+
# Apply line filter to remove headers, comments, and imports from the beginning (if enabled)
|
|
556
|
+
if self.skip_headers:
|
|
557
|
+
line_offset = self.header_filter.filter(file, decoded_contents)
|
|
558
|
+
if line_offset > 0:
|
|
559
|
+
wfp = self.__strip_lines_until_offset(file, wfp, line_offset)
|
|
560
|
+
# Strip snippet IDs from the WFP (if enabled)
|
|
561
|
+
if self.strip_snippet_ids:
|
|
562
|
+
wfp = self.__strip_snippets(file, wfp)
|
|
563
|
+
# Return the WFP contents
|
|
512
564
|
return wfp
|
|
513
565
|
|
|
514
566
|
def calc_hpsm(self, content):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: scanoss
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.43.1
|
|
4
4
|
Summary: Simple Python library to leverage the SCANOSS APIs
|
|
5
5
|
Home-page: https://scanoss.com
|
|
6
6
|
Author: SCANOSS
|
|
@@ -13,15 +13,16 @@ Classifier: License :: OSI Approved :: MIT License
|
|
|
13
13
|
Classifier: Operating System :: OS Independent
|
|
14
14
|
Classifier: Development Status :: 5 - Production/Stable
|
|
15
15
|
Classifier: Programming Language :: Python :: 3
|
|
16
|
-
Requires-Python: >=3.
|
|
16
|
+
Requires-Python: >=3.9
|
|
17
17
|
Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
|
19
19
|
Requires-Dist: requests
|
|
20
20
|
Requires-Dist: crc32c>=2.2
|
|
21
21
|
Requires-Dist: binaryornot
|
|
22
22
|
Requires-Dist: progress
|
|
23
|
-
Requires-Dist: grpcio
|
|
24
|
-
Requires-Dist: protobuf
|
|
23
|
+
Requires-Dist: grpcio>=1.73.1
|
|
24
|
+
Requires-Dist: protobuf>=6.3.1
|
|
25
|
+
Requires-Dist: protoc-gen-openapiv2
|
|
25
26
|
Requires-Dist: pypac
|
|
26
27
|
Requires-Dist: pyOpenSSL
|
|
27
28
|
Requires-Dist: google-api-core
|
|
@@ -30,6 +31,8 @@ Requires-Dist: packageurl-python
|
|
|
30
31
|
Requires-Dist: pathspec
|
|
31
32
|
Requires-Dist: jsonschema
|
|
32
33
|
Requires-Dist: crc
|
|
34
|
+
Requires-Dist: protoc-gen-openapiv2
|
|
35
|
+
Requires-Dist: cyclonedx-python-lib[validation]
|
|
33
36
|
Provides-Extra: fast-winnowing
|
|
34
37
|
Requires-Dist: scanoss_winnowing>=0.5.0; extra == "fast-winnowing"
|
|
35
38
|
Dynamic: license-file
|
|
@@ -174,7 +177,7 @@ if __name__ == "__main__":
|
|
|
174
177
|
```
|
|
175
178
|
|
|
176
179
|
## Requirements
|
|
177
|
-
Python 3.
|
|
180
|
+
Python 3.9 or higher.
|
|
178
181
|
|
|
179
182
|
## Source code
|
|
180
183
|
The source for this package can be found [here](https://github.com/scanoss/scanoss.py).
|