scanoss 1.12.2__py3-none-any.whl → 1.43.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- protoc_gen_swagger/__init__.py +13 -13
- protoc_gen_swagger/options/__init__.py +13 -13
- protoc_gen_swagger/options/annotations_pb2.py +18 -12
- protoc_gen_swagger/options/annotations_pb2.pyi +48 -0
- protoc_gen_swagger/options/annotations_pb2_grpc.py +20 -0
- protoc_gen_swagger/options/openapiv2_pb2.py +110 -99
- protoc_gen_swagger/options/openapiv2_pb2.pyi +1317 -0
- protoc_gen_swagger/options/openapiv2_pb2_grpc.py +20 -0
- scanoss/__init__.py +18 -18
- scanoss/api/__init__.py +17 -17
- scanoss/api/common/__init__.py +17 -17
- scanoss/api/common/v2/__init__.py +17 -17
- scanoss/api/common/v2/scanoss_common_pb2.py +49 -20
- scanoss/api/common/v2/scanoss_common_pb2_grpc.py +25 -0
- scanoss/api/components/__init__.py +17 -17
- scanoss/api/components/v2/__init__.py +17 -17
- scanoss/api/components/v2/scanoss_components_pb2.py +68 -43
- scanoss/api/components/v2/scanoss_components_pb2_grpc.py +83 -22
- scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py +136 -21
- scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py +766 -13
- scanoss/api/dependencies/__init__.py +17 -17
- scanoss/api/dependencies/v2/__init__.py +17 -17
- scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py +56 -29
- scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py +94 -8
- scanoss/api/geoprovenance/__init__.py +23 -0
- scanoss/api/geoprovenance/v2/__init__.py +23 -0
- scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2.py +92 -0
- scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2_grpc.py +381 -0
- scanoss/api/licenses/__init__.py +23 -0
- scanoss/api/licenses/v2/__init__.py +23 -0
- scanoss/api/licenses/v2/scanoss_licenses_pb2.py +84 -0
- scanoss/api/licenses/v2/scanoss_licenses_pb2_grpc.py +302 -0
- scanoss/api/scanning/__init__.py +17 -17
- scanoss/api/scanning/v2/__init__.py +17 -17
- scanoss/api/scanning/v2/scanoss_scanning_pb2.py +42 -13
- scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py +86 -7
- scanoss/api/semgrep/__init__.py +17 -17
- scanoss/api/semgrep/v2/__init__.py +17 -17
- scanoss/api/semgrep/v2/scanoss_semgrep_pb2.py +50 -23
- scanoss/api/semgrep/v2/scanoss_semgrep_pb2_grpc.py +151 -16
- scanoss/api/vulnerabilities/__init__.py +17 -17
- scanoss/api/vulnerabilities/v2/__init__.py +17 -17
- scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py +78 -31
- scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py +282 -18
- scanoss/cli.py +2359 -370
- scanoss/components.py +187 -94
- scanoss/constants.py +22 -0
- scanoss/cryptography.py +308 -0
- scanoss/csvoutput.py +91 -58
- scanoss/cyclonedx.py +221 -63
- scanoss/data/build_date.txt +1 -1
- scanoss/data/osadl-copyleft.json +133 -0
- scanoss/data/scanoss-settings-schema.json +254 -0
- scanoss/delta.py +197 -0
- scanoss/export/__init__.py +23 -0
- scanoss/export/dependency_track.py +227 -0
- scanoss/file_filters.py +582 -0
- scanoss/filecount.py +75 -69
- scanoss/gitlabqualityreport.py +214 -0
- scanoss/header_filter.py +563 -0
- scanoss/inspection/__init__.py +23 -0
- scanoss/inspection/policy_check/__init__.py +0 -0
- scanoss/inspection/policy_check/dependency_track/__init__.py +0 -0
- scanoss/inspection/policy_check/dependency_track/project_violation.py +479 -0
- scanoss/inspection/policy_check/policy_check.py +222 -0
- scanoss/inspection/policy_check/scanoss/__init__.py +0 -0
- scanoss/inspection/policy_check/scanoss/copyleft.py +243 -0
- scanoss/inspection/policy_check/scanoss/undeclared_component.py +309 -0
- scanoss/inspection/summary/__init__.py +0 -0
- scanoss/inspection/summary/component_summary.py +170 -0
- scanoss/inspection/summary/license_summary.py +191 -0
- scanoss/inspection/summary/match_summary.py +341 -0
- scanoss/inspection/utils/file_utils.py +44 -0
- scanoss/inspection/utils/license_utils.py +123 -0
- scanoss/inspection/utils/markdown_utils.py +63 -0
- scanoss/inspection/utils/scan_result_processor.py +417 -0
- scanoss/osadl.py +125 -0
- scanoss/results.py +275 -0
- scanoss/scancodedeps.py +87 -38
- scanoss/scanner.py +431 -539
- scanoss/scanners/__init__.py +23 -0
- scanoss/scanners/container_scanner.py +476 -0
- scanoss/scanners/folder_hasher.py +358 -0
- scanoss/scanners/scanner_config.py +73 -0
- scanoss/scanners/scanner_hfh.py +252 -0
- scanoss/scanoss_settings.py +337 -0
- scanoss/scanossapi.py +140 -101
- scanoss/scanossbase.py +59 -22
- scanoss/scanossgrpc.py +799 -251
- scanoss/scanpostprocessor.py +294 -0
- scanoss/scantype.py +22 -21
- scanoss/services/dependency_track_service.py +132 -0
- scanoss/spdxlite.py +532 -174
- scanoss/threadeddependencies.py +148 -47
- scanoss/threadedscanning.py +53 -37
- scanoss/utils/__init__.py +23 -0
- scanoss/utils/abstract_presenter.py +103 -0
- scanoss/utils/crc64.py +96 -0
- scanoss/utils/file.py +84 -0
- scanoss/utils/scanoss_scan_results_utils.py +41 -0
- scanoss/utils/simhash.py +198 -0
- scanoss/winnowing.py +241 -63
- {scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info}/METADATA +18 -9
- scanoss-1.43.1.dist-info/RECORD +110 -0
- {scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info}/WHEEL +1 -1
- scanoss-1.12.2.dist-info/RECORD +0 -58
- {scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info}/entry_points.txt +0 -0
- {scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info/licenses}/LICENSE +0 -0
- {scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info}/top_level.txt +0 -0
scanoss/spdxlite.py
CHANGED
|
@@ -1,34 +1,37 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
2
|
+
SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
Copyright (c) 2021, SCANOSS
|
|
5
|
+
|
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
in the Software without restriction, including without limitation the rights
|
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
furnished to do so, subject to the following conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be included in
|
|
14
|
+
all copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
22
|
+
THE SOFTWARE.
|
|
23
23
|
"""
|
|
24
|
-
|
|
25
|
-
import os.path
|
|
26
|
-
import sys
|
|
27
|
-
import hashlib
|
|
24
|
+
|
|
28
25
|
import datetime
|
|
29
26
|
import getpass
|
|
27
|
+
import hashlib
|
|
28
|
+
import json
|
|
29
|
+
import os.path
|
|
30
30
|
import re
|
|
31
|
-
import
|
|
31
|
+
import sys
|
|
32
|
+
|
|
33
|
+
import importlib_resources
|
|
34
|
+
from packageurl import PackageURL
|
|
32
35
|
|
|
33
36
|
from . import __version__
|
|
34
37
|
|
|
@@ -68,76 +71,192 @@ class SpdxLite:
|
|
|
68
71
|
:param data: json - JSON object
|
|
69
72
|
:return: summary dictionary
|
|
70
73
|
"""
|
|
71
|
-
if
|
|
74
|
+
if data is None:
|
|
72
75
|
self.print_stderr('ERROR: No JSON data provided to parse.')
|
|
73
76
|
return None
|
|
74
|
-
|
|
77
|
+
if len(data) == 0:
|
|
78
|
+
self.print_debug('Warning: Empty scan results provided. Returning empty summary.')
|
|
79
|
+
return {}
|
|
80
|
+
|
|
81
|
+
self.print_debug('Processing raw results into summary format...')
|
|
82
|
+
return self._process_files(data)
|
|
83
|
+
|
|
84
|
+
def _process_files(self, data: json) -> dict:
|
|
85
|
+
"""
|
|
86
|
+
Process raw results and build a component summary.
|
|
87
|
+
|
|
88
|
+
Args:
|
|
89
|
+
data: JSON data containing raw results
|
|
90
|
+
|
|
91
|
+
Returns:
|
|
92
|
+
dict: The built summary dictionary
|
|
93
|
+
"""
|
|
75
94
|
summary = {}
|
|
76
|
-
for
|
|
77
|
-
file_details = data.get(
|
|
78
|
-
#
|
|
79
|
-
|
|
80
|
-
id_details = d.get("id")
|
|
81
|
-
if not id_details or id_details == 'none': # Ignore files with no ids
|
|
82
|
-
continue
|
|
83
|
-
purl = None
|
|
84
|
-
if id_details == 'dependency': # Process dependency data
|
|
85
|
-
dependencies = d.get("dependencies")
|
|
86
|
-
if not dependencies:
|
|
87
|
-
self.print_stderr(f'Warning: No Dependencies found for {f}: {file_details}')
|
|
88
|
-
continue
|
|
89
|
-
for deps in dependencies:
|
|
90
|
-
# print(f'File: {f} Deps: {deps}')
|
|
91
|
-
purl = deps.get("purl")
|
|
92
|
-
if not purl:
|
|
93
|
-
self.print_stderr(f'Warning: No PURL found for {f}: {deps}')
|
|
94
|
-
continue
|
|
95
|
-
if summary.get(purl):
|
|
96
|
-
self.print_debug(f'Component {purl} already stored: {summary.get(purl)}')
|
|
97
|
-
continue
|
|
98
|
-
fd = {}
|
|
99
|
-
for field in ['component', 'version', 'url']:
|
|
100
|
-
fd[field] = deps.get(field, '')
|
|
101
|
-
licenses = deps.get('licenses')
|
|
102
|
-
fdl = []
|
|
103
|
-
dc = []
|
|
104
|
-
for lic in licenses:
|
|
105
|
-
name = lic.get("name")
|
|
106
|
-
if name not in dc: # Only save the license name once
|
|
107
|
-
fdl.append({'id': name})
|
|
108
|
-
dc.append(name)
|
|
109
|
-
fd['licenses'] = fdl
|
|
110
|
-
summary[purl] = fd
|
|
111
|
-
else: # Normal file id type
|
|
112
|
-
purls = d.get('purl')
|
|
113
|
-
if not purls:
|
|
114
|
-
self.print_stderr(f'Purl block missing for {f}: {file_details}')
|
|
115
|
-
continue
|
|
116
|
-
for p in purls:
|
|
117
|
-
self.print_debug(f'Purl: {p}')
|
|
118
|
-
purl = p
|
|
119
|
-
break
|
|
120
|
-
if not purl:
|
|
121
|
-
self.print_stderr(f'Warning: No PURL found for {f}: {file_details}')
|
|
122
|
-
continue
|
|
123
|
-
if summary.get(purl):
|
|
124
|
-
self.print_debug(f'Component {purl} already stored: {summary.get(purl)}')
|
|
125
|
-
continue
|
|
126
|
-
fd = {}
|
|
127
|
-
for field in ['id', 'vendor', 'component', 'version', 'latest', 'url']:
|
|
128
|
-
fd[field] = d.get(field)
|
|
129
|
-
licenses = d.get('licenses')
|
|
130
|
-
fdl = []
|
|
131
|
-
dc = []
|
|
132
|
-
for lic in licenses:
|
|
133
|
-
name = lic.get("name")
|
|
134
|
-
if name not in dc: # Only save the license name once
|
|
135
|
-
fdl.append({'id': name})
|
|
136
|
-
dc.append(name)
|
|
137
|
-
fd['licenses'] = fdl
|
|
138
|
-
summary[purl] = fd
|
|
95
|
+
for file_path in data:
|
|
96
|
+
file_details = data.get(file_path)
|
|
97
|
+
# summary is passed by reference and modified inside the function
|
|
98
|
+
self._process_entries(file_path, file_details, summary)
|
|
139
99
|
return summary
|
|
140
100
|
|
|
101
|
+
def _process_entries(self, file_path: str, file_details: list, summary: dict):
|
|
102
|
+
"""
|
|
103
|
+
Process entries for a single file.
|
|
104
|
+
|
|
105
|
+
Args:
|
|
106
|
+
file_path: Path to the file being processed
|
|
107
|
+
file_details: Results of the file
|
|
108
|
+
summary: Reference to summary dictionary that will be modified in place
|
|
109
|
+
"""
|
|
110
|
+
for entry in file_details:
|
|
111
|
+
id_details = entry.get('id')
|
|
112
|
+
if not id_details or id_details == 'none':
|
|
113
|
+
continue
|
|
114
|
+
|
|
115
|
+
if id_details == 'dependency':
|
|
116
|
+
self._process_dependency_entry(file_path, entry, summary)
|
|
117
|
+
else:
|
|
118
|
+
self._process_file_entry(file_path, entry, summary)
|
|
119
|
+
|
|
120
|
+
def _process_dependency_entry(self, file_path: str, entry: dict, summary: dict):
|
|
121
|
+
"""
|
|
122
|
+
Process a dependency type entry.
|
|
123
|
+
|
|
124
|
+
Args:
|
|
125
|
+
file_path: Path to the file being processed
|
|
126
|
+
entry: The dependency entry to process
|
|
127
|
+
summary: Reference to summary dictionary that will be modified in place
|
|
128
|
+
"""
|
|
129
|
+
dependencies = entry.get('dependencies')
|
|
130
|
+
if not dependencies:
|
|
131
|
+
self.print_stderr(f'Warning: No Dependencies found for {file_path}')
|
|
132
|
+
return
|
|
133
|
+
|
|
134
|
+
for dep in dependencies:
|
|
135
|
+
purl = dep.get('purl')
|
|
136
|
+
if not self._is_valid_purl(file_path, dep, purl, summary):
|
|
137
|
+
continue
|
|
138
|
+
# Modifying the summary dictionary directly as it's passed by reference
|
|
139
|
+
summary[purl] = self._create_dependency_summary(dep)
|
|
140
|
+
|
|
141
|
+
def _process_file_entry(self, file_path: str, entry: dict, summary: dict):
|
|
142
|
+
"""
|
|
143
|
+
Process file entry.
|
|
144
|
+
|
|
145
|
+
Args:
|
|
146
|
+
file_path: Path to the file being processed
|
|
147
|
+
entry: Process file match entry
|
|
148
|
+
summary: Reference to summary dictionary that will be modified in place
|
|
149
|
+
"""
|
|
150
|
+
purls = entry.get('purl')
|
|
151
|
+
if not purls:
|
|
152
|
+
self.print_stderr(f'Purl block missing for {file_path}')
|
|
153
|
+
return
|
|
154
|
+
|
|
155
|
+
purl = purls[0] if purls else None
|
|
156
|
+
if not self._is_valid_purl(file_path, entry, purl, summary):
|
|
157
|
+
return
|
|
158
|
+
|
|
159
|
+
summary[purl] = self._create_file_summary(entry)
|
|
160
|
+
|
|
161
|
+
def _is_valid_purl(self, file_path: str, entry: dict, purl: str, summary: dict) -> bool:
|
|
162
|
+
"""
|
|
163
|
+
Check if purl is valid and not already processed.
|
|
164
|
+
|
|
165
|
+
Args:
|
|
166
|
+
file_path: Path to the file being processed
|
|
167
|
+
entry: The entry containing the PURL
|
|
168
|
+
purl: The PURL to validate
|
|
169
|
+
summary: Reference to summary dictionary to check for existing entries
|
|
170
|
+
|
|
171
|
+
Returns:
|
|
172
|
+
bool: True if purl is valid and not already processed
|
|
173
|
+
"""
|
|
174
|
+
if not purl:
|
|
175
|
+
self.print_stderr(f'Warning: No PURL found for {file_path}: {entry}')
|
|
176
|
+
return False
|
|
177
|
+
|
|
178
|
+
if summary.get(purl):
|
|
179
|
+
self.print_debug(f'Component {purl} already stored: {summary.get(purl)}')
|
|
180
|
+
return False
|
|
181
|
+
|
|
182
|
+
return True
|
|
183
|
+
|
|
184
|
+
def _create_dependency_summary(self, dep: dict) -> dict:
|
|
185
|
+
"""
|
|
186
|
+
Create summary for dependency entry.
|
|
187
|
+
|
|
188
|
+
This method extracts relevant fields from a dependency entry and creates a
|
|
189
|
+
standardized summary dictionary. It handles fields like component, version,
|
|
190
|
+
and URL, with special processing for licenses.
|
|
191
|
+
|
|
192
|
+
Args:
|
|
193
|
+
dep (dict): The dependency entry containing component information
|
|
194
|
+
|
|
195
|
+
Returns:
|
|
196
|
+
dict: A new summary dictionary containing the extracted and processed fields
|
|
197
|
+
"""
|
|
198
|
+
summary = {}
|
|
199
|
+
for field in ['component', 'version', 'url']:
|
|
200
|
+
summary[field] = dep.get(field, '')
|
|
201
|
+
summary['licenses'] = self._process_licenses(dep.get('licenses'))
|
|
202
|
+
return summary
|
|
203
|
+
|
|
204
|
+
def _create_file_summary(self, entry: dict) -> dict:
|
|
205
|
+
"""
|
|
206
|
+
Create summary for file entry.
|
|
207
|
+
|
|
208
|
+
This method extracts set of fields from file entry and creates a standardized summary dictionary.
|
|
209
|
+
|
|
210
|
+
Args:
|
|
211
|
+
entry (dict): The file entry containing the metadata to summarize
|
|
212
|
+
|
|
213
|
+
Returns:
|
|
214
|
+
dict: A new summary dictionary containing all extracted and processed fields
|
|
215
|
+
"""
|
|
216
|
+
summary = {}
|
|
217
|
+
fields = ['id', 'vendor', 'component', 'version', 'latest',
|
|
218
|
+
'url', 'url_hash', 'download_url']
|
|
219
|
+
for field in fields:
|
|
220
|
+
summary[field] = entry.get(field)
|
|
221
|
+
summary['licenses'] = self._process_licenses(entry.get('licenses'))
|
|
222
|
+
return summary
|
|
223
|
+
|
|
224
|
+
def _process_licenses(self, licenses: list) -> list:
|
|
225
|
+
"""
|
|
226
|
+
Process license information and remove duplicates.
|
|
227
|
+
|
|
228
|
+
This method filters license information to include only licenses from trusted sources
|
|
229
|
+
('component_declared', 'license_file', 'file_header'). Licenses with an unspecified
|
|
230
|
+
source (None or '') are allowed. Non-empty, non-allowed sources are excluded. It also
|
|
231
|
+
removes any duplicate license names.
|
|
232
|
+
The result is a simplified list of license dictionaries containing only the 'id' field.
|
|
233
|
+
|
|
234
|
+
Args:
|
|
235
|
+
licenses (list): A list of license dictionaries, each containing at least 'name'
|
|
236
|
+
and 'source' fields. Can be None or empty.
|
|
237
|
+
|
|
238
|
+
Returns:
|
|
239
|
+
list: A filtered and deduplicated list of license dictionaries, where each
|
|
240
|
+
dictionary contains only an 'id' field matching the original license name.
|
|
241
|
+
Returns an empty list if input is None or empty.
|
|
242
|
+
"""
|
|
243
|
+
if not licenses:
|
|
244
|
+
return []
|
|
245
|
+
|
|
246
|
+
processed_licenses = []
|
|
247
|
+
seen_names = set()
|
|
248
|
+
|
|
249
|
+
for license_info in licenses:
|
|
250
|
+
name = license_info.get('name')
|
|
251
|
+
source = license_info.get('source')
|
|
252
|
+
if source not in (None, '') and source not in ("component_declared", "license_file", "file_header"):
|
|
253
|
+
continue
|
|
254
|
+
if name and name not in seen_names:
|
|
255
|
+
processed_licenses.append({'id': name})
|
|
256
|
+
seen_names.add(name)
|
|
257
|
+
|
|
258
|
+
return processed_licenses
|
|
259
|
+
|
|
141
260
|
def produce_from_file(self, json_file: str, output_file: str = None) -> bool:
|
|
142
261
|
"""
|
|
143
262
|
Parse plain/raw input JSON file and produce SPDX Lite output
|
|
@@ -163,101 +282,339 @@ class SpdxLite:
|
|
|
163
282
|
:return: True if successful, False otherwise
|
|
164
283
|
"""
|
|
165
284
|
raw_data = self.parse(data)
|
|
166
|
-
if
|
|
285
|
+
if raw_data is None:
|
|
167
286
|
self.print_stderr('ERROR: No SPDX data returned for the JSON string provided.')
|
|
168
287
|
return False
|
|
288
|
+
if len(raw_data) == 0:
|
|
289
|
+
self.print_debug('Warning: Empty scan results - generating minimal SPDX Lite document with no packages.')
|
|
290
|
+
|
|
169
291
|
self.load_license_data()
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
292
|
+
spdx_document = self._create_base_document(raw_data)
|
|
293
|
+
self._process_packages(raw_data, spdx_document)
|
|
294
|
+
return self._write_output(spdx_document, output_file)
|
|
295
|
+
|
|
296
|
+
def _create_base_document(self, raw_data: dict) -> dict:
|
|
297
|
+
"""
|
|
298
|
+
Create the base SPDX document structure.
|
|
299
|
+
|
|
300
|
+
This method initializes a new SPDX document with standard fields required by
|
|
301
|
+
the SPDX 2.2 specification. It generates a unique document namespace using
|
|
302
|
+
a hash of the raw data and current timestamp.
|
|
303
|
+
|
|
304
|
+
Args:
|
|
305
|
+
raw_data (dict): The raw component data used to create a unique identifier
|
|
306
|
+
for the document namespace
|
|
307
|
+
|
|
308
|
+
Returns:
|
|
309
|
+
dict: A dictionary containing the base SPDX document structure with the
|
|
310
|
+
following fields:
|
|
311
|
+
- spdxVersion: The SPDX specification version
|
|
312
|
+
- dataLicense: The license for the SPDX document itself
|
|
313
|
+
- SPDXID: The document's unique identifier
|
|
314
|
+
- name: The name of the SBOM
|
|
315
|
+
- creationInfo: Information about when and how the document was created
|
|
316
|
+
- documentNamespace: A unique URI for this document
|
|
317
|
+
- documentDescribes: List of packages described (initially empty)
|
|
318
|
+
- hasExtractedLicensingInfos: List of licenses (initially empty)
|
|
319
|
+
- packages: List of package information (initially empty)
|
|
320
|
+
"""
|
|
176
321
|
now = datetime.datetime.utcnow()
|
|
177
322
|
md5hex = hashlib.md5(f'{raw_data}-{now}'.encode('utf-8')).hexdigest()
|
|
178
|
-
|
|
323
|
+
|
|
324
|
+
return {
|
|
179
325
|
'spdxVersion': 'SPDX-2.2',
|
|
180
326
|
'dataLicense': 'CC0-1.0',
|
|
181
|
-
'SPDXID':
|
|
327
|
+
'SPDXID': 'SPDXRef-DOCUMENT',
|
|
182
328
|
'name': 'SCANOSS-SBOM',
|
|
183
|
-
'creationInfo':
|
|
184
|
-
'created': now.strftime('%Y-%m-%dT%H:%M:%S') + now.strftime('.%f')[:4] + 'Z',
|
|
185
|
-
'creators': [f'Tool: SCANOSS-PY: {__version__}', f'Person: {getpass.getuser()}']
|
|
186
|
-
},
|
|
329
|
+
'creationInfo': self._create_creation_info(now),
|
|
187
330
|
'documentNamespace': f'https://spdx.org/spdxdocs/scanoss-py-{__version__}-{md5hex}',
|
|
188
331
|
'documentDescribes': [],
|
|
189
332
|
'hasExtractedLicensingInfos': [],
|
|
190
|
-
'packages': []
|
|
333
|
+
'packages': [],
|
|
191
334
|
}
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
335
|
+
|
|
336
|
+
def _create_creation_info(self, timestamp: datetime.datetime) -> dict:
    """
    Create the creation info section of an SPDX document.

    Args:
        timestamp (datetime.datetime): When the document was created. A naive
                   value is formatted as-is (the caller is expected to supply UTC);
                   a timezone-aware value is converted to UTC first so that the
                   trailing 'Z' is accurate.

    Returns:
        dict: A dictionary containing creation information with the following fields:
            - created: timestamp formatted as YYYY-MM-DDThh:mm:ssZ
            - creators: tool, person and organization entries
            - comment: fixed note about the SBOM type
    """
    if timestamp.tzinfo is not None:
        timestamp = timestamp.astimezone(datetime.timezone.utc)

    try:
        user = getpass.getuser()
    except (ImportError, KeyError, OSError):
        # No login name can be determined (e.g. a container running under a UID
        # without a passwd entry). Do not fail the whole export for this.
        # NOTE(review): 'anonymous' chosen as the placeholder - confirm against SPDX guidance.
        user = 'anonymous'

    return {
        'created': timestamp.strftime('%Y-%m-%dT%H:%M:%SZ'),
        'creators': [
            f'Tool: SCANOSS-PY: {__version__}',
            f'Person: {user}',
            'Organization: SCANOSS',
        ],
        'comment': 'SBOM Build information - SBOM Type: Build',
    }
|
|
363
|
+
|
|
364
|
+
def _process_packages(self, raw_data: dict, spdx_document: dict):
|
|
365
|
+
"""
|
|
366
|
+
Process packages and add them to the SPDX document.
|
|
367
|
+
|
|
368
|
+
This method iterates through the raw component data, creates package information
|
|
369
|
+
for each component, and adds them to the SPDX document. It also collects
|
|
370
|
+
license references to be processed separately.
|
|
371
|
+
|
|
372
|
+
Args:
|
|
373
|
+
raw_data (dict): Dictionary of package data indexed by PURL
|
|
374
|
+
(Package URL identifiers)
|
|
375
|
+
spdx_document (dict): Reference to the SPDX document being built,
|
|
376
|
+
which will be modified in place
|
|
377
|
+
|
|
378
|
+
Note:
|
|
379
|
+
This method modifies the spdx_document dictionary in place by:
|
|
380
|
+
1. Adding package information to the 'packages' list
|
|
381
|
+
2. Adding package SPDXIDs to the 'documentDescribes' list
|
|
382
|
+
3. Indirectly populating 'hasExtractedLicensingInfos' via _process_license_refs()
|
|
383
|
+
"""
|
|
384
|
+
lic_refs = set()
|
|
385
|
+
|
|
386
|
+
for purl, comp in raw_data.items():
|
|
387
|
+
package_info = self._create_package_info(purl, comp, lic_refs)
|
|
388
|
+
spdx_document['packages'].append(package_info)
|
|
389
|
+
spdx_document['documentDescribes'].append(package_info['SPDXID'])
|
|
390
|
+
|
|
391
|
+
self._process_license_refs(lic_refs, spdx_document)
|
|
392
|
+
|
|
393
|
+
def _create_package_info(self, purl: str, comp: dict, lic_refs: set) -> dict:
|
|
394
|
+
"""
|
|
395
|
+
Create package information for SPDX document.
|
|
396
|
+
|
|
397
|
+
This method generates a complete package information entry following the SPDX
|
|
398
|
+
specification format. It creates a unique identifier for the package based on
|
|
399
|
+
its PURL and version, processes license information, and formats all required
|
|
400
|
+
fields for the SPDX document.
|
|
401
|
+
|
|
402
|
+
Args:
|
|
403
|
+
purl (str): Package URL identifier for the component
|
|
404
|
+
comp (dict): Component information dictionary containing metadata like
|
|
405
|
+
component name, version, URLs, and license information
|
|
406
|
+
lic_refs (set): Reference to a set that will be populated with license
|
|
407
|
+
references found in this package. This set is modified in place.
|
|
408
|
+
|
|
409
|
+
Returns:
|
|
410
|
+
dict: A dictionary containing all required SPDX package fields including:
|
|
411
|
+
- name: Component name
|
|
412
|
+
- SPDXID: Unique identifier for this package within the document
|
|
413
|
+
- versionInfo: Component version
|
|
414
|
+
- downloadLocation: URL where the package can be downloaded
|
|
415
|
+
- homepage: Component homepage URL
|
|
416
|
+
- licenseDeclared: Formatted license expression
|
|
417
|
+
- licenseConcluded: NOASSERTION as automated conclusion isn't possible
|
|
418
|
+
- filesAnalyzed: False as files are not individually analyzed
|
|
419
|
+
- copyrightText: NOASSERTION as copyright text isn't available
|
|
420
|
+
- supplier: Organization name from vendor information
|
|
421
|
+
- externalRefs: Package URL reference for package manager integration
|
|
422
|
+
- checksums: MD5 hash of the package if available
|
|
423
|
+
"""
|
|
424
|
+
lic_text = self._process_package_licenses(comp.get('licenses', []), lic_refs)
|
|
425
|
+
comp_ver = comp.get('version')
|
|
426
|
+
purl_ver = f'{purl}@{comp_ver}'
|
|
427
|
+
purl_hash = hashlib.md5(purl_ver.encode('utf-8')).hexdigest()
|
|
428
|
+
|
|
429
|
+
return {
|
|
430
|
+
'name': comp.get('component'),
|
|
431
|
+
'SPDXID': f'SPDXRef-{purl_hash}',
|
|
432
|
+
'versionInfo': comp_ver,
|
|
433
|
+
'downloadLocation': comp.get('download_url') or comp.get('url'),
|
|
434
|
+
'homepage': comp.get('url', ''),
|
|
435
|
+
'licenseDeclared': lic_text,
|
|
436
|
+
'licenseConcluded': 'NOASSERTION',
|
|
437
|
+
'filesAnalyzed': False,
|
|
438
|
+
'copyrightText': 'NOASSERTION',
|
|
439
|
+
'supplier': f'Organization: {comp.get("vendor", "NOASSERTION")}',
|
|
440
|
+
'externalRefs': [
|
|
441
|
+
{
|
|
229
442
|
'referenceCategory': 'PACKAGE-MANAGER',
|
|
230
|
-
'referenceLocator': purl_ver,
|
|
443
|
+
'referenceLocator': PackageURL.from_string(purl_ver).to_string(),
|
|
231
444
|
'referenceType': 'purl'
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
445
|
+
}
|
|
446
|
+
],
|
|
447
|
+
'checksums': [
|
|
448
|
+
{
|
|
449
|
+
'algorithm': 'MD5',
|
|
450
|
+
'checksumValue': comp.get('url_hash') or '0' * 32
|
|
451
|
+
}
|
|
452
|
+
],
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
def _process_package_licenses(self, licenses: list, lic_refs: set) -> str:
|
|
456
|
+
"""
|
|
457
|
+
Process licenses and return license text formatted for SPDX.
|
|
458
|
+
|
|
459
|
+
This method processes a list of license objects, extracts valid license IDs,
|
|
460
|
+
converts them to SPDX format, and combines them into a properly formatted
|
|
461
|
+
license expression.
|
|
462
|
+
|
|
463
|
+
Args:
|
|
464
|
+
licenses (list): List of license dictionaries, each containing at least
|
|
465
|
+
an 'id' field
|
|
466
|
+
lic_refs (set): Reference to a set that will collect license references.
|
|
467
|
+
This set is modified in place.
|
|
468
|
+
|
|
469
|
+
Returns:
|
|
470
|
+
str: A formatted license expression string following SPDX syntax.
|
|
471
|
+
Returns 'NOASSERTION' if no valid licenses are found.
|
|
472
|
+
"""
|
|
473
|
+
if not licenses:
|
|
474
|
+
return 'NOASSERTION'
|
|
475
|
+
|
|
476
|
+
lic_set = set()
|
|
477
|
+
for lic in licenses:
|
|
478
|
+
lc_id = lic.get('id')
|
|
479
|
+
self._process_license_id(lc_id, lic_refs, lic_set)
|
|
480
|
+
|
|
481
|
+
return self._format_license_text(lic_set)
|
|
482
|
+
|
|
483
|
+
def _process_license_id(self, lc_id: str, lic_refs: set, lic_set: set):
|
|
484
|
+
"""
|
|
485
|
+
Process individual license ID and add to appropriate sets.
|
|
486
|
+
|
|
487
|
+
This method attempts to convert a license ID to its SPDX equivalent.
|
|
488
|
+
If not found in the SPDX license list, it's formatted as a LicenseRef
|
|
489
|
+
and added to the license references set.
|
|
490
|
+
|
|
491
|
+
Args:
|
|
492
|
+
lc_id (str): The license ID to process
|
|
493
|
+
lic_refs (set): Reference to a set that collects license references
|
|
494
|
+
for later processing. Modified in place.
|
|
495
|
+
lic_set (set): Reference to a set collecting all license IDs for
|
|
496
|
+
"""
|
|
497
|
+
spdx_id = self.get_spdx_license_id(lc_id)
|
|
498
|
+
if not spdx_id:
|
|
499
|
+
if not lc_id.startswith('LicenseRef'):
|
|
500
|
+
lc_id = f'LicenseRef-{lc_id}'
|
|
501
|
+
lic_refs.add(lc_id)
|
|
502
|
+
lic_set.add(spdx_id if spdx_id else lc_id)
|
|
503
|
+
|
|
504
|
+
def _format_license_text(self, lic_set: set) -> str:
|
|
505
|
+
"""
|
|
506
|
+
Format the license text with proper SPDX syntax.
|
|
507
|
+
|
|
508
|
+
This method combines multiple license IDs with the 'AND' operator
|
|
509
|
+
according to SPDX specification rules. If multiple licenses are present,
|
|
510
|
+
the expression is enclosed in parentheses.
|
|
511
|
+
|
|
512
|
+
Args:
|
|
513
|
+
lic_set (set): Set of license IDs to format
|
|
514
|
+
|
|
515
|
+
Returns:
|
|
516
|
+
str: A properly formatted SPDX license expression.
|
|
517
|
+
Returns 'NOASSERTION' if the set is empty.
|
|
518
|
+
"""
|
|
519
|
+
if not lic_set:
|
|
520
|
+
return 'NOASSERTION'
|
|
521
|
+
|
|
522
|
+
lic_text = ' AND '.join(lic_set)
|
|
523
|
+
if len(lic_set) > 1:
|
|
524
|
+
lic_text = f'({lic_text})'
|
|
525
|
+
return lic_text
|
|
526
|
+
|
|
527
|
+
def _process_license_refs(self, lic_refs: set, spdx_document: dict):
|
|
528
|
+
"""
|
|
529
|
+
Process and add license references to the SPDX document.
|
|
530
|
+
|
|
531
|
+
This method processes each license reference in the provided set
|
|
532
|
+
and adds corresponding license information to the SPDX document's
|
|
533
|
+
extracted licensing information section.
|
|
534
|
+
|
|
535
|
+
Args:
|
|
536
|
+
lic_refs (set): Set of license references to process
|
|
537
|
+
spdx_document (dict): Reference to the SPDX document being built,
|
|
538
|
+
which will be modified in place
|
|
539
|
+
|
|
540
|
+
Note:
|
|
541
|
+
This method modifies the spdx_document dictionary in place by adding
|
|
542
|
+
entries to the 'hasExtractedLicensingInfos' list.
|
|
543
|
+
"""
|
|
544
|
+
for lic_ref in lic_refs:
|
|
545
|
+
license_info = self._parse_license_ref(lic_ref)
|
|
546
|
+
spdx_document['hasExtractedLicensingInfos'].append(license_info)
|
|
547
|
+
|
|
548
|
+
def _parse_license_ref(self, lic_ref: str) -> dict:
    """
    Build the extracted-licensing entry for a single license reference.

    The reference is split into its detecting source and its license name via
    self._extract_license_info(); the name is shown with dashes replaced by
    spaces, and the source (when there is one) is mentioned in the comment.

    Args:
        lic_ref (str): License reference identifier to describe

    Returns:
        dict: Entry with the following keys:
            - licenseId: The license reference exactly as given
            - name: The license name with '-' replaced by ' '
            - extractedText: A fixed placeholder asking for a source review
            - comment: 'Detected license', followed by the source if known
    """
    detected_by, raw_name = self._extract_license_info(lic_ref)

    # Only mention the detecting tool when one was recognised
    if detected_by:
        source_text = f' by {detected_by}.'
    else:
        source_text = '.'

    license_info = {}
    license_info['licenseId'] = lic_ref
    license_info['name'] = raw_name.replace('-', ' ')
    license_info['extractedText'] = 'Detected license, please review component source code.'
    license_info['comment'] = f'Detected license{source_text}'
    return license_info
|
|
575
|
+
|
|
576
|
+
def _extract_license_info(self, lic_ref: str):
    """
    Split a license reference into its detecting source and license name.

    A reference of the form 'LicenseRef-<source>-<name>' is recognised when
    <source> is 'scancode' or 'scanoss' (matched case-insensitively). A
    'LicenseRef-<name>' reference without one of those sources yields an
    empty source. Anything that does not match the pattern at all is
    returned unchanged as the name.

    Args:
        lic_ref (str): License reference identifier to parse

    Returns:
        tuple: A (source, name) pair where:
            - source (str): The matched source without its trailing dash,
              or '' if none was matched
            - name (str): The remainder of the reference, or the whole
              reference if the pattern did not match
    """
    # The pattern is anchored with '^', so match() and search() agree here
    parsed = re.match(r'^LicenseRef-(scancode-|scanoss-|)(\S+)$', lic_ref, re.IGNORECASE)
    if parsed is None:
        return '', lic_ref

    prefix, name = parsed.groups()
    return prefix.replace('-', ''), name
|
|
600
|
+
|
|
601
|
+
def _write_output(self, data: dict, output_file: str = None) -> bool:
    """
    Write the SPDX document as indented JSON to the selected output.

    The destination handle comes from self._get_output_file(), which may
    return a newly opened file or sys.stdout. Any handle other than
    sys.stdout is always closed here, including when the path came from
    self.output_file rather than the argument, and when writing fails.

    The document is serialised before the destination is opened, so a
    serialisation error does not leave a truncated output file behind.

    Args:
        data (dict): The SPDX document to write
        output_file (str): Optional output path passed to _get_output_file()

    Returns:
        bool: True if the document was written, False if any error occurred
              (the error is reported through self.print_stderr()).
    """
    # Local import keeps this method self-contained for the stdout identity check
    import sys

    try:
        text = json.dumps(data, indent=2)
        file = self._get_output_file(output_file)
        try:
            print(text, file=file)
        finally:
            # Never close stdout; always close a file that was opened for us
            if file is not sys.stdout:
                file.close()
        return True
    except Exception as e:
        self.print_stderr(f'Error writing output: {str(e)}')
        return False
|
|
612
|
+
|
|
613
|
+
def _get_output_file(self, output_file: str = None):
    """
    Return the handle that output should be written to.

    The explicit output_file argument takes precedence; if it is empty,
    self.output_file is used instead. When neither names a file, sys.stdout
    is returned. A named file is opened for writing ('w'), and closing it is
    left to the caller.

    Args:
        output_file (str): Optional path of the file to write to

    Returns:
        A writable file object for the chosen path, or sys.stdout.
    """
    target = output_file or self.output_file
    if target:
        return open(target, 'w')
    return sys.stdout
|
|
261
618
|
|
|
262
619
|
def produce_from_str(self, json_str: str, output_file: str = None) -> bool:
|
|
263
620
|
"""
|
|
@@ -298,9 +655,10 @@ class SpdxLite:
|
|
|
298
655
|
:return: True if successful, False otherwise
|
|
299
656
|
"""
|
|
300
657
|
try:
|
|
301
|
-
f_name =
|
|
302
|
-
with
|
|
303
|
-
|
|
658
|
+
f_name = importlib_resources.files(__name__) / filename
|
|
659
|
+
with importlib_resources.as_file(f_name) as f:
|
|
660
|
+
with open(f, 'r', encoding='utf-8') as file:
|
|
661
|
+
data = json.load(file)
|
|
304
662
|
except Exception as e:
|
|
305
663
|
self.print_stderr(f'ERROR: Problem parsing SPDX license input JSON: {e}')
|
|
306
664
|
return False
|
|
@@ -318,8 +676,6 @@ class SpdxLite:
|
|
|
318
676
|
self._spdx_licenses[lic_id_short] = lic_id
|
|
319
677
|
if lic_name:
|
|
320
678
|
self._spdx_lic_names[lic_name] = lic_id
|
|
321
|
-
# self.print_stderr(f'Licenses: {self._spdx_licenses}')
|
|
322
|
-
# self.print_stderr(f'Lookup: {self._spdx_lic_lookup}')
|
|
323
679
|
return True
|
|
324
680
|
|
|
325
681
|
def get_spdx_license_id(self, lic_name: str) -> str:
|
|
@@ -346,6 +702,8 @@ class SpdxLite:
|
|
|
346
702
|
return lic_id
|
|
347
703
|
self.print_debug(f'Warning: Failed to find valid SPDX license identifier for: {lic_name}')
|
|
348
704
|
return None
|
|
705
|
+
|
|
706
|
+
|
|
349
707
|
#
|
|
350
708
|
# End of SpdxLite Class
|
|
351
709
|
#
|