scanoss 1.12.2__py3-none-any.whl → 1.43.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. protoc_gen_swagger/__init__.py +13 -13
  2. protoc_gen_swagger/options/__init__.py +13 -13
  3. protoc_gen_swagger/options/annotations_pb2.py +18 -12
  4. protoc_gen_swagger/options/annotations_pb2.pyi +48 -0
  5. protoc_gen_swagger/options/annotations_pb2_grpc.py +20 -0
  6. protoc_gen_swagger/options/openapiv2_pb2.py +110 -99
  7. protoc_gen_swagger/options/openapiv2_pb2.pyi +1317 -0
  8. protoc_gen_swagger/options/openapiv2_pb2_grpc.py +20 -0
  9. scanoss/__init__.py +18 -18
  10. scanoss/api/__init__.py +17 -17
  11. scanoss/api/common/__init__.py +17 -17
  12. scanoss/api/common/v2/__init__.py +17 -17
  13. scanoss/api/common/v2/scanoss_common_pb2.py +49 -20
  14. scanoss/api/common/v2/scanoss_common_pb2_grpc.py +25 -0
  15. scanoss/api/components/__init__.py +17 -17
  16. scanoss/api/components/v2/__init__.py +17 -17
  17. scanoss/api/components/v2/scanoss_components_pb2.py +68 -43
  18. scanoss/api/components/v2/scanoss_components_pb2_grpc.py +83 -22
  19. scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py +136 -21
  20. scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py +766 -13
  21. scanoss/api/dependencies/__init__.py +17 -17
  22. scanoss/api/dependencies/v2/__init__.py +17 -17
  23. scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py +56 -29
  24. scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py +94 -8
  25. scanoss/api/geoprovenance/__init__.py +23 -0
  26. scanoss/api/geoprovenance/v2/__init__.py +23 -0
  27. scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2.py +92 -0
  28. scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2_grpc.py +381 -0
  29. scanoss/api/licenses/__init__.py +23 -0
  30. scanoss/api/licenses/v2/__init__.py +23 -0
  31. scanoss/api/licenses/v2/scanoss_licenses_pb2.py +84 -0
  32. scanoss/api/licenses/v2/scanoss_licenses_pb2_grpc.py +302 -0
  33. scanoss/api/scanning/__init__.py +17 -17
  34. scanoss/api/scanning/v2/__init__.py +17 -17
  35. scanoss/api/scanning/v2/scanoss_scanning_pb2.py +42 -13
  36. scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py +86 -7
  37. scanoss/api/semgrep/__init__.py +17 -17
  38. scanoss/api/semgrep/v2/__init__.py +17 -17
  39. scanoss/api/semgrep/v2/scanoss_semgrep_pb2.py +50 -23
  40. scanoss/api/semgrep/v2/scanoss_semgrep_pb2_grpc.py +151 -16
  41. scanoss/api/vulnerabilities/__init__.py +17 -17
  42. scanoss/api/vulnerabilities/v2/__init__.py +17 -17
  43. scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py +78 -31
  44. scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py +282 -18
  45. scanoss/cli.py +2359 -370
  46. scanoss/components.py +187 -94
  47. scanoss/constants.py +22 -0
  48. scanoss/cryptography.py +308 -0
  49. scanoss/csvoutput.py +91 -58
  50. scanoss/cyclonedx.py +221 -63
  51. scanoss/data/build_date.txt +1 -1
  52. scanoss/data/osadl-copyleft.json +133 -0
  53. scanoss/data/scanoss-settings-schema.json +254 -0
  54. scanoss/delta.py +197 -0
  55. scanoss/export/__init__.py +23 -0
  56. scanoss/export/dependency_track.py +227 -0
  57. scanoss/file_filters.py +582 -0
  58. scanoss/filecount.py +75 -69
  59. scanoss/gitlabqualityreport.py +214 -0
  60. scanoss/header_filter.py +563 -0
  61. scanoss/inspection/__init__.py +23 -0
  62. scanoss/inspection/policy_check/__init__.py +0 -0
  63. scanoss/inspection/policy_check/dependency_track/__init__.py +0 -0
  64. scanoss/inspection/policy_check/dependency_track/project_violation.py +479 -0
  65. scanoss/inspection/policy_check/policy_check.py +222 -0
  66. scanoss/inspection/policy_check/scanoss/__init__.py +0 -0
  67. scanoss/inspection/policy_check/scanoss/copyleft.py +243 -0
  68. scanoss/inspection/policy_check/scanoss/undeclared_component.py +309 -0
  69. scanoss/inspection/summary/__init__.py +0 -0
  70. scanoss/inspection/summary/component_summary.py +170 -0
  71. scanoss/inspection/summary/license_summary.py +191 -0
  72. scanoss/inspection/summary/match_summary.py +341 -0
  73. scanoss/inspection/utils/file_utils.py +44 -0
  74. scanoss/inspection/utils/license_utils.py +123 -0
  75. scanoss/inspection/utils/markdown_utils.py +63 -0
  76. scanoss/inspection/utils/scan_result_processor.py +417 -0
  77. scanoss/osadl.py +125 -0
  78. scanoss/results.py +275 -0
  79. scanoss/scancodedeps.py +87 -38
  80. scanoss/scanner.py +431 -539
  81. scanoss/scanners/__init__.py +23 -0
  82. scanoss/scanners/container_scanner.py +476 -0
  83. scanoss/scanners/folder_hasher.py +358 -0
  84. scanoss/scanners/scanner_config.py +73 -0
  85. scanoss/scanners/scanner_hfh.py +252 -0
  86. scanoss/scanoss_settings.py +337 -0
  87. scanoss/scanossapi.py +140 -101
  88. scanoss/scanossbase.py +59 -22
  89. scanoss/scanossgrpc.py +799 -251
  90. scanoss/scanpostprocessor.py +294 -0
  91. scanoss/scantype.py +22 -21
  92. scanoss/services/dependency_track_service.py +132 -0
  93. scanoss/spdxlite.py +532 -174
  94. scanoss/threadeddependencies.py +148 -47
  95. scanoss/threadedscanning.py +53 -37
  96. scanoss/utils/__init__.py +23 -0
  97. scanoss/utils/abstract_presenter.py +103 -0
  98. scanoss/utils/crc64.py +96 -0
  99. scanoss/utils/file.py +84 -0
  100. scanoss/utils/scanoss_scan_results_utils.py +41 -0
  101. scanoss/utils/simhash.py +198 -0
  102. scanoss/winnowing.py +241 -63
  103. {scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info}/METADATA +18 -9
  104. scanoss-1.43.1.dist-info/RECORD +110 -0
  105. {scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info}/WHEEL +1 -1
  106. scanoss-1.12.2.dist-info/RECORD +0 -58
  107. {scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info}/entry_points.txt +0 -0
  108. {scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info/licenses}/LICENSE +0 -0
  109. {scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info}/top_level.txt +0 -0
scanoss/spdxlite.py CHANGED
@@ -1,34 +1,37 @@
1
1
  """
2
- SPDX-License-Identifier: MIT
3
-
4
- Copyright (c) 2021, SCANOSS
5
-
6
- Permission is hereby granted, free of charge, to any person obtaining a copy
7
- of this software and associated documentation files (the "Software"), to deal
8
- in the Software without restriction, including without limitation the rights
9
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
- copies of the Software, and to permit persons to whom the Software is
11
- furnished to do so, subject to the following conditions:
12
-
13
- The above copyright notice and this permission notice shall be included in
14
- all copies or substantial portions of the Software.
15
-
16
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
- THE SOFTWARE.
2
+ SPDX-License-Identifier: MIT
3
+
4
+ Copyright (c) 2021, SCANOSS
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
23
  """
24
- import json
25
- import os.path
26
- import sys
27
- import hashlib
24
+
28
25
  import datetime
29
26
  import getpass
27
+ import hashlib
28
+ import json
29
+ import os.path
30
30
  import re
31
- import pkg_resources
31
+ import sys
32
+
33
+ import importlib_resources
34
+ from packageurl import PackageURL
32
35
 
33
36
  from . import __version__
34
37
 
@@ -68,76 +71,192 @@ class SpdxLite:
68
71
  :param data: json - JSON object
69
72
  :return: summary dictionary
70
73
  """
71
- if not data:
74
+ if data is None:
72
75
  self.print_stderr('ERROR: No JSON data provided to parse.')
73
76
  return None
74
- self.print_debug(f'Processing raw results into summary format...')
77
+ if len(data) == 0:
78
+ self.print_debug('Warning: Empty scan results provided. Returning empty summary.')
79
+ return {}
80
+
81
+ self.print_debug('Processing raw results into summary format...')
82
+ return self._process_files(data)
83
+
84
+ def _process_files(self, data: json) -> dict:
85
+ """
86
+ Process raw results and build a component summary.
87
+
88
+ Args:
89
+ data: JSON data containing raw results
90
+
91
+ Returns:
92
+ dict: The built summary dictionary
93
+ """
75
94
  summary = {}
76
- for f in data:
77
- file_details = data.get(f)
78
- # print(f'File: {f}: {file_details}\n')
79
- for d in file_details:
80
- id_details = d.get("id")
81
- if not id_details or id_details == 'none': # Ignore files with no ids
82
- continue
83
- purl = None
84
- if id_details == 'dependency': # Process dependency data
85
- dependencies = d.get("dependencies")
86
- if not dependencies:
87
- self.print_stderr(f'Warning: No Dependencies found for {f}: {file_details}')
88
- continue
89
- for deps in dependencies:
90
- # print(f'File: {f} Deps: {deps}')
91
- purl = deps.get("purl")
92
- if not purl:
93
- self.print_stderr(f'Warning: No PURL found for {f}: {deps}')
94
- continue
95
- if summary.get(purl):
96
- self.print_debug(f'Component {purl} already stored: {summary.get(purl)}')
97
- continue
98
- fd = {}
99
- for field in ['component', 'version', 'url']:
100
- fd[field] = deps.get(field, '')
101
- licenses = deps.get('licenses')
102
- fdl = []
103
- dc = []
104
- for lic in licenses:
105
- name = lic.get("name")
106
- if name not in dc: # Only save the license name once
107
- fdl.append({'id': name})
108
- dc.append(name)
109
- fd['licenses'] = fdl
110
- summary[purl] = fd
111
- else: # Normal file id type
112
- purls = d.get('purl')
113
- if not purls:
114
- self.print_stderr(f'Purl block missing for {f}: {file_details}')
115
- continue
116
- for p in purls:
117
- self.print_debug(f'Purl: {p}')
118
- purl = p
119
- break
120
- if not purl:
121
- self.print_stderr(f'Warning: No PURL found for {f}: {file_details}')
122
- continue
123
- if summary.get(purl):
124
- self.print_debug(f'Component {purl} already stored: {summary.get(purl)}')
125
- continue
126
- fd = {}
127
- for field in ['id', 'vendor', 'component', 'version', 'latest', 'url']:
128
- fd[field] = d.get(field)
129
- licenses = d.get('licenses')
130
- fdl = []
131
- dc = []
132
- for lic in licenses:
133
- name = lic.get("name")
134
- if name not in dc: # Only save the license name once
135
- fdl.append({'id': name})
136
- dc.append(name)
137
- fd['licenses'] = fdl
138
- summary[purl] = fd
95
+ for file_path in data:
96
+ file_details = data.get(file_path)
97
+ # summary is passed by reference and modified inside the function
98
+ self._process_entries(file_path, file_details, summary)
139
99
  return summary
140
100
 
101
+ def _process_entries(self, file_path: str, file_details: list, summary: dict):
102
+ """
103
+ Process entries for a single file.
104
+
105
+ Args:
106
+ file_path: Path to the file being processed
107
+ file_details: Results of the file
108
+ summary: Reference to summary dictionary that will be modified in place
109
+ """
110
+ for entry in file_details:
111
+ id_details = entry.get('id')
112
+ if not id_details or id_details == 'none':
113
+ continue
114
+
115
+ if id_details == 'dependency':
116
+ self._process_dependency_entry(file_path, entry, summary)
117
+ else:
118
+ self._process_file_entry(file_path, entry, summary)
119
+
120
+ def _process_dependency_entry(self, file_path: str, entry: dict, summary: dict):
121
+ """
122
+ Process a dependency type entry.
123
+
124
+ Args:
125
+ file_path: Path to the file being processed
126
+ entry: The dependency entry to process
127
+ summary: Reference to summary dictionary that will be modified in place
128
+ """
129
+ dependencies = entry.get('dependencies')
130
+ if not dependencies:
131
+ self.print_stderr(f'Warning: No Dependencies found for {file_path}')
132
+ return
133
+
134
+ for dep in dependencies:
135
+ purl = dep.get('purl')
136
+ if not self._is_valid_purl(file_path, dep, purl, summary):
137
+ continue
138
+ # Modifying the summary dictionary directly as it's passed by reference
139
+ summary[purl] = self._create_dependency_summary(dep)
140
+
141
+ def _process_file_entry(self, file_path: str, entry: dict, summary: dict):
142
+ """
143
+ Process file entry.
144
+
145
+ Args:
146
+ file_path: Path to the file being processed
147
+ entry: Process file match entry
148
+ summary: Reference to summary dictionary that will be modified in place
149
+ """
150
+ purls = entry.get('purl')
151
+ if not purls:
152
+ self.print_stderr(f'Purl block missing for {file_path}')
153
+ return
154
+
155
+ purl = purls[0] if purls else None
156
+ if not self._is_valid_purl(file_path, entry, purl, summary):
157
+ return
158
+
159
+ summary[purl] = self._create_file_summary(entry)
160
+
161
+ def _is_valid_purl(self, file_path: str, entry: dict, purl: str, summary: dict) -> bool:
162
+ """
163
+ Check if purl is valid and not already processed.
164
+
165
+ Args:
166
+ file_path: Path to the file being processed
167
+ entry: The entry containing the PURL
168
+ purl: The PURL to validate
169
+ summary: Reference to summary dictionary to check for existing entries
170
+
171
+ Returns:
172
+ bool: True if purl is valid and not already processed
173
+ """
174
+ if not purl:
175
+ self.print_stderr(f'Warning: No PURL found for {file_path}: {entry}')
176
+ return False
177
+
178
+ if summary.get(purl):
179
+ self.print_debug(f'Component {purl} already stored: {summary.get(purl)}')
180
+ return False
181
+
182
+ return True
183
+
184
+ def _create_dependency_summary(self, dep: dict) -> dict:
185
+ """
186
+ Create summary for dependency entry.
187
+
188
+ This method extracts relevant fields from a dependency entry and creates a
189
+ standardized summary dictionary. It handles fields like component, version,
190
+ and URL, with special processing for licenses.
191
+
192
+ Args:
193
+ dep (dict): The dependency entry containing component information
194
+
195
+ Returns:
196
+ dict: A new summary dictionary containing the extracted and processed fields
197
+ """
198
+ summary = {}
199
+ for field in ['component', 'version', 'url']:
200
+ summary[field] = dep.get(field, '')
201
+ summary['licenses'] = self._process_licenses(dep.get('licenses'))
202
+ return summary
203
+
204
+ def _create_file_summary(self, entry: dict) -> dict:
205
+ """
206
+ Create summary for file entry.
207
+
208
+ This method extracts set of fields from file entry and creates a standardized summary dictionary.
209
+
210
+ Args:
211
+ entry (dict): The file entry containing the metadata to summarize
212
+
213
+ Returns:
214
+ dict: A new summary dictionary containing all extracted and processed fields
215
+ """
216
+ summary = {}
217
+ fields = ['id', 'vendor', 'component', 'version', 'latest',
218
+ 'url', 'url_hash', 'download_url']
219
+ for field in fields:
220
+ summary[field] = entry.get(field)
221
+ summary['licenses'] = self._process_licenses(entry.get('licenses'))
222
+ return summary
223
+
224
+ def _process_licenses(self, licenses: list) -> list:
225
+ """
226
+ Process license information and remove duplicates.
227
+
228
+ This method filters license information to include only licenses from trusted sources
229
+ ('component_declared', 'license_file', 'file_header'). Licenses with an unspecified
230
+ source (None or '') are allowed. Non-empty, non-allowed sources are excluded. It also
231
+ removes any duplicate license names.
232
+ The result is a simplified list of license dictionaries containing only the 'id' field.
233
+
234
+ Args:
235
+ licenses (list): A list of license dictionaries, each containing at least 'name'
236
+ and 'source' fields. Can be None or empty.
237
+
238
+ Returns:
239
+ list: A filtered and deduplicated list of license dictionaries, where each
240
+ dictionary contains only an 'id' field matching the original license name.
241
+ Returns an empty list if input is None or empty.
242
+ """
243
+ if not licenses:
244
+ return []
245
+
246
+ processed_licenses = []
247
+ seen_names = set()
248
+
249
+ for license_info in licenses:
250
+ name = license_info.get('name')
251
+ source = license_info.get('source')
252
+ if source not in (None, '') and source not in ("component_declared", "license_file", "file_header"):
253
+ continue
254
+ if name and name not in seen_names:
255
+ processed_licenses.append({'id': name})
256
+ seen_names.add(name)
257
+
258
+ return processed_licenses
259
+
141
260
  def produce_from_file(self, json_file: str, output_file: str = None) -> bool:
142
261
  """
143
262
  Parse plain/raw input JSON file and produce SPDX Lite output
@@ -163,101 +282,339 @@ class SpdxLite:
163
282
  :return: True if successful, False otherwise
164
283
  """
165
284
  raw_data = self.parse(data)
166
- if not raw_data:
285
+ if raw_data is None:
167
286
  self.print_stderr('ERROR: No SPDX data returned for the JSON string provided.')
168
287
  return False
288
+ if len(raw_data) == 0:
289
+ self.print_debug('Warning: Empty scan results - generating minimal SPDX Lite document with no packages.')
290
+
169
291
  self.load_license_data()
170
- # Using this SPDX version as the spec
171
- # https://github.com/spdx/spdx-spec/blob/development/v2.2.2/examples/SPDXJSONExample-v2.2.spdx.json
172
- # Validate using:
173
- # pip3 install jsonschema
174
- # jsonschema -i spdxlite.json <(curl https://raw.githubusercontent.com/spdx/spdx-spec/v2.2/schemas/spdx-schema.json)
175
- # Validation can also be done online here: https://tools.spdx.org/app/validate/
292
+ spdx_document = self._create_base_document(raw_data)
293
+ self._process_packages(raw_data, spdx_document)
294
+ return self._write_output(spdx_document, output_file)
295
+
296
+ def _create_base_document(self, raw_data: dict) -> dict:
297
+ """
298
+ Create the base SPDX document structure.
299
+
300
+ This method initializes a new SPDX document with standard fields required by
301
+ the SPDX 2.2 specification. It generates a unique document namespace using
302
+ a hash of the raw data and current timestamp.
303
+
304
+ Args:
305
+ raw_data (dict): The raw component data used to create a unique identifier
306
+ for the document namespace
307
+
308
+ Returns:
309
+ dict: A dictionary containing the base SPDX document structure with the
310
+ following fields:
311
+ - spdxVersion: The SPDX specification version
312
+ - dataLicense: The license for the SPDX document itself
313
+ - SPDXID: The document's unique identifier
314
+ - name: The name of the SBOM
315
+ - creationInfo: Information about when and how the document was created
316
+ - documentNamespace: A unique URI for this document
317
+ - documentDescribes: List of packages described (initially empty)
318
+ - hasExtractedLicensingInfos: List of licenses (initially empty)
319
+ - packages: List of package information (initially empty)
320
+ """
176
321
  now = datetime.datetime.utcnow()
177
322
  md5hex = hashlib.md5(f'{raw_data}-{now}'.encode('utf-8')).hexdigest()
178
- data = {
323
+
324
+ return {
179
325
  'spdxVersion': 'SPDX-2.2',
180
326
  'dataLicense': 'CC0-1.0',
181
- 'SPDXID': f'SPDXRef-{md5hex}',
327
+ 'SPDXID': 'SPDXRef-DOCUMENT',
182
328
  'name': 'SCANOSS-SBOM',
183
- 'creationInfo': {
184
- 'created': now.strftime('%Y-%m-%dT%H:%M:%S') + now.strftime('.%f')[:4] + 'Z',
185
- 'creators': [f'Tool: SCANOSS-PY: {__version__}', f'Person: {getpass.getuser()}']
186
- },
329
+ 'creationInfo': self._create_creation_info(now),
187
330
  'documentNamespace': f'https://spdx.org/spdxdocs/scanoss-py-{__version__}-{md5hex}',
188
331
  'documentDescribes': [],
189
332
  'hasExtractedLicensingInfos': [],
190
- 'packages': []
333
+ 'packages': [],
191
334
  }
192
- lic_refs = set() # Hash Set of non-SPDX license references
193
- for purl in raw_data:
194
- comp = raw_data.get(purl)
195
- licenses = comp.get('licenses')
196
- lic_text = 'NOASSERTION'
197
- if licenses:
198
- lic_set = set()
199
- for lic in licenses:
200
- lc_id = lic.get('id')
201
- if lc_id:
202
- spdx_id = self.get_spdx_license_id(lc_id)
203
- if not spdx_id:
204
- if not lc_id.startswith('LicenseRef'):
205
- lc_id = f'LicenseRef-{lc_id}' # Make sure it has a license ref in its name
206
- lic_refs.add(lc_id) # save non-SPDX license for later reference
207
- lic_set.add(spdx_id if spdx_id else lc_id)
208
- if len(lic_set) > 0:
209
- lic_text = ' AND '.join(lic_set)
210
- if len(lic_set) > 1:
211
- lic_text = f'({lic_text})' # wrap the names in () if there is more than one
212
- comp_name = comp.get('component')
213
- comp_ver = comp.get('version')
214
- purl_ver = f'{purl}@{comp_ver}'
215
- purl_hash = hashlib.md5(f'{purl_ver}'.encode('utf-8')).hexdigest()
216
- purl_spdx = f'SPDXRef-{purl_hash}'
217
- data['documentDescribes'].append(purl_spdx)
218
- data['packages'].append({
219
- 'name': comp_name,
220
- 'SPDXID': purl_spdx,
221
- 'versionInfo': comp_ver,
222
- 'downloadLocation': 'NOASSERTION', # TODO Add actual download location
223
- 'homepage': comp.get('url', ''),
224
- 'licenseDeclared': lic_text,
225
- 'licenseConcluded': 'NOASSERTION',
226
- 'filesAnalyzed': False,
227
- 'copyrightText': 'NOASSERTION',
228
- 'externalRefs': [{
335
+
336
+ def _create_creation_info(self, timestamp: datetime.datetime) -> dict:
337
+ """
338
+ Create the creation info section of an SPDX document.
339
+
340
+ This method generates the creation information required by the SPDX specification,
341
+ including timestamps, creator information, and document type.
342
+
343
+ Args:
344
+ timestamp (datetime.datetime): The UTC timestamp representing when the
345
+ document was created
346
+
347
+ Returns:
348
+ dict: A dictionary containing creation information with the following fields:
349
+ - created: ISO 8601 formatted timestamp
350
+ - creators: List of entities involved in creating the document
351
+ (tool, person, and organization)
352
+ - comment: Additional information about the SBOM type
353
+ """
354
+ return {
355
+ 'created': timestamp.strftime('%Y-%m-%dT%H:%M:%SZ'),
356
+ 'creators': [
357
+ f'Tool: SCANOSS-PY: {__version__}',
358
+ f'Person: {getpass.getuser()}',
359
+ 'Organization: SCANOSS'
360
+ ],
361
+ 'comment': 'SBOM Build information - SBOM Type: Build',
362
+ }
363
+
364
+ def _process_packages(self, raw_data: dict, spdx_document: dict):
365
+ """
366
+ Process packages and add them to the SPDX document.
367
+
368
+ This method iterates through the raw component data, creates package information
369
+ for each component, and adds them to the SPDX document. It also collects
370
+ license references to be processed separately.
371
+
372
+ Args:
373
+ raw_data (dict): Dictionary of package data indexed by PURL
374
+ (Package URL identifiers)
375
+ spdx_document (dict): Reference to the SPDX document being built,
376
+ which will be modified in place
377
+
378
+ Note:
379
+ This method modifies the spdx_document dictionary in place by:
380
+ 1. Adding package information to the 'packages' list
381
+ 2. Adding package SPDXIDs to the 'documentDescribes' list
382
+ 3. Indirectly populating 'hasExtractedLicensingInfos' via _process_license_refs()
383
+ """
384
+ lic_refs = set()
385
+
386
+ for purl, comp in raw_data.items():
387
+ package_info = self._create_package_info(purl, comp, lic_refs)
388
+ spdx_document['packages'].append(package_info)
389
+ spdx_document['documentDescribes'].append(package_info['SPDXID'])
390
+
391
+ self._process_license_refs(lic_refs, spdx_document)
392
+
393
+ def _create_package_info(self, purl: str, comp: dict, lic_refs: set) -> dict:
394
+ """
395
+ Create package information for SPDX document.
396
+
397
+ This method generates a complete package information entry following the SPDX
398
+ specification format. It creates a unique identifier for the package based on
399
+ its PURL and version, processes license information, and formats all required
400
+ fields for the SPDX document.
401
+
402
+ Args:
403
+ purl (str): Package URL identifier for the component
404
+ comp (dict): Component information dictionary containing metadata like
405
+ component name, version, URLs, and license information
406
+ lic_refs (set): Reference to a set that will be populated with license
407
+ references found in this package. This set is modified in place.
408
+
409
+ Returns:
410
+ dict: A dictionary containing all required SPDX package fields including:
411
+ - name: Component name
412
+ - SPDXID: Unique identifier for this package within the document
413
+ - versionInfo: Component version
414
+ - downloadLocation: URL where the package can be downloaded
415
+ - homepage: Component homepage URL
416
+ - licenseDeclared: Formatted license expression
417
+ - licenseConcluded: NOASSERTION as automated conclusion isn't possible
418
+ - filesAnalyzed: False as files are not individually analyzed
419
+ - copyrightText: NOASSERTION as copyright text isn't available
420
+ - supplier: Organization name from vendor information
421
+ - externalRefs: Package URL reference for package manager integration
422
+ - checksums: MD5 hash of the package if available
423
+ """
424
+ lic_text = self._process_package_licenses(comp.get('licenses', []), lic_refs)
425
+ comp_ver = comp.get('version')
426
+ purl_ver = f'{purl}@{comp_ver}'
427
+ purl_hash = hashlib.md5(purl_ver.encode('utf-8')).hexdigest()
428
+
429
+ return {
430
+ 'name': comp.get('component'),
431
+ 'SPDXID': f'SPDXRef-{purl_hash}',
432
+ 'versionInfo': comp_ver,
433
+ 'downloadLocation': comp.get('download_url') or comp.get('url'),
434
+ 'homepage': comp.get('url', ''),
435
+ 'licenseDeclared': lic_text,
436
+ 'licenseConcluded': 'NOASSERTION',
437
+ 'filesAnalyzed': False,
438
+ 'copyrightText': 'NOASSERTION',
439
+ 'supplier': f'Organization: {comp.get("vendor", "NOASSERTION")}',
440
+ 'externalRefs': [
441
+ {
229
442
  'referenceCategory': 'PACKAGE-MANAGER',
230
- 'referenceLocator': purl_ver,
443
+ 'referenceLocator': PackageURL.from_string(purl_ver).to_string(),
231
444
  'referenceType': 'purl'
232
- }]
233
- })
234
- # End purls for loop
235
- for lic_ref in lic_refs: # Insert all the non-SPDX license references
445
+ }
446
+ ],
447
+ 'checksums': [
448
+ {
449
+ 'algorithm': 'MD5',
450
+ 'checksumValue': comp.get('url_hash') or '0' * 32
451
+ }
452
+ ],
453
+ }
454
+
455
+ def _process_package_licenses(self, licenses: list, lic_refs: set) -> str:
456
+ """
457
+ Process licenses and return license text formatted for SPDX.
458
+
459
+ This method processes a list of license objects, extracts valid license IDs,
460
+ converts them to SPDX format, and combines them into a properly formatted
461
+ license expression.
462
+
463
+ Args:
464
+ licenses (list): List of license dictionaries, each containing at least
465
+ an 'id' field
466
+ lic_refs (set): Reference to a set that will collect license references.
467
+ This set is modified in place.
468
+
469
+ Returns:
470
+ str: A formatted license expression string following SPDX syntax.
471
+ Returns 'NOASSERTION' if no valid licenses are found.
472
+ """
473
+ if not licenses:
474
+ return 'NOASSERTION'
475
+
476
+ lic_set = set()
477
+ for lic in licenses:
478
+ lc_id = lic.get('id')
479
+ self._process_license_id(lc_id, lic_refs, lic_set)
480
+
481
+ return self._format_license_text(lic_set)
482
+
483
+ def _process_license_id(self, lc_id: str, lic_refs: set, lic_set: set):
484
+ """
485
+ Process individual license ID and add to appropriate sets.
486
+
487
+ This method attempts to convert a license ID to its SPDX equivalent.
488
+ If not found in the SPDX license list, it's formatted as a LicenseRef
489
+ and added to the license references set.
490
+
491
+ Args:
492
+ lc_id (str): The license ID to process
493
+ lic_refs (set): Reference to a set that collects license references
494
+ for later processing. Modified in place.
495
+ lic_set (set): Reference to a set collecting all license IDs for
496
+ """
497
+ spdx_id = self.get_spdx_license_id(lc_id)
498
+ if not spdx_id:
499
+ if not lc_id.startswith('LicenseRef'):
500
+ lc_id = f'LicenseRef-{lc_id}'
501
+ lic_refs.add(lc_id)
502
+ lic_set.add(spdx_id if spdx_id else lc_id)
503
+
504
+ def _format_license_text(self, lic_set: set) -> str:
505
+ """
506
+ Format the license text with proper SPDX syntax.
507
+
508
+ This method combines multiple license IDs with the 'AND' operator
509
+ according to SPDX specification rules. If multiple licenses are present,
510
+ the expression is enclosed in parentheses.
511
+
512
+ Args:
513
+ lic_set (set): Set of license IDs to format
514
+
515
+ Returns:
516
+ str: A properly formatted SPDX license expression.
517
+ Returns 'NOASSERTION' if the set is empty.
518
+ """
519
+ if not lic_set:
520
+ return 'NOASSERTION'
521
+
522
+ lic_text = ' AND '.join(lic_set)
523
+ if len(lic_set) > 1:
524
+ lic_text = f'({lic_text})'
525
+ return lic_text
526
+
527
+ def _process_license_refs(self, lic_refs: set, spdx_document: dict):
528
+ """
529
+ Process and add license references to the SPDX document.
530
+
531
+ This method processes each license reference in the provided set
532
+ and adds corresponding license information to the SPDX document's
533
+ extracted licensing information section.
534
+
535
+ Args:
536
+ lic_refs (set): Set of license references to process
537
+ spdx_document (dict): Reference to the SPDX document being built,
538
+ which will be modified in place
539
+
540
+ Note:
541
+ This method modifies the spdx_document dictionary in place by adding
542
+ entries to the 'hasExtractedLicensingInfos' list.
543
+ """
544
+ for lic_ref in lic_refs:
545
+ license_info = self._parse_license_ref(lic_ref)
546
+ spdx_document['hasExtractedLicensingInfos'].append(license_info)
547
+
548
+ def _parse_license_ref(self, lic_ref: str) -> dict:
549
+ """
550
+ Parse license reference and create info dictionary for SPDX document.
551
+
552
+ This method extracts information from a license reference identifier
553
+ and formats it into the structure required by the SPDX specification
554
+ for extracted licensing information.
555
+
556
+ Args:
557
+ lic_ref (str): License reference identifier to parse
558
+
559
+ Returns:
560
+ dict: Dictionary containing required SPDX fields for extracted license info:
561
+ - licenseId: The unique identifier for this license
562
+ - name: A readable name for the license
563
+ - extractedText: A placeholder for the actual license text
564
+ - comment: Information about how the license was detected
565
+ """
566
+ source, name = self._extract_license_info(lic_ref)
567
+ source_text = f' by {source}.' if source else '.'
568
+
569
+ return {
570
+ 'licenseId': lic_ref,
571
+ 'name': name.replace('-', ' '),
572
+ 'extractedText': 'Detected license, please review component source code.',
573
+ 'comment': f'Detected license{source_text}',
574
+ }
575
+
576
+ def _extract_license_info(self, lic_ref: str):
577
+ """
578
+ Extract source and name from license reference.
579
+
580
+ This method parses a license reference string to extract the source
581
+ (e.g., scancode, scanoss) and the actual license name using regular
582
+ expressions.
583
+
584
+ Args:
585
+ lic_ref (str): License reference identifier to parse
586
+
587
+ Returns:
588
+ tuple: A tuple containing (source, name) where:
589
+ - source (str): The tool or system that identified the license
590
+ - name (str): The actual license name
591
+ """
592
+ match = re.search(r'^LicenseRef-(scancode-|scanoss-|)(\S+)$', lic_ref, re.IGNORECASE)
593
+ if match:
594
+ source = match.group(1).replace('-', '')
595
+ name = match.group(2)
596
+ else:
236
597
  source = ''
237
- match = re.search(r'^LicenseRef-(scancode-|scanoss-|)(\S+)$', lic_ref, re.IGNORECASE)
238
- if match:
239
- source = match.group(1).replace('-', '') # source for the custom license
240
- name = match.group(2) # license name (without references, etc.)
241
- else:
242
- name = lic_ref
243
- name = name.replace('-', ' ')
244
- source = f' by {source}.' if source else '.'
245
- data['hasExtractedLicensingInfos'].append({
246
- 'licenseId': lic_ref,
247
- 'name': name,
248
- 'extractedText': 'Detected license, please review component source code.',
249
- 'comment': f'Detected license{source}'
250
- })
251
- # End license refs for loop
252
- file = sys.stdout
598
+ name = lic_ref
599
+ return source, name
600
+
601
+ def _write_output(self, data: dict, output_file: str = None) -> bool:
602
+ """Write the SPDX document to output."""
603
+ try:
604
+ file = self._get_output_file(output_file)
605
+ print(json.dumps(data, indent=2), file=file)
606
+ if output_file:
607
+ file.close()
608
+ return True
609
+ except Exception as e:
610
+ self.print_stderr(f'Error writing output: {str(e)}')
611
+ return False
612
+
613
+ def _get_output_file(self, output_file: str = None):
614
+ """Get the appropriate output file handle."""
253
615
  if not output_file and self.output_file:
254
616
  output_file = self.output_file
255
- if output_file:
256
- file = open(output_file, 'w')
257
- print(json.dumps(data, indent=2), file=file)
258
- if output_file:
259
- file.close()
260
- return True
617
+ return open(output_file, 'w') if output_file else sys.stdout
261
618
 
262
619
  def produce_from_str(self, json_str: str, output_file: str = None) -> bool:
263
620
  """
@@ -298,9 +655,10 @@ class SpdxLite:
298
655
  :return: True if successful, False otherwise
299
656
  """
300
657
  try:
301
- f_name = pkg_resources.resource_filename(__name__, filename)
302
- with open(f_name, 'r') as f:
303
- data = json.loads(f.read())
658
+ f_name = importlib_resources.files(__name__) / filename
659
+ with importlib_resources.as_file(f_name) as f:
660
+ with open(f, 'r', encoding='utf-8') as file:
661
+ data = json.load(file)
304
662
  except Exception as e:
305
663
  self.print_stderr(f'ERROR: Problem parsing SPDX license input JSON: {e}')
306
664
  return False
@@ -318,8 +676,6 @@ class SpdxLite:
318
676
  self._spdx_licenses[lic_id_short] = lic_id
319
677
  if lic_name:
320
678
  self._spdx_lic_names[lic_name] = lic_id
321
- # self.print_stderr(f'Licenses: {self._spdx_licenses}')
322
- # self.print_stderr(f'Lookup: {self._spdx_lic_lookup}')
323
679
  return True
324
680
 
325
681
  def get_spdx_license_id(self, lic_name: str) -> str:
@@ -346,6 +702,8 @@ class SpdxLite:
346
702
  return lic_id
347
703
  self.print_debug(f'Warning: Failed to find valid SPDX license identifier for: {lic_name}')
348
704
  return None
705
+
706
+
349
707
  #
350
708
  # End of SpdxLite Class
351
709
  #