scanoss 1.25.2__py3-none-any.whl → 1.26.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,378 @@
1
+ """
2
+ SPDX-License-Identifier: MIT
3
+
4
+ Copyright (c) 2025, SCANOSS
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
+ """
24
+
25
+ import json
26
+ import os.path
27
+ from abc import abstractmethod
28
+ from enum import Enum
29
+ from typing import Any, Dict
30
+
31
+ from ..scanossbase import ScanossBase
32
+ from .utils.license_utils import LicenseUtil
33
+
34
+
35
class ComponentID(Enum):
    """
    Kinds of entries that can appear in scan results.

    Each member's value is the literal string compared against a result's ``id`` field.

    Attributes:
        FILE (str): A file component (value: "file").
        SNIPPET (str): A code snippet component (value: "snippet").
        DEPENDENCY (str): A dependency component (value: "dependency").
    """

    FILE = 'file'
    SNIPPET = 'snippet'
    DEPENDENCY = 'dependency'
48
+
49
+
50
+ #
51
+ # End of ComponentID Class
52
+ #
53
+
54
+
55
+ class InspectBase(ScanossBase):
56
+ """
57
+ A base class to perform inspections over scan results.
58
+
59
+ This class provides a basis for scan results inspection, including methods for
60
+ processing scan results components and licenses.
61
+
62
+ Inherits from:
63
+ ScanossBase: A base class providing common functionality for SCANOSS-related operations.
64
+ """
65
+
66
def __init__(  # noqa: PLR0913
    self,
    debug: bool = False,
    trace: bool = True,
    quiet: bool = False,
    filepath: str = None,
    output: str = None,
):
    """
    Set up the inspector and eagerly load the scan results file.

    :param debug: Enable debug mode
    :param trace: Enable trace mode (default True)
    :param quiet: Enable quiet mode
    :param filepath: Path to the JSON results file to inspect; stored as ``self.filepath``
    :param output: Output destination; stored as ``self.output``
    """
    super().__init__(debug, trace, quiet)
    # Paths are stored before loading because _load_input_file() reads self.filepath
    self.filepath, self.output = filepath, output
    self.license_util = LicenseUtil()
    # Parsed results, or None when the file could not be loaded
    self.results = self._load_input_file()
79
+
80
+ @abstractmethod
81
+ def _get_components(self):
82
+ """
83
+ Retrieve and process components from the preloaded results.
84
+
85
+ This method performs the following steps:
86
+ 1. Checks if the results have been previously loaded (self.results).
87
+ 2. Extracts and processes components from the loaded results.
88
+
89
+ :return: A list of processed components, or None if an error occurred during any step.
90
+
91
+ Possible reasons for returning None include:
92
+ - Results not loaded (self.results is None)
93
+ - Failure to extract components from the results
94
+
95
+ Note:
96
+ - This method assumes that the results have been previously loaded and stored in self.results.
97
+ - Implementations must extract components (e.g. via `_get_components_data`,
98
+ `_get_dependencies_data`, or other helpers).
99
+ - If `self.results` is `None`, simply return `None`.
100
+ """
101
+ pass
102
+
103
def _append_component(self, components: Dict[str, Any], new_component: Dict[str, Any]) -> Dict[str, Any]:
    """
    Append a new component to the component dictionary, or update the existing entry.

    Components are keyed by ``'<purl>@<version>'``. Each entry carries these counters:
        - count: Total occurrences of this component (used by both license and component summaries)
        - declared: Number of times this component is marked as 'identified' (used by component summary)
        - undeclared: Number of times this component is marked as 'pending' (used by component summary)

    Each entry also holds a 'licenses' dictionary where every license tracks:
        - count: Number of times this license appears for this component (used by license summary)

    Incomplete input is tolerated instead of raising: an entry without a purl is skipped,
    a missing version falls back to 'unknown', and a missing/empty 'licenses' value simply
    records no licenses.

    Args:
        components: The existing dictionary of components (mutated in place)
        new_component: The new component to be added
    Returns:
        The updated components dictionary
    """
    match_id = new_component.get('id')
    purl = new_component.get('purl')
    # File and snippet matches carry a list of purls: the first one identifies the component.
    # A plain string is kept whole rather than being indexed down to its first character.
    if match_id in (ComponentID.FILE.value, ComponentID.SNIPPET.value) and isinstance(purl, (list, tuple)):
        purl = purl[0] if purl else None

    if not purl:
        self.print_debug(f'WARNING: _append_component: No purl found for new component: {new_component}')
        return components

    # Callers normally normalise the version, but never let a missing one raise a KeyError
    version = new_component.get('version') or 'unknown'
    component_key = f'{purl}@{version}'
    status = new_component.get('status')
    has_licenses = bool(new_component.get('licenses'))

    if component_key in components:
        # Component already exists, update component counters and try to append a new license
        self._update_component_counters(components[component_key], status)
        if has_licenses:
            self._append_license_to_component(components, new_component, component_key)
        # Maintain 'pending' status - takes precedence over 'identified'
        if status == 'pending':
            components[component_key]['status'] = 'pending'
        return components

    # Create a new component
    components[component_key] = {
        'purl': purl,
        'version': version,
        'licenses': {},
        'status': status,
        'count': 1,
        'declared': 1 if status == 'identified' else 0,
        'undeclared': 1 if status == 'pending' else 0,
    }
    if not has_licenses:
        self.print_debug(f'WARNING: Results missing licenses. Skipping: {new_component}')
        return components

    # Append license to component
    self._append_license_to_component(components, new_component, component_key)
    return components
162
+
163
+ def _append_license_to_component(self,
164
+ components: Dict[str, Any], new_component: Dict[str, Any], component_key: str) -> None:
165
+ """
166
+ Add or update licenses for an existing component.
167
+
168
+ For each license in the component:
169
+ - If the license already exists, increments its count
170
+ - If it's a new license, adds it with an initial count of 1
171
+
172
+ The license count is used by license_summary to track how many times each license appears
173
+ across all components. This count contributes to:
174
+ - Total number of licenses in the project
175
+ - Number of copyleft licenses when the license is marked as copyleft
176
+
177
+ Args:
178
+ components: Dictionary containing all components
179
+ new_component: Component whose licenses need to be processed
180
+ component_key: purl + version of the component to be updated
181
+ """
182
+ licenses_order_by_source_priority = self._get_licenses_order_by_source_priority(new_component['licenses'])
183
+ # Process licenses for this component
184
+ for license_item in licenses_order_by_source_priority:
185
+ if license_item.get('name'):
186
+ spdxid = license_item['name']
187
+ source = license_item.get('source')
188
+ if not source:
189
+ source = 'unknown'
190
+
191
+ if spdxid in components[component_key]['licenses']:
192
+ # If license exists, increment counter
193
+ components[component_key]['licenses'][spdxid]['count'] += 1 # Increment counter for license
194
+ else:
195
+ # If a license doesn't exist, create new entry
196
+ components[component_key]['licenses'][spdxid] = {
197
+ 'spdxid': spdxid,
198
+ 'copyleft': self.license_util.is_copyleft(spdxid),
199
+ 'url': self.license_util.get_spdx_url(spdxid),
200
+ 'source': source,
201
+ 'count': 1, # Set counter to 1 on new license
202
+ }
203
+
204
+ def _update_component_counters(self, component, status):
205
+ """Update component counters based on status."""
206
+ component['count'] += 1
207
+ if status == 'identified':
208
+ component['declared'] += 1
209
+ else:
210
+ component['undeclared'] += 1
211
+
212
def _get_components_data(self, results: Dict[str, Any], components: Dict[str, Any]) -> Dict[str, Any]:
    """
    Extract and process file and snippet components from results.

    Every entry of every result list is examined. Entries are skipped when they have no
    'id', are dependencies, have no 'status', or (for file/snippet entries) have no purl.
    A missing version is replaced with 'unknown' directly on the entry before it is handed
    to ``_append_component``, which maintains the counters and licenses:

    Component Counters (used by ComponentSummary):
        - count: Incremented for each occurrence of the component
        - declared: Incremented when component status is 'identified'
        - undeclared: Incremented when component status is 'pending'

    Args:
        results: A dictionary containing the raw results of a component scan
        components: Existing components dictionary to update (mutated in place)
    Returns:
        Updated components dictionary with file and snippet data
    """
    file_like_ids = (ComponentID.FILE.value, ComponentID.SNIPPET.value)
    for matches in results.values():
        # A result key may map to None/empty; treat that as "no matches"
        for c in matches or []:
            component_id = c.get('id')
            if not component_id:
                self.print_debug(f'WARNING: Result missing id. Skipping: {c}')
                continue
            # Skip dependency
            if component_id == ComponentID.DEPENDENCY.value:
                continue
            if not c.get('status'):
                self.print_debug(f'WARNING: Result missing status. Skipping: {c}')
                continue
            # A falsy purl covers both "missing" and "empty list"
            if component_id in file_like_ids and not c.get('purl'):
                self.print_debug(f'WARNING: Result missing purl. Skipping: {c}')
                continue
            if not c.get('version'):
                self.print_debug(f'WARNING: Result missing version. Setting it to unknown: {c}')
                c['version'] = 'unknown'  # If no version exists, set 'unknown' on the current component
            # Append component
            components = self._append_component(components, c)
    return components
264
+
265
def _get_dependencies_data(self, results: Dict[str, Any], components: Dict[str, Any]) -> Dict[str, Any]:
    """
    Extract and process dependency components from results.

    Only result entries whose id is 'dependency' are considered. Each item of their
    'dependencies' list is appended through ``_append_component``. Dependencies without a
    purl are skipped, and a dependency without a version gets 'unknown' assigned on the
    dependency itself, which is the object ``_append_component`` reads the version from.

    :param results: A dictionary containing the raw results of a component scan
    :param components: Existing components dictionary to update (mutated in place)
    :return: Updated components dictionary with dependency data
    """
    for matches in results.values():
        # A result key may map to None/empty; treat that as "no matches"
        for c in matches or []:
            component_id = c.get('id')
            if not component_id:
                self.print_debug(f'WARNING: Result missing id. Skipping: {c}')
                continue
            if not c.get('status'):
                self.print_debug(f'WARNING: Result missing status. Skipping: {c}')
                continue
            if component_id != ComponentID.DEPENDENCY.value:
                continue
            for dependency in c.get('dependencies') or []:
                if not dependency.get('purl'):
                    self.print_debug(f'WARNING: Dependency result missing purl. Skipping: {dependency}')
                    continue
                if not dependency.get('version'):
                    self.print_debug(f'WARNING: Result missing version. Setting it to unknown: {dependency}')
                    # The version must be set on the dependency, not on the parent result entry
                    dependency['version'] = 'unknown'
                # Append component
                components = self._append_component(components, dependency)
    return components
303
+
304
+ def _load_input_file(self):
305
+ """
306
+ Load the result.json file
307
+
308
+ Returns:
309
+ Dict[str, Any]: The parsed JSON data
310
+ """
311
+ if not os.path.exists(self.filepath):
312
+ self.print_stderr(f'ERROR: The file "{self.filepath}" does not exist.')
313
+ return None
314
+ with open(self.filepath, 'r') as jsonfile:
315
+ try:
316
+ return json.load(jsonfile)
317
+ except Exception as e:
318
+ self.print_stderr(f'ERROR: Problem parsing input JSON: {e}')
319
+ return None
320
+
321
+ def _convert_components_to_list(self, components: dict):
322
+ if components is None:
323
+ self.print_debug(f'WARNING: Components is empty {self.results}')
324
+ return None
325
+ results_list = list(components.values())
326
+ for component in results_list:
327
+ licenses = component.get('licenses')
328
+ if licenses is not None:
329
+ component['licenses'] = list(licenses.values())
330
+ else:
331
+ self.print_debug(f'WARNING: Licenses missing for: {component}')
332
+ component['licenses'] = []
333
+ return results_list
334
+
335
+ def _get_licenses_order_by_source_priority(self,licenses_data):
336
+ """
337
+ Select licenses based on source priority:
338
+ 1. component_declared (highest priority)
339
+ 2. license_file
340
+ 3. file_header
341
+ 4. scancode (lowest priority)
342
+
343
+ If any high-priority source is found, return only licenses from that source.
344
+ If none found, return all licenses.
345
+
346
+ Returns: list with ordered licenses by source.
347
+ """
348
+ # Define priority order (highest to lowest)
349
+ priority_sources = ['component_declared', 'license_file', 'file_header', 'scancode']
350
+
351
+ # Group licenses by source
352
+ licenses_by_source = {}
353
+ for license_item in licenses_data:
354
+
355
+ source = license_item.get('source', 'unknown')
356
+ if source not in licenses_by_source:
357
+ licenses_by_source[source] = {}
358
+
359
+ license_name = license_item.get('name')
360
+ if license_name:
361
+ # Use license name as key, store full license object as value
362
+ # If duplicate license names exist in same source, the last one wins
363
+ licenses_by_source[source][license_name] = license_item
364
+
365
+ # Find the highest priority source that has licenses
366
+ for priority_source in priority_sources:
367
+ if priority_source in licenses_by_source:
368
+ self.print_trace(f'Choosing {priority_source} as source')
369
+ return list(licenses_by_source[priority_source].values())
370
+
371
+ # If no priority sources found, combine all licenses into a single list
372
+ self.print_debug("No priority sources found, returning all licenses as list")
373
+ return licenses_data
374
+
375
+
376
+ #
377
+ # End of InspectBase Class
378
+ #
@@ -0,0 +1,163 @@
1
+ """
2
+ SPDX-License-Identifier: MIT
3
+
4
+ Copyright (c) 2025, SCANOSS
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
+ """
24
+
25
+ import json
26
+ from typing import Any, Dict
27
+
28
+ from .inspect_base import InspectBase
29
+
30
+
31
+ class LicenseSummary(InspectBase):
32
+ """
33
+ SCANOSS LicenseSummary class
34
+ Inspects results and generates comprehensive license summaries from detected components.
35
+
36
+ This class processes component scan results to extract, validate, and aggregate license
37
+ information, providing detailed summaries including copyleft analysis and license statistics.
38
+ """
39
+
40
+ # Define required license fields as class constants
41
+ REQUIRED_LICENSE_FIELDS = ['spdxid', 'url', 'copyleft', 'source']
42
+
43
def __init__(  # noqa: PLR0913
    self,
    debug: bool = False,
    trace: bool = True,
    quiet: bool = False,
    filepath: str = None,
    status: str = None,
    output: str = None,
    include: str = None,
    exclude: str = None,
    explicit: str = None,
):
    """
    Initialize the LicenseSummary class.

    :param debug: Enable debug mode
    :param trace: Enable trace mode (default True)
    :param quiet: Enable quiet mode
    :param filepath: Path to the file containing component data
    :param status: Stored as ``self.status``; not read anywhere else in this class
    :param output: Path to save detailed output
    :param include: Licenses to include in the analysis
    :param exclude: Licenses to exclude from the analysis
    :param explicit: Explicitly defined licenses
    """
    super().__init__(debug, trace, quiet, filepath, output)
    # The base initialiser creates self.license_util; configure it with the license filters
    self.license_util.init(include, exclude, explicit)
    self.filepath, self.output = filepath, output
    self.status = status
    self.include, self.exclude, self.explicit = include, exclude, explicit
75
+
76
+ def _validate_license(self, license_data: Dict[str, Any]) -> bool:
77
+ """
78
+ Validate that a license has all required fields.
79
+
80
+ :param license_data: Dictionary containing license information
81
+ :return: True if license is valid, False otherwise
82
+ """
83
+ for field in self.REQUIRED_LICENSE_FIELDS:
84
+ value = license_data.get(field)
85
+ if value is None:
86
+ self.print_debug(f'WARNING: {field} is empty in license: {license_data}')
87
+ return False
88
+ return True
89
+
90
+ def _append_license(self, licenses: dict, new_license) -> None:
91
+ """Add or update a license in the licenses' dictionary."""
92
+ spdxid = new_license.get("spdxid")
93
+ url = new_license.get("url")
94
+ copyleft = new_license.get("copyleft")
95
+ if spdxid not in licenses:
96
+ licenses[spdxid] = {
97
+ 'spdxid': spdxid,
98
+ 'url': url,
99
+ 'copyleft':copyleft,
100
+ 'count': new_license.get("count"),
101
+ }
102
+ else:
103
+ licenses[spdxid]['count'] += new_license.get("count")
104
+
105
+ def _get_licenses_summary_from_components(self, components: list)-> dict:
106
+ """
107
+ Get a license summary from detected components.
108
+
109
+ :param components: List of all components
110
+ :return: Dict with license summary information
111
+ """
112
+ licenses:dict = {}
113
+ licenses_with_copyleft = 0
114
+ total_licenses = 0
115
+ for component in components:
116
+ component_licenses = component.get("licenses", [])
117
+ for lic in component_licenses:
118
+ if not self._validate_license(lic):
119
+ continue
120
+ copyleft = lic.get("copyleft")
121
+ ## Increment counters
122
+ total_licenses += lic.get("count")
123
+ if copyleft:
124
+ licenses_with_copyleft += lic.get("count")
125
+ ## Add license
126
+ self._append_license(licenses, lic)
127
+ ## End for loop licenses
128
+ ## End for loop components
129
+ return {
130
+ 'licenses': list(licenses.values()),
131
+ 'total': total_licenses,
132
+ 'copyleft': licenses_with_copyleft
133
+ }
134
+
135
+
136
+ def _get_components(self):
137
+ """
138
+ Extract and process components from results and their dependencies.
139
+
140
+ This method performs the following steps:
141
+ 1. Validates that `self.results` is loaded. Returns `None` if not.
142
+ 2. Extracts file, snippet, and dependency components into a dictionary.
143
+ 3. Converts components to a list and processes their licenses.
144
+
145
+ :return: A list of processed components with license data, or `None` if `self.results` is not set.
146
+ """
147
+ if self.results is None:
148
+ return None
149
+
150
+ components: dict = {}
151
+ # Extract component and license data from file and dependency results. Both helpers mutate `components`
152
+ self._get_components_data(self.results, components)
153
+ self._get_dependencies_data(self.results, components)
154
+ return self._convert_components_to_list(components)
155
+
156
def run(self):
    """
    Generate the license summary and emit it as indented JSON.

    The summary is built from the components returned by ``_get_components`` and written
    through ``print_to_file_or_stdout`` using ``self.output`` as the destination.

    :return: The license summary dictionary
    """
    license_summary = self._get_licenses_summary_from_components(self._get_components())
    rendered = json.dumps(license_summary, indent=2)
    self.print_to_file_or_stdout(rendered, self.output)
    return license_summary
161
+ #
162
+ # End of LicenseSummary Class
163
+ #