scanoss-1.40.0-py3-none-any.whl → scanoss-1.41.0-py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (31)
  1. scanoss/__init__.py +1 -1
  2. scanoss/cli.py +22 -9
  3. scanoss/constants.py +3 -0
  4. scanoss/data/build_date.txt +1 -1
  5. scanoss/data/osadl-copyleft.json +133 -0
  6. scanoss/filecount.py +37 -38
  7. scanoss/gitlabqualityreport.py +33 -4
  8. scanoss/inspection/policy_check/dependency_track/__init__.py +0 -0
  9. scanoss/inspection/{dependency_track → policy_check/dependency_track}/project_violation.py +24 -24
  10. scanoss/inspection/{policy_check.py → policy_check/policy_check.py} +22 -18
  11. scanoss/inspection/policy_check/scanoss/__init__.py +0 -0
  12. scanoss/inspection/{raw → policy_check/scanoss}/copyleft.py +42 -36
  13. scanoss/inspection/{raw → policy_check/scanoss}/undeclared_component.py +30 -29
  14. scanoss/inspection/summary/__init__.py +0 -0
  15. scanoss/inspection/{raw → summary}/component_summary.py +34 -9
  16. scanoss/inspection/{raw → summary}/license_summary.py +46 -44
  17. scanoss/inspection/{raw → summary}/match_summary.py +51 -0
  18. scanoss/inspection/utils/license_utils.py +57 -71
  19. scanoss/inspection/{raw/raw_base.py → utils/scan_result_processor.py} +47 -59
  20. scanoss/osadl.py +125 -0
  21. scanoss/scanner.py +191 -189
  22. scanoss/scanners/folder_hasher.py +24 -24
  23. scanoss/scanners/scanner_hfh.py +20 -15
  24. scanoss/threadedscanning.py +10 -0
  25. {scanoss-1.40.0.dist-info → scanoss-1.41.0.dist-info}/METADATA +1 -1
  26. {scanoss-1.40.0.dist-info → scanoss-1.41.0.dist-info}/RECORD +31 -26
  27. scanoss/inspection/{raw → policy_check}/__init__.py +0 -0
  28. {scanoss-1.40.0.dist-info → scanoss-1.41.0.dist-info}/WHEEL +0 -0
  29. {scanoss-1.40.0.dist-info → scanoss-1.41.0.dist-info}/entry_points.txt +0 -0
  30. {scanoss-1.40.0.dist-info → scanoss-1.41.0.dist-info}/licenses/LICENSE +0 -0
  31. {scanoss-1.40.0.dist-info → scanoss-1.41.0.dist-info}/top_level.txt +0 -0
@@ -94,6 +94,7 @@ class MatchSummary(ScanossBase):
94
94
  self.scanoss_results_path = scanoss_results_path
95
95
  self.line_range_prefix = line_range_prefix
96
96
  self.output = output
97
+ self.print_debug("Initializing MatchSummary class")
97
98
 
98
99
 
99
100
  def _get_match_summary_item(self, file_name: str, result: dict) -> MatchSummaryItem:
@@ -108,11 +109,16 @@ class MatchSummary(ScanossBase):
108
109
  :param result: SCANOSS scan result dictionary containing match details
109
110
  :return: Populated match summary item with all relevant information
110
111
  """
112
+ self.print_trace(f"Creating match summary item for file: {file_name}, id: {result.get('id')}")
113
+
111
114
  if result.get('id') == "snippet":
112
115
  # Snippet match: create URL with line range anchor
113
116
  lines = scanoss_scan_results_utils.get_lines(result.get('lines'))
114
117
  end_line = lines[len(lines) - 1] if len(lines) > 1 else lines[0]
115
118
  file_url = f"{self.line_range_prefix}/{file_name}#L{lines[0]}-L{end_line}"
119
+
120
+ self.print_trace(f"Snippet match: lines {lines[0]}-{end_line}, purl: {result.get('purl')[0]}")
121
+
116
122
  return MatchSummaryItem(
117
123
  file_url=file_url,
118
124
  file=file_name,
@@ -124,6 +130,8 @@ class MatchSummary(ScanossBase):
124
130
  lines=f"{lines[0]}-{lines[len(lines) - 1] if len(lines) > 1 else lines[0]}"
125
131
  )
126
132
  # File match: create URL without line range
133
+ self.print_trace(f"File match: {file_name}, purl: {result.get('purl')[0]}, version: {result.get('version')}")
134
+
127
135
  return MatchSummaryItem(
128
136
  file=file_name,
129
137
  file_url=f"{self.line_range_prefix}/{file_name}",
@@ -176,12 +184,19 @@ class MatchSummary(ScanossBase):
176
184
  required fields and categorizing matches into file matches and snippet matches.
177
185
  Skips invalid or incomplete results with debug messages.
178
186
  """
187
+ self.print_debug(f"Loading scan results from: {self.scanoss_results_path}")
188
+
179
189
  # Load scan results from JSON file
180
190
  scan_results = load_json_file(self.scanoss_results_path)
181
191
  gitlab_matches_summary = ComponentMatchSummary(files=[], snippet=[])
182
192
 
193
+ self.print_debug(f"Processing {len(scan_results)} files from scan results")
194
+ self.print_trace(f"Line range prefix set to: {self.line_range_prefix}")
195
+
183
196
  # Process each file and its results
184
197
  for file_name, results in scan_results.items():
198
+ self.print_trace(f"Processing file: {file_name} with {len(results)} results")
199
+
185
200
  for result in results:
186
201
  # Skip non-matches
187
202
  if result.get('id') == "none":
@@ -196,8 +211,15 @@ class MatchSummary(ScanossBase):
196
211
  summary_item = self._get_match_summary_item(file_name, result)
197
212
  if result.get('id') == "snippet":
198
213
  gitlab_matches_summary.snippet.append(summary_item)
214
+ self.print_trace(f"Added snippet match for {file_name}")
199
215
  else:
200
216
  gitlab_matches_summary.files.append(summary_item)
217
+ self.print_trace(f"Added file match for {file_name}")
218
+
219
+ self.print_debug(
220
+ f"Match summary complete: {len(gitlab_matches_summary.files)} file matches, "
221
+ f"{len(gitlab_matches_summary.snippet)} snippet matches"
222
+ )
201
223
 
202
224
  return gitlab_matches_summary
203
225
 
@@ -212,14 +234,23 @@ class MatchSummary(ScanossBase):
212
234
  :param gitlab_matches_summary: Container with categorized file and snippet matches to format
213
235
  :return: Complete Markdown document with formatted match tables
214
236
  """
237
+ self.print_debug("Generating Markdown from match summaries")
215
238
 
216
239
  if len(gitlab_matches_summary.files) == 0 and len(gitlab_matches_summary.snippet) == 0:
240
+ self.print_debug("No matches to format - returning empty string")
217
241
  return ""
218
242
 
243
+ self.print_trace(
244
+ f"Formatting {len(gitlab_matches_summary.files)} file matches and "
245
+ f"{len(gitlab_matches_summary.snippet)} snippet matches"
246
+ )
247
+
219
248
  # Define table headers
220
249
  file_match_headers = ['File', 'License', 'Similarity', 'PURL', 'Version']
221
250
  snippet_match_headers = ['File', 'License', 'Similarity', 'PURL', 'Version', 'Lines']
251
+
222
252
  # Build file matches table
253
+ self.print_trace("Building file matches table")
223
254
  file_match_rows = []
224
255
  for file_match in gitlab_matches_summary.files:
225
256
  row = [
@@ -233,6 +264,7 @@ class MatchSummary(ScanossBase):
233
264
  file_match_table = generate_table(file_match_headers, file_match_rows)
234
265
 
235
266
  # Build snippet matches table
267
+ self.print_trace("Building snippet matches table")
236
268
  snippet_match_rows = []
237
269
  for snippet_match in gitlab_matches_summary.snippet:
238
270
  row = [
@@ -262,6 +294,8 @@ class MatchSummary(ScanossBase):
262
294
  markdown += snippet_match_table
263
295
  markdown += "\n</details>\n"
264
296
 
297
+ self.print_trace(f"Markdown generation complete (length: {len(markdown)} characters)")
298
+ self.print_debug("Match summary Markdown generation complete")
265
299
  return markdown
266
300
 
267
301
  def run(self):
@@ -275,16 +309,33 @@ class MatchSummary(ScanossBase):
275
309
  3. Generates Markdown report
276
310
  4. Outputs to file or stdout
277
311
  """
312
+ self.print_debug("Starting match summary generation process")
313
+ self.print_trace(
314
+ f"Configuration - Results path: {self.scanoss_results_path}, Output: {self.output}, "
315
+ f"Line range prefix: {self.line_range_prefix}"
316
+ )
317
+
278
318
  # Load and process scan results into categorized matches
319
+ self.print_trace("Loading and processing scan results")
279
320
  matches = self._get_matches_summary()
280
321
 
281
322
  # Format matches as GitLab-compatible Markdown
323
+ self.print_trace("Generating Markdown output")
282
324
  matches_md = self._markdown(matches)
283
325
  if matches_md == "":
326
+ self.print_debug("No matches found - exiting")
284
327
  self.print_stdout("No matches found.")
285
328
  return
329
+
286
330
  # Output to file or stdout
331
+ self.print_trace("Writing output")
332
+ if self.output:
333
+ self.print_debug(f"Writing match summary to file: {self.output}")
334
+ else:
335
+ self.print_debug("Writing match summary to 'stdout'")
336
+
287
337
  self.print_to_file_or_stdout(matches_md, self.output)
338
+ self.print_debug("Match summary generation complete")
288
339
 
289
340
 
290
341
 
@@ -22,96 +22,90 @@ SPDX-License-Identifier: MIT
22
22
  THE SOFTWARE.
23
23
  """
24
24
 
25
- from ...scanossbase import ScanossBase
25
+ from scanoss.osadl import Osadl
26
26
 
27
- DEFAULT_COPYLEFT_LICENSES = {
28
- 'agpl-3.0-only',
29
- 'artistic-1.0',
30
- 'artistic-2.0',
31
- 'cc-by-sa-4.0',
32
- 'cddl-1.0',
33
- 'cddl-1.1',
34
- 'cecill-2.1',
35
- 'epl-1.0',
36
- 'epl-2.0',
37
- 'gfdl-1.1-only',
38
- 'gfdl-1.2-only',
39
- 'gfdl-1.3-only',
40
- 'gpl-1.0-only',
41
- 'gpl-2.0-only',
42
- 'gpl-3.0-only',
43
- 'lgpl-2.1-only',
44
- 'lgpl-3.0-only',
45
- 'mpl-1.1',
46
- 'mpl-2.0',
47
- 'sleepycat',
48
- 'watcom-1.0',
49
- }
27
+ from ...scanossbase import ScanossBase
50
28
 
51
29
 
52
30
  class LicenseUtil(ScanossBase):
53
31
  """
54
32
  A utility class for handling software licenses, particularly copyleft licenses.
55
33
 
56
- This class provides functionality to initialize, manage, and query a set of
57
- copyleft licenses. It also offers a method to generate URLs for license information.
34
+ Uses OSADL (Open Source Automation Development Lab) authoritative copyleft data
35
+ with optional include/exclude/explicit filters.
58
36
  """
59
37
 
60
38
  BASE_SPDX_ORG_URL = 'https://spdx.org/licenses'
61
- BASE_OSADL_URL = 'https://www.osadl.org/fileadmin/checklists/unreflicenses'
62
39
 
63
40
  def __init__(self, debug: bool = False, trace: bool = True, quiet: bool = False):
64
41
  super().__init__(debug, trace, quiet)
65
- self.default_copyleft_licenses = set(DEFAULT_COPYLEFT_LICENSES)
66
- self.copyleft_licenses = set()
42
+ self.osadl = Osadl(debug=debug, trace=trace, quiet=quiet)
43
+ self.include_licenses = set()
44
+ self.exclude_licenses = set()
45
+ self.explicit_licenses = set()
67
46
 
68
47
  def init(self, include: str = None, exclude: str = None, explicit: str = None):
69
48
  """
70
- Initialize the set of copyleft licenses based on user input.
71
-
72
- This method allows for customization of the copyleft license set by:
73
- - Setting an explicit list of licenses
74
- - Including additional licenses to the default set
75
- - Excluding specific licenses from the default set
49
+ Initialize copyleft license filters.
76
50
 
77
- :param include: Comma-separated string of licenses to include
78
- :param exclude: Comma-separated string of licenses to exclude
79
- :param explicit: Comma-separated string of licenses to use exclusively
51
+ :param include: Comma-separated licenses to mark as copyleft (in addition to OSADL)
52
+ :param exclude: Comma-separated licenses to mark as NOT copyleft (override OSADL)
53
+ :param explicit: Comma-separated licenses to use exclusively (ignore OSADL)
80
54
  """
81
- if self.debug:
82
- self.print_stderr(f'Include Copyleft licenses: ${include}')
83
- self.print_stderr(f'Exclude Copyleft licenses: ${exclude}')
84
- self.print_stderr(f'Explicit Copyleft licenses: ${explicit}')
85
- if explicit:
86
- explicit = explicit.strip()
55
+ # Reset previous filters so init() can be safely called multiple times
56
+ self.include_licenses.clear()
57
+ self.exclude_licenses.clear()
58
+ self.explicit_licenses.clear()
59
+
60
+ # Parse explicit list (if provided, ignore OSADL completely)
87
61
  if explicit:
88
- exp = [item.strip().lower() for item in explicit.split(',')]
89
- self.copyleft_licenses = set(exp)
90
- self.print_debug(f'Copyleft licenses: ${self.copyleft_licenses}')
62
+ self.explicit_licenses = {lic.strip().lower() for lic in explicit.split(',') if lic.strip()}
63
+ self.print_debug(f'Explicit copyleft licenses: {self.explicit_licenses}')
91
64
  return
92
- # If no explicit licenses were set, set default ones
93
- self.copyleft_licenses = self.default_copyleft_licenses.copy()
94
- if include:
95
- include = include.strip()
65
+
66
+ # Parse include list (mark these as copyleft in addition to OSADL)
96
67
  if include:
97
- inc = [item.strip().lower() for item in include.split(',')]
98
- self.copyleft_licenses.update(inc)
99
- if exclude:
100
- exclude = exclude.strip()
68
+ self.include_licenses = {lic.strip().lower() for lic in include.split(',') if lic.strip()}
69
+ self.print_debug(f'Include licenses: {self.include_licenses}')
70
+
71
+ # Parse exclude list (mark these as NOT copyleft, overriding OSADL)
101
72
  if exclude:
102
- inc = [item.strip().lower() for item in exclude.split(',')]
103
- for lic in inc:
104
- self.copyleft_licenses.discard(lic)
105
- self.print_debug(f'Copyleft licenses: ${self.copyleft_licenses}')
73
+ self.exclude_licenses = {lic.strip().lower() for lic in exclude.split(',') if lic.strip()}
74
+ self.print_debug(f'Exclude licenses: {self.exclude_licenses}')
106
75
 
107
76
  def is_copyleft(self, spdxid: str) -> bool:
108
77
  """
109
- Check if a given license is considered copyleft.
78
+ Check if a license is copyleft.
79
+
80
+ Logic:
81
+ 1. If explicit list provided → check if license in explicit list
82
+ 2. If license in include list → return True
83
+ 3. If license in exclude list → return False
84
+ 4. Otherwise → use OSADL authoritative data
110
85
 
111
- :param spdxid: The SPDX identifier of the license to check
112
- :return: True if the license is copyleft, False otherwise
86
+ :param spdxid: SPDX license identifier
87
+ :return: True if copyleft, False otherwise
113
88
  """
114
- return spdxid.lower() in self.copyleft_licenses
89
+ if not spdxid:
90
+ self.print_debug('No license ID provided for copyleft check')
91
+ return False
92
+
93
+ spdxid_lc = spdxid.lower()
94
+
95
+ # Explicit mode: use only the explicit list
96
+ if self.explicit_licenses:
97
+ return spdxid_lc in self.explicit_licenses
98
+
99
+ # Include filter: if license in include list, force copyleft=True
100
+ if spdxid_lc in self.include_licenses:
101
+ return True
102
+
103
+ # Exclude filter: if license in exclude list, force copyleft=False
104
+ if spdxid_lc in self.exclude_licenses:
105
+ return False
106
+
107
+ # No filters matched, use OSADL authoritative data
108
+ return self.osadl.is_copyleft(spdxid)
115
109
 
116
110
  def get_spdx_url(self, spdxid: str) -> str:
117
111
  """
@@ -122,14 +116,6 @@ class LicenseUtil(ScanossBase):
122
116
  """
123
117
  return f'{self.BASE_SPDX_ORG_URL}/{spdxid}.html'
124
118
 
125
- def get_osadl_url(self, spdxid: str) -> str:
126
- """
127
- Generate the URL for the OSADL (Open Source Automation Development Lab) page of a license.
128
-
129
- :param spdxid: The SPDX identifier of the license
130
- :return: The URL of the OSADL page for the given license
131
- """
132
- return f'{self.BASE_OSADL_URL}/{spdxid}.txt'
133
119
 
134
120
 
135
121
  #
@@ -22,11 +22,10 @@ SPDX-License-Identifier: MIT
22
22
  THE SOFTWARE.
23
23
  """
24
24
 
25
- from abc import abstractmethod
26
25
  from enum import Enum
27
26
  from typing import Any, Dict, TypeVar
28
27
 
29
- from ..policy_check import PolicyCheck
28
+ from ...scanossbase import ScanossBase
30
29
  from ..utils.file_utils import load_json_file
31
30
  from ..utils.license_utils import LicenseUtil
32
31
 
@@ -51,12 +50,13 @@ class ComponentID(Enum):
51
50
  #
52
51
 
53
52
  T = TypeVar('T')
54
- class RawBase(PolicyCheck[T]):
53
+ class ScanResultProcessor(ScanossBase):
55
54
  """
56
- A base class to perform inspections over scan results.
55
+ A utility class for processing and transforming scan results.
57
56
 
58
- This class provides a basic for scan results inspection, including methods for
59
- processing scan results components and licenses.
57
+ This class provides functionality for processing scan results, including methods for
58
+ loading, parsing, extracting, and aggregating component and license data from scan results.
59
+ It serves as a shared data processing layer used by both policy checks and summary generators.
60
60
 
61
61
  Inherits from:
62
62
  ScanossBase: A base class providing common functionality for SCANOSS-related operations.
@@ -67,40 +67,21 @@ class RawBase(PolicyCheck[T]):
67
67
  debug: bool = False,
68
68
  trace: bool = False,
69
69
  quiet: bool = False,
70
- format_type: str = None,
71
- filepath: str = None,
72
- output: str = None,
73
- status: str = None,
74
- name: str = None,
70
+ result_file_path: str = None,
71
+ include: str = None,
72
+ exclude: str = None,
73
+ explicit: str = None,
74
+ license_sources: list = None,
75
75
  ):
76
- super().__init__(debug, trace, quiet, format_type,status, name, output)
76
+ super().__init__(debug, trace, quiet)
77
+ self.result_file_path = result_file_path
77
78
  self.license_util = LicenseUtil()
78
- self.filepath = filepath
79
- self.output = output
79
+ self.license_util.init(include, exclude, explicit)
80
+ self.license_sources = license_sources
80
81
  self.results = self._load_input_file()
81
82
 
82
- @abstractmethod
83
- def _get_components(self):
84
- """
85
- Retrieve and process components from the preloaded results.
86
-
87
- This method performs the following steps:
88
- 1. Checks if the results have been previously loaded (self.results).
89
- 2. Extracts and processes components from the loaded results.
90
-
91
- :return: A list of processed components, or None if an error occurred during any step.
92
-
93
- Possible reasons for returning None include:
94
- - Results not loaded (self.results is None)
95
- - Failure to extract components from the results
96
-
97
- Note:
98
- - This method assumes that the results have been previously loaded and stored in self.results.
99
- - Implementations must extract components (e.g. via `_get_components_data`,
100
- `_get_dependencies_data`, or other helpers).
101
- - If `self.results` is `None`, simply return `None`.
102
- """
103
- pass
83
+ def get_results(self) -> Dict[str, Any]:
84
+ return self.results
104
85
 
105
86
  def _append_component(self, components: Dict[str, Any], new_component: Dict[str, Any]) -> Dict[str, Any]:
106
87
  """
@@ -183,9 +164,11 @@ class RawBase(PolicyCheck[T]):
183
164
  self.print_debug(f'WARNING: Results missing licenses. Skipping: {new_component}')
184
165
  return
185
166
 
186
- licenses_order_by_source_priority = self._get_licenses_order_by_source_priority(new_component['licenses'])
167
+ # Select licenses based on configuration (filtering or priority mode)
168
+ selected_licenses = self._select_licenses(new_component['licenses'])
169
+
187
170
  # Process licenses for this component
188
- for license_item in licenses_order_by_source_priority:
171
+ for license_item in selected_licenses:
189
172
  if license_item.get('name'):
190
173
  spdxid = license_item['name']
191
174
  source = license_item.get('source')
@@ -213,7 +196,7 @@ class RawBase(PolicyCheck[T]):
213
196
  else:
214
197
  component['undeclared'] += 1
215
198
 
216
- def _get_components_data(self, results: Dict[str, Any], components: Dict[str, Any]) -> Dict[str, Any]:
199
+ def get_components_data(self, components: Dict[str, Any]) -> Dict[str, Any]:
217
200
  """
218
201
  Extract and process file and snippet components from results.
219
202
 
@@ -230,11 +213,11 @@ class RawBase(PolicyCheck[T]):
230
213
  which tracks the number of occurrences of each license
231
214
 
232
215
  Args:
233
- results: A dictionary containing the raw results of a component scan
216
+ components: A dictionary containing the raw results of a component scan
234
217
  Returns:
235
218
  Updated components dictionary with file and snippet data
236
219
  """
237
- for component in results.values():
220
+ for component in self.results.values():
238
221
  for c in component:
239
222
  component_id = c.get('id')
240
223
  if not component_id:
@@ -266,15 +249,13 @@ class RawBase(PolicyCheck[T]):
266
249
  # End components loop
267
250
  return components
268
251
 
269
- def _get_dependencies_data(self, results: Dict[str, Any], components: Dict[str, Any]) -> Dict[str, Any]:
252
+ def get_dependencies_data(self,components: Dict[str, Any]) -> Dict[str, Any]:
270
253
  """
271
254
  Extract and process dependency components from results.
272
-
273
- :param results: A dictionary containing the raw results of a component scan
274
255
  :param components: Existing components dictionary to update
275
256
  :return: Updated components dictionary with dependency data
276
257
  """
277
- for component in results.values():
258
+ for component in self.results.values():
278
259
  for c in component:
279
260
  component_id = c.get('id')
280
261
  if not component_id:
@@ -313,12 +294,12 @@ class RawBase(PolicyCheck[T]):
313
294
  Dict[str, Any]: The parsed JSON data
314
295
  """
315
296
  try:
316
- return load_json_file(self.filepath)
297
+ return load_json_file(self.result_file_path)
317
298
  except Exception as e:
318
299
  self.print_stderr(f'ERROR: Problem parsing input JSON: {e}')
319
300
  return None
320
301
 
321
- def _convert_components_to_list(self, components: dict):
302
+ def convert_components_to_list(self, components: dict):
322
303
  if components is None:
323
304
  self.print_debug(f'WARNING: Components is empty {self.results}')
324
305
  return None
@@ -332,19 +313,26 @@ class RawBase(PolicyCheck[T]):
332
313
  component['licenses'] = []
333
314
  return results_list
334
315
 
335
- def _get_licenses_order_by_source_priority(self,licenses_data):
316
+ def _select_licenses(self, licenses_data):
336
317
  """
337
- Select licenses based on source priority:
338
- 1. component_declared (highest priority)
339
- 2. license_file
340
- 3. file_header
341
- 4. scancode (lowest priority)
318
+ Select licenses based on configuration.
319
+
320
+ Two modes:
321
+ - Filtering mode: If license_sources specified, filter to those sources
322
+ - Priority mode: Otherwise, use original priority-based selection
342
323
 
343
- If any high-priority source is found, return only licenses from that source.
344
- If none found, return all licenses.
324
+ Args:
325
+ licenses_data: List of license dictionaries
345
326
 
346
- Returns: list with ordered licenses by source.
327
+ Returns:
328
+ Filtered list of licenses based on configuration
347
329
  """
330
+ # Filtering mode, when license_sources is explicitly provided
331
+ if self.license_sources:
332
+ sources_to_include = set(self.license_sources) | {'unknown'}
333
+ return [lic for lic in licenses_data
334
+ if lic.get('source') in sources_to_include or lic.get('source') is None]
335
+
348
336
  # Define priority order (highest to lowest)
349
337
  priority_sources = ['component_declared', 'license_file', 'file_header', 'scancode']
350
338
 
@@ -372,7 +360,7 @@ class RawBase(PolicyCheck[T]):
372
360
  self.print_debug("No priority sources found, returning all licenses as list")
373
361
  return licenses_data
374
362
 
375
- def _group_components_by_license(self,components):
363
+ def group_components_by_license(self,components):
376
364
  """
377
365
  Groups components by their unique component-license pairs.
378
366
 
@@ -425,5 +413,5 @@ class RawBase(PolicyCheck[T]):
425
413
 
426
414
 
427
415
  #
428
- # End of PolicyCheck Class
429
- #
416
+ # End of ScanResultProcessor Class
417
+ #
scanoss/osadl.py ADDED
@@ -0,0 +1,125 @@
1
+ """
2
+ SPDX-License-Identifier: MIT
3
+
4
+ Copyright (c) 2025, SCANOSS
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
+ """
24
+
25
+ import json
26
+ import sys
27
+
28
+ import importlib_resources
29
+
30
+ from scanoss.scanossbase import ScanossBase
31
+
32
+
33
+ class Osadl(ScanossBase):
34
+ """
35
+ OSADL data accessor class.
36
+
37
+ Provides access to OSADL (Open Source Automation Development Lab) authoritative
38
+ checklist data for license analysis.
39
+
40
+ Data is loaded once at class level and shared across all instances for efficiency.
41
+
42
+ Data source: https://www.osadl.org/fileadmin/checklists/copyleft.json
43
+ License: CC-BY-4.0
44
+ """
45
+
46
+ _shared_copyleft_data = {}
47
+ _data_loaded = False
48
+
49
+ def __init__(self, debug: bool = False, trace: bool = True, quiet: bool = False):
50
+ """
51
+ Initialize the Osadl class.
52
+ Data is loaded once at class level and shared across all instances.
53
+ """
54
+ super().__init__(debug, trace, quiet)
55
+ self._load_copyleft_data()
56
+
57
+
58
+ def _load_copyleft_data(self) -> bool:
59
+ """
60
+ Load the embedded OSADL copyleft JSON file into class-level shared data.
61
+ Data is loaded only once and shared across all instances.
62
+
63
+ :return: True if successful, False otherwise
64
+ """
65
+ if Osadl._data_loaded:
66
+ return True
67
+
68
+ # OSADL copyleft license checklist from: https://www.osadl.org/Checklists
69
+ # Data source: https://www.osadl.org/fileadmin/checklists/copyleft.json
70
+ # License: CC-BY-4.0 (Creative Commons Attribution 4.0 International)
71
+ # Copyright: (C) 2017 - 2024 Open Source Automation Development Lab (OSADL) eG
72
+ try:
73
+ f_name = importlib_resources.files(__name__) / 'data/osadl-copyleft.json'
74
+ with importlib_resources.as_file(f_name) as f:
75
+ with open(f, 'r', encoding='utf-8') as file:
76
+ data = json.load(file)
77
+ except Exception as e:
78
+ self.print_stderr(f'ERROR: Problem loading OSADL copyleft data: {e}')
79
+ return False
80
+
81
+ # Process copyleft data
82
+ copyleft = data.get('copyleft', {})
83
+ if not copyleft:
84
+ self.print_stderr('ERROR: No copyleft data found in OSADL JSON')
85
+ return False
86
+
87
+ # Store in class-level shared dictionary
88
+ for lic_id, status in copyleft.items():
89
+ # Normalize license ID (lowercase) for consistent lookup
90
+ lic_id_lc = lic_id.lower()
91
+ Osadl._shared_copyleft_data[lic_id_lc] = status
92
+
93
+ Osadl._data_loaded = True
94
+ self.print_debug(f'Loaded {len(Osadl._shared_copyleft_data)} OSADL copyleft entries')
95
+ return True
96
+
97
+ def is_copyleft(self, spdx_id: str) -> bool:
98
+ """
99
+ Check if a license is copyleft according to OSADL data.
100
+
101
+ Returns True for both strong copyleft ("Yes") and weak/restricted copyleft ("Yes (restricted)").
102
+
103
+ :param spdx_id: SPDX license identifier
104
+ :return: True if copyleft, False otherwise
105
+ """
106
+ if not spdx_id:
107
+ self.print_debug('No license ID provided for copyleft check')
108
+ return False
109
+
110
+ # Normalize lookup
111
+ spdx_id_lc = spdx_id.lower()
112
+ # Use class-level shared data
113
+ status = Osadl._shared_copyleft_data.get(spdx_id_lc)
114
+
115
+ if not status:
116
+ self.print_debug(f'No OSADL copyleft data for license: {spdx_id}')
117
+ return False
118
+
119
+ # Consider both "Yes" and "Yes (restricted)" as copyleft (case-insensitive)
120
+ return status.lower().startswith('yes')
121
+
122
+
123
+ #
124
+ # End of Osadl Class
125
+ #