scanoss 1.24.0__tar.gz → 1.25.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. {scanoss-1.24.0/src/scanoss.egg-info → scanoss-1.25.1}/PKG-INFO +1 -1
  2. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/__init__.py +1 -1
  3. scanoss-1.25.1/src/scanoss/data/build_date.txt +1 -0
  4. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/inspection/copyleft.py +27 -2
  5. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/inspection/policy_check.py +79 -72
  6. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/inspection/undeclared_component.py +49 -15
  7. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/winnowing.py +64 -7
  8. {scanoss-1.24.0 → scanoss-1.25.1/src/scanoss.egg-info}/PKG-INFO +1 -1
  9. {scanoss-1.24.0 → scanoss-1.25.1}/tests/test_policy_inspect.py +10 -46
  10. scanoss-1.25.1/tests/test_winnowing.py +393 -0
  11. scanoss-1.24.0/src/scanoss/data/build_date.txt +0 -1
  12. scanoss-1.24.0/tests/test_winnowing.py +0 -82
  13. {scanoss-1.24.0 → scanoss-1.25.1}/LICENSE +0 -0
  14. {scanoss-1.24.0 → scanoss-1.25.1}/PACKAGE.md +0 -0
  15. {scanoss-1.24.0 → scanoss-1.25.1}/README.md +0 -0
  16. {scanoss-1.24.0 → scanoss-1.25.1}/pyproject.toml +0 -0
  17. {scanoss-1.24.0 → scanoss-1.25.1}/setup.cfg +0 -0
  18. {scanoss-1.24.0 → scanoss-1.25.1}/src/protoc_gen_swagger/__init__.py +0 -0
  19. {scanoss-1.24.0 → scanoss-1.25.1}/src/protoc_gen_swagger/options/__init__.py +0 -0
  20. {scanoss-1.24.0 → scanoss-1.25.1}/src/protoc_gen_swagger/options/annotations_pb2.py +0 -0
  21. {scanoss-1.24.0 → scanoss-1.25.1}/src/protoc_gen_swagger/options/annotations_pb2_grpc.py +0 -0
  22. {scanoss-1.24.0 → scanoss-1.25.1}/src/protoc_gen_swagger/options/openapiv2_pb2.py +0 -0
  23. {scanoss-1.24.0 → scanoss-1.25.1}/src/protoc_gen_swagger/options/openapiv2_pb2_grpc.py +0 -0
  24. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/__init__.py +0 -0
  25. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/common/__init__.py +0 -0
  26. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/common/v2/__init__.py +0 -0
  27. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/common/v2/scanoss_common_pb2.py +0 -0
  28. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/common/v2/scanoss_common_pb2_grpc.py +0 -0
  29. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/components/__init__.py +0 -0
  30. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/components/v2/__init__.py +0 -0
  31. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/components/v2/scanoss_components_pb2.py +0 -0
  32. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/components/v2/scanoss_components_pb2_grpc.py +0 -0
  33. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py +0 -0
  34. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py +0 -0
  35. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/dependencies/__init__.py +0 -0
  36. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/dependencies/v2/__init__.py +0 -0
  37. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py +0 -0
  38. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py +0 -0
  39. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/geoprovenance/__init__.py +0 -0
  40. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/geoprovenance/v2/__init__.py +0 -0
  41. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2.py +0 -0
  42. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2_grpc.py +0 -0
  43. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/scanning/__init__.py +0 -0
  44. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/scanning/v2/__init__.py +0 -0
  45. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/scanning/v2/scanoss_scanning_pb2.py +0 -0
  46. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py +0 -0
  47. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/semgrep/__init__.py +0 -0
  48. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/semgrep/v2/__init__.py +0 -0
  49. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/semgrep/v2/scanoss_semgrep_pb2.py +0 -0
  50. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/semgrep/v2/scanoss_semgrep_pb2_grpc.py +0 -0
  51. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/vulnerabilities/__init__.py +0 -0
  52. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/vulnerabilities/v2/__init__.py +0 -0
  53. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py +0 -0
  54. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py +0 -0
  55. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/cli.py +0 -0
  56. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/components.py +0 -0
  57. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/constants.py +0 -0
  58. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/cryptography.py +0 -0
  59. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/csvoutput.py +0 -0
  60. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/cyclonedx.py +0 -0
  61. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/data/scanoss-settings-schema.json +0 -0
  62. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/data/spdx-exceptions.json +0 -0
  63. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/data/spdx-licenses.json +0 -0
  64. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/file_filters.py +0 -0
  65. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/filecount.py +0 -0
  66. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/inspection/__init__.py +0 -0
  67. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/inspection/utils/license_utils.py +0 -0
  68. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/results.py +0 -0
  69. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/scancodedeps.py +0 -0
  70. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/scanner.py +0 -0
  71. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/scanners/__init__.py +0 -0
  72. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/scanners/container_scanner.py +0 -0
  73. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/scanners/folder_hasher.py +0 -0
  74. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/scanners/scanner_config.py +0 -0
  75. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/scanners/scanner_hfh.py +0 -0
  76. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/scanoss_settings.py +0 -0
  77. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/scanossapi.py +0 -0
  78. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/scanossbase.py +0 -0
  79. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/scanossgrpc.py +0 -0
  80. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/scanpostprocessor.py +0 -0
  81. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/scantype.py +0 -0
  82. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/spdxlite.py +0 -0
  83. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/threadeddependencies.py +0 -0
  84. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/threadedscanning.py +0 -0
  85. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/utils/__init__.py +0 -0
  86. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/utils/abstract_presenter.py +0 -0
  87. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/utils/crc64.py +0 -0
  88. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/utils/file.py +0 -0
  89. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/utils/simhash.py +0 -0
  90. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss.egg-info/SOURCES.txt +0 -0
  91. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss.egg-info/dependency_links.txt +0 -0
  92. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss.egg-info/entry_points.txt +0 -0
  93. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss.egg-info/requires.txt +0 -0
  94. {scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss.egg-info/top_level.txt +0 -0
  95. {scanoss-1.24.0 → scanoss-1.25.1}/tests/test_csv_output.py +0 -0
  96. {scanoss-1.24.0 → scanoss-1.25.1}/tests/test_file_filters.py +0 -0
  97. {scanoss-1.24.0 → scanoss-1.25.1}/tests/test_scan_post_processor.py +0 -0
  98. {scanoss-1.24.0 → scanoss-1.25.1}/tests/test_spdxlite.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scanoss
3
- Version: 1.24.0
3
+ Version: 1.25.1
4
4
  Summary: Simple Python library to leverage the SCANOSS APIs
5
5
  Home-page: https://scanoss.com
6
6
  Author: SCANOSS
@@ -22,4 +22,4 @@ SPDX-License-Identifier: MIT
22
22
  THE SOFTWARE.
23
23
  """
24
24
 
25
- __version__ = '1.24.0'
25
+ __version__ = '1.25.1'
@@ -0,0 +1 @@
1
+ date: 20250612124028, utime: 1749732028
@@ -23,7 +23,8 @@ SPDX-License-Identifier: MIT
23
23
  """
24
24
 
25
25
  import json
26
- from typing import Dict, Any
26
+ from typing import Any, Dict
27
+
27
28
  from .policy_check import PolicyCheck, PolicyStatus
28
29
 
29
30
 
@@ -33,7 +34,7 @@ class Copyleft(PolicyCheck):
33
34
  Inspects components for copyleft licenses
34
35
  """
35
36
 
36
- def __init__(
37
+ def __init__( # noqa: PLR0913
37
38
  self,
38
39
  debug: bool = False,
39
40
  trace: bool = True,
@@ -158,6 +159,30 @@ class Copyleft(PolicyCheck):
158
159
  self.print_debug(f'Copyleft components: {filtered_components}')
159
160
  return filtered_components
160
161
 
162
+ def _get_components(self):
163
+ """
164
+ Extract and process components from results and their dependencies.
165
+
166
+ This method performs the following steps:
167
+ 1. Validates that `self.results` is loaded. Returns `None` if not.
168
+ 2. Extracts file, snippet, and dependency components into a dictionary.
169
+ 3. Converts components to a list and processes their licenses.
170
+
171
+ :return: A list of processed components with license data, or `None` if `self.results` is not set.
172
+ """
173
+ if self.results is None:
174
+ return None
175
+
176
+ components: dict = {}
177
+ # Extract component and license data from file and dependency results. Both helpers mutate `components`
178
+ self._get_components_data(self.results, components)
179
+ self._get_dependencies_data(self.results, components)
180
+ # Convert to list and process licenses
181
+ results_list = list(components.values())
182
+ for component in results_list:
183
+ component['licenses'] = list(component['licenses'].values())
184
+ return results_list
185
+
161
186
  def run(self):
162
187
  """
163
188
  Run the copyleft license inspection process.
@@ -26,9 +26,10 @@ import json
26
26
  import os.path
27
27
  from abc import abstractmethod
28
28
  from enum import Enum
29
- from typing import Callable, List, Dict, Any
30
- from .utils.license_utils import LicenseUtil
29
+ from typing import Any, Callable, Dict, List
30
+
31
31
  from ..scanossbase import ScanossBase
32
+ from .utils.license_utils import LicenseUtil
32
33
 
33
34
 
34
35
  class PolicyStatus(Enum):
@@ -87,7 +88,7 @@ class PolicyCheck(ScanossBase):
87
88
 
88
89
  VALID_FORMATS = {'md', 'json', 'jira_md'}
89
90
 
90
- def __init__(
91
+ def __init__( # noqa: PLR0913
91
92
  self,
92
93
  debug: bool = False,
93
94
  trace: bool = True,
@@ -165,6 +166,30 @@ class PolicyCheck(ScanossBase):
165
166
  """
166
167
  pass
167
168
 
169
+ @abstractmethod
170
+ def _get_components(self):
171
+ """
172
+ Retrieve and process components from the preloaded results.
173
+
174
+ This method performs the following steps:
175
+ 1. Checks if the results have been previously loaded (self.results).
176
+ 2. Extracts and processes components from the loaded results.
177
+
178
+ :return: A list of processed components, or None if an error occurred during any step.
179
+
180
+ Possible reasons for returning None include:
181
+ - Results not loaded (self.results is None)
182
+ - Failure to extract components from the results
183
+
184
+ Note:
185
+ - This method assumes that the results have been previously loaded and stored in self.results.
186
+ - Implementations must extract components (e.g. via `_get_components_data`,
187
+ `_get_dependencies_data`, or other helpers).
188
+ - If `self.results` is `None`, simply return `None`.
189
+ """
190
+ pass
191
+
192
+
168
193
  def _append_component(
169
194
  self, components: Dict[str, Any], new_component: Dict[str, Any], id: str, status: str
170
195
  ) -> Dict[str, Any]:
@@ -181,10 +206,9 @@ class PolicyCheck(ScanossBase):
181
206
  :param status: The new component status
182
207
  :return: The updated components dictionary
183
208
  """
184
-
185
209
  # Determine the component key and purl based on component type
186
210
  if id in [ComponentID.FILE.value, ComponentID.SNIPPET.value]:
187
- purl = new_component['purl'][0] # Take first purl for these component types
211
+ purl = new_component['purl'][0] # Take the first purl for these component types
188
212
  else:
189
213
  purl = new_component['purl']
190
214
 
@@ -195,14 +219,13 @@ class PolicyCheck(ScanossBase):
195
219
  'licenses': {},
196
220
  'status': status,
197
221
  }
198
-
199
222
  if not new_component.get('licenses'):
200
- self.print_stderr(f'WARNING: Results missing licenses. Skipping.')
223
+ self.print_debug(f'WARNING: Results missing licenses. Skipping: {new_component}')
201
224
  return components
202
225
  # Process licenses for this component
203
- for l in new_component['licenses']:
204
- if l.get('name'):
205
- spdxid = l['name']
226
+ for license_item in new_component['licenses']:
227
+ if license_item.get('name'):
228
+ spdxid = license_item['name']
206
229
  components[component_key]['licenses'][spdxid] = {
207
230
  'spdxid': spdxid,
208
231
  'copyleft': self.license_util.is_copyleft(spdxid),
@@ -210,71 +233,79 @@ class PolicyCheck(ScanossBase):
210
233
  }
211
234
  return components
212
235
 
213
- def _get_components_from_results(self, results: Dict[str, Any]) -> list or None:
236
+ def _get_components_data(self, results: Dict[str, Any], components: Dict[str, Any]) -> Dict[str, Any]:
214
237
  """
215
- Process the results dictionary to extract and format component information.
216
-
217
- This function iterates through the results dictionary, identifying components from
218
- different sources (files, snippets, and dependencies). It consolidates this information
219
- into a list of unique components, each with its associated licenses and other details.
238
+ Extract and process file and snippet components from results.
220
239
 
221
240
  :param results: A dictionary containing the raw results of a component scan
222
- :return: A list of dictionaries, each representing a unique component with its details
241
+ :param components: Existing components dictionary to update
242
+ :return: Updated components dictionary with file and snippet data
223
243
  """
224
- if results is None:
225
- self.print_stderr(f'ERROR: Results cannot be empty')
226
- return None
227
- components = {}
228
244
  for component in results.values():
229
245
  for c in component:
230
246
  component_id = c.get('id')
231
247
  if not component_id:
232
- self.print_stderr(f'WARNING: Result missing id. Skipping.')
248
+ self.print_debug(f'WARNING: Result missing id. Skipping: {c}')
249
+ continue
250
+ ## Skip dependency
251
+ if component_id == ComponentID.DEPENDENCY.value:
233
252
  continue
234
253
  status = c.get('status')
235
- if not component_id:
236
- self.print_stderr(f'WARNING: Result missing status. Skipping.')
254
+ if not status:
255
+ self.print_debug(f'WARNING: Result missing status. Skipping: {c}')
237
256
  continue
238
257
  if component_id in [ComponentID.FILE.value, ComponentID.SNIPPET.value]:
239
258
  if not c.get('purl'):
240
- self.print_stderr(f'WARNING: Result missing purl. Skipping.')
259
+ self.print_debug(f'WARNING: Result missing purl. Skipping: {c}')
241
260
  continue
242
261
  if len(c.get('purl')) <= 0:
243
- self.print_stderr(f'WARNING: Result missing purls. Skipping.')
262
+ self.print_debug(f'WARNING: Result missing purls. Skipping: {c}')
244
263
  continue
245
264
  if not c.get('version'):
246
- self.print_stderr(f'WARNING: Result missing version. Skipping.')
265
+ self.print_msg(f'WARNING: Result missing version. Skipping: {c}')
247
266
  continue
248
267
  component_key = f'{c["purl"][0]}@{c["version"]}'
249
- # Initialize or update the component entry
250
268
  if component_key not in components:
251
269
  components = self._append_component(components, c, component_id, status)
270
+ # End component loop
271
+ # End components loop
272
+ return components
273
+
274
+ def _get_dependencies_data(self, results: Dict[str, Any], components: Dict[str, Any]) -> Dict[str, Any]:
275
+ """
276
+ Extract and process dependency components from results.
252
277
 
253
- if c['id'] == ComponentID.DEPENDENCY.value:
278
+ :param results: A dictionary containing the raw results of a component scan
279
+ :param components: Existing components dictionary to update
280
+ :return: Updated components dictionary with dependency data
281
+ """
282
+ for component in results.values():
283
+ for c in component:
284
+ component_id = c.get('id')
285
+ if not component_id:
286
+ self.print_debug(f'WARNING: Result missing id. Skipping: {c}')
287
+ continue
288
+ status = c.get('status')
289
+ if not status:
290
+ self.print_debug(f'WARNING: Result missing status. Skipping: {c}')
291
+ continue
292
+ if component_id == ComponentID.DEPENDENCY.value:
254
293
  if c.get('dependencies') is None:
255
294
  continue
256
- for d in c['dependencies']:
257
- if not d.get('purl'):
258
- self.print_stderr(f'WARNING: Result missing purl. Skipping.')
259
- continue
260
- if len(d.get('purl')) <= 0:
261
- self.print_stderr(f'WARNING: Result missing purls. Skipping.')
295
+ for dependency in c['dependencies']:
296
+ if not dependency.get('purl'):
297
+ self.print_debug(f'WARNING: Dependency result missing purl. Skipping: {dependency}')
262
298
  continue
263
- if not d.get('version'):
264
- self.print_stderr(f'WARNING: Result missing version. Skipping.')
299
+ if not dependency.get('version'):
300
+ self.print_msg(f'WARNING: Dependency result missing version. Skipping: {dependency}')
265
301
  continue
266
- component_key = f'{d["purl"]}@{d["version"]}'
302
+ component_key = f'{dependency["purl"]}@{dependency["version"]}'
267
303
  if component_key not in components:
268
- components = self._append_component(components, d, component_id, status)
269
- # End of dependencies loop
270
- # End if
271
- # End of component loop
272
- # End of results loop
273
- results = list(components.values())
274
- for component in results:
275
- component['licenses'] = list(component['licenses'].values())
276
-
277
- return results
304
+ components = self._append_component(components, dependency, component_id, status)
305
+ # End dependency loop
306
+ # End component loop
307
+ # End of result loop
308
+ return components
278
309
 
279
310
  def generate_table(self, headers, rows, centered_columns=None):
280
311
  """
@@ -380,30 +411,6 @@ class PolicyCheck(ScanossBase):
380
411
  self.print_stderr(f'ERROR: Problem parsing input JSON: {e}')
381
412
  return None
382
413
 
383
- def _get_components(self):
384
- """
385
- Retrieve and process components from the preloaded results.
386
-
387
- This method performs the following steps:
388
- 1. Checks if the results have been previously loaded (self.results).
389
- 2. Extracts and processes components from the loaded results.
390
-
391
- :return: A list of processed components, or None if an error occurred during any step.
392
- Possible reasons for returning None include:
393
- - Results not loaded (self.results is None)
394
- - Failure to extract components from the results
395
-
396
- Note:
397
- - This method assumes that the results have been previously loaded and stored in self.results.
398
- - If results is None, the method returns None without performing any further operations.
399
- - The actual processing of components is delegated to the _get_components_from_results method.
400
- """
401
- if self.results is None:
402
- return None
403
- components = self._get_components_from_results(self.results)
404
- return components
405
-
406
-
407
414
  #
408
415
  # End of PolicyCheck Class
409
416
  #
@@ -23,7 +23,8 @@ SPDX-License-Identifier: MIT
23
23
  """
24
24
 
25
25
  import json
26
- from typing import Dict, Any
26
+ from typing import Any, Dict
27
+
27
28
  from .policy_check import PolicyCheck, PolicyStatus
28
29
 
29
30
 
@@ -33,7 +34,7 @@ class UndeclaredComponent(PolicyCheck):
33
34
  Inspects for undeclared components
34
35
  """
35
36
 
36
- def __init__(
37
+ def __init__( # noqa: PLR0913
37
38
  self,
38
39
  debug: bool = False,
39
40
  trace: bool = True,
@@ -73,7 +74,7 @@ class UndeclaredComponent(PolicyCheck):
73
74
  :return: List of undeclared components
74
75
  """
75
76
  if components is None:
76
- self.print_debug(f'WARNING: No components provided!')
77
+ self.print_debug('WARNING: No components provided!')
77
78
  return None
78
79
  undeclared_components = []
79
80
  for component in components:
@@ -87,25 +88,35 @@ class UndeclaredComponent(PolicyCheck):
87
88
  """
88
89
  Get a summary of the undeclared components.
89
90
 
91
+ :param components: List of all components
92
+ :return: Component summary markdown
93
+ """
94
+
95
+ """
96
+ Get a summary of the undeclared components.
97
+
90
98
  :param components: List of all components
91
99
  :return: Component summary markdown
92
100
  """
93
101
  if len(components) > 0:
102
+ json_content = json.dumps(self._generate_scanoss_file(components), indent=2)
103
+
94
104
  if self.sbom_format == 'settings':
95
- json_str = (
96
- json.dumps(self._generate_scanoss_file(components), indent=2)
97
- .replace('\n', '\\n')
98
- .replace('"', '\\"')
105
+ return (
106
+ f'{len(components)} undeclared component(s) were found.\n'
107
+ f'Add the following snippet into your `scanoss.json` file\n'
108
+ f'{{code:json}}\n'
109
+ f'{json_content}\n'
110
+ f'{{code}}\n'
99
111
  )
100
- return f'{len(components)} undeclared component(s) were found.\nAdd the following snippet into your `scanoss.json` file\n{{code:json}}\n{json.dumps(self._generate_scanoss_file(components), indent=2)}\n{{code}}\n'
101
112
  else:
102
- json_str = (
103
- json.dumps(self._generate_scanoss_file(components), indent=2)
104
- .replace('\n', '\\n')
105
- .replace('"', '\\"')
113
+ return (
114
+ f'{len(components)} undeclared component(s) were found.\n'
115
+ f'Add the following snippet into your `sbom.json` file\n'
116
+ f'{{code:json}}\n'
117
+ f'{json_content}\n'
118
+ f'{{code}}\n'
106
119
  )
107
- return f'{len(components)} undeclared component(s) were found.\nAdd the following snippet into your `sbom.json` file\n{{code:json}}\n{json.dumps(self._generate_scanoss_file(components), indent=2)}\n{{code}}\n'
108
-
109
120
  return f'{len(components)} undeclared component(s) were found.\\n'
110
121
 
111
122
  def _get_summary(self, components: list) -> str:
@@ -190,7 +201,7 @@ class UndeclaredComponent(PolicyCheck):
190
201
  """
191
202
  unique_components = {}
192
203
  if components is None:
193
- self.print_stderr(f'WARNING: No components provided!')
204
+ self.print_stderr('WARNING: No components provided!')
194
205
  return []
195
206
 
196
207
  for component in components:
@@ -225,6 +236,29 @@ class UndeclaredComponent(PolicyCheck):
225
236
 
226
237
  return sbom
227
238
 
239
+ def _get_components(self):
240
+ """
241
+ Extract and process components from file results only.
242
+
243
+ This method performs the following steps:
244
+ 1. Validates if `self.results` is loaded. Returns `None` if not loaded.
245
+ 2. Extracts file and snippet components into a dictionary.
246
+ 3. Converts the components dictionary into a list of components.
247
+ 4. Processes the licenses for each component by converting them into a list.
248
+
249
+ :return: A list of processed components with their licenses, or `None` if `self.results` is not set.
250
+ """
251
+ if self.results is None:
252
+ return None
253
+ components: dict = {}
254
+ # Extract file and snippet components
255
+ components = self._get_components_data(self.results, components)
256
+ # Convert to list and process licenses
257
+ results_list = list(components.values())
258
+ for component in results_list:
259
+ component['licenses'] = list(component['licenses'].values())
260
+ return results_list
261
+
228
262
  def run(self):
229
263
  """
230
264
  Run the undeclared component inspection process.
@@ -32,9 +32,10 @@ import hashlib
32
32
  import pathlib
33
33
  import platform
34
34
  import re
35
+ from typing import Tuple
35
36
 
36
- from crc32c import crc32c
37
37
  from binaryornot.check import is_binary
38
+ from crc32c import crc32c
38
39
 
39
40
  from .scanossbase import ScanossBase
40
41
 
@@ -157,7 +158,7 @@ class Winnowing(ScanossBase):
157
158
  a list of WFP fingerprints with their corresponding line numbers.
158
159
  """
159
160
 
160
- def __init__(
161
+ def __init__( # noqa: PLR0913
161
162
  self,
162
163
  size_limit: bool = False,
163
164
  debug: bool = False,
@@ -197,6 +198,7 @@ class Winnowing(ScanossBase):
197
198
  self.strip_hpsm_ids = strip_hpsm_ids
198
199
  self.strip_snippet_ids = strip_snippet_ids
199
200
  self.hpsm = hpsm
201
+ self.is_windows = platform.system() == 'Windows'
200
202
  if hpsm:
201
203
  self.crc8_maxim_dow_table = []
202
204
  self.crc8_generate_table()
@@ -218,11 +220,11 @@ class Winnowing(ScanossBase):
218
220
  return byte
219
221
  if byte >= ASCII_a:
220
222
  return byte
221
- if (byte >= 65) and (byte <= 90):
223
+ if (byte >= ASCII_A) and (byte <= ASCII_Z):
222
224
  return byte + 32
223
225
  return 0
224
226
 
225
- def __skip_snippets(self, file: str, src: str) -> bool:
227
+ def __skip_snippets(self, file: str, src: str) -> bool: # noqa: PLR0911
226
228
  """
227
229
  Determine files that are not of interest based on their content or file extension
228
230
  Parameters
@@ -351,7 +353,55 @@ class Winnowing(ScanossBase):
351
353
  self.print_debug(f'Stripped snippet ids from {file}')
352
354
  return wfp
353
355
 
354
- def wfp_for_contents(self, file: str, bin_file: bool, contents: bytes) -> str:
356
+ def __detect_line_endings(self, contents: bytes) -> Tuple[bool, bool, bool]:
357
+ """Detect the types of line endings present in file contents.
358
+
359
+ Args:
360
+ contents: File contents as bytes.
361
+
362
+ Returns:
363
+ Tuple of (has_crlf, has_standalone_lf, has_standalone_cr) indicating which line ending types are present.
364
+ """
365
+ has_crlf = b'\r\n' in contents
366
+ # For LF detection, we need to find LF that's not part of CRLF
367
+ content_without_crlf = contents.replace(b'\r\n', b'')
368
+ has_standalone_lf = b'\n' in content_without_crlf
369
+ # For CR detection, we need to find CR that's not part of CRLF
370
+ has_standalone_cr = b'\r' in content_without_crlf
371
+
372
+ return has_crlf, has_standalone_lf, has_standalone_cr
373
+
374
+ def __calculate_opposite_line_ending_hash(self, contents: bytes):
375
+ """Calculate hash for contents with opposite line endings.
376
+
377
+ If the file contains any standalone LF or CR (Unix, CR-only, or mixed line endings), calculates the Windows (CRLF) hash.
378
+ If the file contains only Windows (CRLF) line endings, calculates the Unix (LF) hash.
379
+
380
+ Args:
381
+ contents: File contents as bytes.
382
+
383
+ Returns:
384
+ Hash with opposite line endings as hex string, or None if no line endings detected.
385
+ """
386
+ has_crlf, has_standalone_lf, has_standalone_cr = self.__detect_line_endings(contents)
387
+
388
+ if not has_crlf and not has_standalone_lf and not has_standalone_cr:
389
+ return None
390
+
391
+ # Normalize all line endings to LF first
392
+ normalized = contents.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
393
+
394
+ # Determine the dominant line ending type
395
+ if has_crlf and not has_standalone_lf and not has_standalone_cr:
396
+ # File is Windows (CRLF) - produce Unix (LF) hash
397
+ opposite_contents = normalized
398
+ else:
399
+ # File is Unix (LF/CR) or mixed - produce Windows (CRLF) hash
400
+ opposite_contents = normalized.replace(b'\n', b'\r\n')
401
+
402
+ return hashlib.md5(opposite_contents).hexdigest()
403
+
404
+ def wfp_for_contents(self, file: str, bin_file: bool, contents: bytes) -> str: # noqa: PLR0912, PLR0915
355
405
  """
356
406
  Generate a Winnowing fingerprint (WFP) for the given file contents
357
407
  Parameters
@@ -371,7 +421,7 @@ class Winnowing(ScanossBase):
371
421
  content_length = len(contents)
372
422
  original_filename = file
373
423
 
374
- if platform.system() == 'Windows':
424
+ if self.is_windows:
375
425
  original_filename = file.replace('\\', '/')
376
426
  wfp_filename = repr(original_filename).strip("'") # return a utf-8 compatible version of the filename
377
427
  if self.obfuscate: # hide the real size of the file and its name, but keep the suffix
@@ -380,6 +430,13 @@ class Winnowing(ScanossBase):
380
430
  self.file_map[wfp_filename] = original_filename # Save the file name map for later (reverse lookup)
381
431
 
382
432
  wfp = 'file={0},{1},{2}\n'.format(file_md5, content_length, wfp_filename)
433
+
434
+ # Add opposite line ending hash based on line ending analysis
435
+ if not bin_file:
436
+ opposite_hash = self.__calculate_opposite_line_ending_hash(contents)
437
+ if opposite_hash is not None:
438
+ wfp += f'fh2={opposite_hash}\n'
439
+
383
440
  # We don't process snippets for binaries, or other uninteresting files, or if we're requested to skip
384
441
  if bin_file or self.skip_snippets or self.__skip_snippets(file, contents.decode('utf-8', 'ignore')):
385
442
  return wfp
@@ -467,7 +524,7 @@ class Winnowing(ScanossBase):
467
524
  for i, byte in enumerate(content):
468
525
  c = byte
469
526
  if c == ASCII_LF: # When there is a new line
470
- if len(list_normalized):
527
+ if list_normalized:
471
528
  crc_lines.append(self.crc8_buffer(list_normalized))
472
529
  list_normalized = []
473
530
  elif last_line + 1 == i:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scanoss
3
- Version: 1.24.0
3
+ Version: 1.25.1
4
4
  Summary: Simple Python library to leverage the SCANOSS APIs
5
5
  Home-page: https://scanoss.com
6
6
  Author: SCANOSS