PyPI - scanoss - Versions diffs - 1.24.0__tar.gz → 1.25.1__tar.gz - Mend

scanoss 1.24.0__tar.gz → 1.25.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (98) hide show

{scanoss-1.24.0/src/scanoss.egg-info → scanoss-1.25.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: scanoss
-Version: 1.24.0
+Version: 1.25.1
 Summary: Simple Python library to leverage the SCANOSS APIs
 Home-page: https://scanoss.com
 Author: SCANOSS

{scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/__init__.py RENAMED Viewed

@@ -22,4 +22,4 @@ SPDX-License-Identifier: MIT
   THE SOFTWARE.
 """
-__version__ = '1.24.0'
+__version__ = '1.25.1'

scanoss-1.25.1/src/scanoss/data/build_date.txt ADDED Viewed

@@ -0,0 +1 @@
+date: 20250612124028, utime: 1749732028

{scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/inspection/copyleft.py RENAMED Viewed

@@ -23,7 +23,8 @@ SPDX-License-Identifier: MIT
 """
 import json
-from typing import Dict, Any
+from typing import Any, Dict
 from .policy_check import PolicyCheck, PolicyStatus
@@ -33,7 +34,7 @@ class Copyleft(PolicyCheck):
     Inspects components for copyleft licenses
     """
-    def __init__(
+    def __init__( # noqa: PLR0913
         self,
         debug: bool = False,
         trace: bool = True,
@@ -158,6 +159,30 @@ class Copyleft(PolicyCheck):
         self.print_debug(f'Copyleft components: {filtered_components}')
         return filtered_components
+    def _get_components(self):
+        """
+        Extract and process components from results and their dependencies.
+        This method performs the following steps:
+        1. Validates that `self.results` is loaded. Returns `None` if not.
+        2. Extracts file, snippet, and dependency components into a dictionary.
+        3. Converts components to a list and processes their licenses.
+        :return: A list of processed components with license data, or `None` if `self.results` is not set.
+        """
+        if self.results is None:
+            return None
+        components: dict = {}
+        # Extract component and license data from file and dependency results. Both helpers mutate `components`
+        self._get_components_data(self.results, components)
+        self._get_dependencies_data(self.results, components)
+        # Convert to list and process licenses
+        results_list = list(components.values())
+        for component in results_list:
+            component['licenses'] = list(component['licenses'].values())
+        return results_list
     def run(self):
         """
         Run the copyleft license inspection process.

{scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/inspection/policy_check.py RENAMED Viewed

@@ -26,9 +26,10 @@ import json
 import os.path
 from abc import abstractmethod
 from enum import Enum
-from typing import Callable, List, Dict, Any
-from .utils.license_utils import LicenseUtil
+from typing import Any, Callable, Dict, List
 from ..scanossbase import ScanossBase
+from .utils.license_utils import LicenseUtil
 class PolicyStatus(Enum):
@@ -87,7 +88,7 @@ class PolicyCheck(ScanossBase):
     VALID_FORMATS = {'md', 'json', 'jira_md'}
-    def __init__(
+    def __init__( # noqa: PLR0913
         self,
         debug: bool = False,
         trace: bool = True,
@@ -165,6 +166,30 @@ class PolicyCheck(ScanossBase):
         """
         pass
+    @abstractmethod
+    def _get_components(self):
+        """
+        Retrieve and process components from the preloaded results.
+        This method performs the following steps:
+        1. Checks if the results have been previously loaded (self.results).
+        2. Extracts and processes components from the loaded results.
+        :return: A list of processed components, or None if an error occurred during any step.
+        Possible reasons for returning None include:
+        - Results not loaded (self.results is None)
+        - Failure to extract components from the results
+        Note:
+        - This method assumes that the results have been previously loaded and stored in self.results.
+        - Implementations must extract components (e.g. via `_get_components_data`,
+          `_get_dependencies_data`, or other helpers).
+        - If `self.results` is `None`, simply return `None`.
+        """
+    pass
     def _append_component(
         self, components: Dict[str, Any], new_component: Dict[str, Any], id: str, status: str
     ) -> Dict[str, Any]:
@@ -181,10 +206,9 @@ class PolicyCheck(ScanossBase):
         :param status: The new component status
         :return: The updated components dictionary
         """
         # Determine the component key and purl based on component type
         if id in [ComponentID.FILE.value, ComponentID.SNIPPET.value]:
-            purl = new_component['purl'][0]  # Take first purl for these component types
+            purl = new_component['purl'][0]  # Take the first purl for these component types
         else:
             purl = new_component['purl']
@@ -195,14 +219,13 @@ class PolicyCheck(ScanossBase):
             'licenses': {},
             'status': status,
         }
         if not new_component.get('licenses'):
-            self.print_stderr(f'WARNING: Results missing licenses. Skipping.')
+            self.print_debug(f'WARNING: Results missing licenses. Skipping: {new_component}')
             return components
         # Process licenses for this component
-        for l in new_component['licenses']:
-            if l.get('name'):
-                spdxid = l['name']
+        for license_item in new_component['licenses']:
+            if license_item.get('name'):
+                spdxid = license_item['name']
                 components[component_key]['licenses'][spdxid] = {
                     'spdxid': spdxid,
                     'copyleft': self.license_util.is_copyleft(spdxid),
@@ -210,71 +233,79 @@ class PolicyCheck(ScanossBase):
                 }
         return components
-    def _get_components_from_results(self, results: Dict[str, Any]) -> list or None:
+    def _get_components_data(self, results: Dict[str, Any], components: Dict[str, Any]) -> Dict[str, Any]:
         """
-        Process the results dictionary to extract and format component information.
-        This function iterates through the results dictionary, identifying components from
-        different sources (files, snippets, and dependencies). It consolidates this information
-        into a list of unique components, each with its associated licenses and other details.
+        Extract and process file and snippet components from results.
         :param results: A dictionary containing the raw results of a component scan
-        :return: A list of dictionaries, each representing a unique component with its details
+        :param components: Existing components dictionary to update
+        :return: Updated components dictionary with file and snippet data
         """
-        if results is None:
-            self.print_stderr(f'ERROR: Results cannot be empty')
-            return None
-        components = {}
         for component in results.values():
             for c in component:
                 component_id = c.get('id')
                 if not component_id:
-                    self.print_stderr(f'WARNING: Result missing id. Skipping.')
+                    self.print_debug(f'WARNING: Result missing id. Skipping: {c}')
+                    continue
+                ## Skip dependency
+                if component_id == ComponentID.DEPENDENCY.value:
                     continue
                 status = c.get('status')
-                if not component_id:
-                    self.print_stderr(f'WARNING: Result missing status. Skipping.')
+                if not status:
+                    self.print_debug(f'WARNING: Result missing status. Skipping: {c}')
                     continue
                 if component_id in [ComponentID.FILE.value, ComponentID.SNIPPET.value]:
                     if not c.get('purl'):
-                        self.print_stderr(f'WARNING: Result missing purl. Skipping.')
+                        self.print_debug(f'WARNING: Result missing purl. Skipping: {c}')
                         continue
                     if len(c.get('purl')) <= 0:
-                        self.print_stderr(f'WARNING: Result missing purls. Skipping.')
+                        self.print_debug(f'WARNING: Result missing purls. Skipping: {c}')
                         continue
                     if not c.get('version'):
-                        self.print_stderr(f'WARNING: Result missing version. Skipping.')
+                        self.print_msg(f'WARNING: Result missing version. Skipping: {c}')
                         continue
                     component_key = f'{c["purl"][0]}@{c["version"]}'
-                    # Initialize or update the component entry
                     if component_key not in components:
                         components = self._append_component(components, c, component_id, status)
+            # End component loop
+        # End components loop
+        return components
+    def _get_dependencies_data(self, results: Dict[str, Any], components: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Extract and process dependency components from results.
-                if c['id'] == ComponentID.DEPENDENCY.value:
+        :param results: A dictionary containing the raw results of a component scan
+        :param components: Existing components dictionary to update
+        :return: Updated components dictionary with dependency data
+        """
+        for component in results.values():
+            for c in component:
+                component_id = c.get('id')
+                if not component_id:
+                    self.print_debug(f'WARNING: Result missing id. Skipping: {c}')
+                    continue
+                status = c.get('status')
+                if not status:
+                    self.print_debug(f'WARNING: Result missing status. Skipping: {c}')
+                    continue
+                if component_id == ComponentID.DEPENDENCY.value:
                     if c.get('dependencies') is None:
                         continue
-                    for d in c['dependencies']:
-                        if not d.get('purl'):
-                            self.print_stderr(f'WARNING: Result missing purl. Skipping.')
-                            continue
-                        if len(d.get('purl')) <= 0:
-                            self.print_stderr(f'WARNING: Result missing purls. Skipping.')
+                    for dependency in c['dependencies']:
+                        if not dependency.get('purl'):
+                            self.print_debug(f'WARNING: Dependency result missing purl. Skipping: {dependency}')
                             continue
-                        if not d.get('version'):
-                            self.print_stderr(f'WARNING: Result missing version. Skipping.')
+                        if not dependency.get('version'):
+                            self.print_msg(f'WARNING: Dependency result missing version. Skipping: {dependency}')
                             continue
-                        component_key = f'{d["purl"]}@{d["version"]}'
+                        component_key = f'{dependency["purl"]}@{dependency["version"]}'
                         if component_key not in components:
-                            components = self._append_component(components, d, component_id, status)
-                    # End of dependencies loop
-                # End if
-            # End of component loop
-        # End of results loop
-        results = list(components.values())
-        for component in results:
-            component['licenses'] = list(component['licenses'].values())
-        return results
+                            components = self._append_component(components, dependency, component_id, status)
+                    # End dependency loop
+            # End component loop
+        # End of result loop
+        return components
     def generate_table(self, headers, rows, centered_columns=None):
         """
@@ -380,30 +411,6 @@ class PolicyCheck(ScanossBase):
                 self.print_stderr(f'ERROR: Problem parsing input JSON: {e}')
         return None
-    def _get_components(self):
-        """
-        Retrieve and process components from the preloaded results.
-        This method performs the following steps:
-        1. Checks if the results have been previously loaded (self.results).
-        2. Extracts and processes components from the loaded results.
-        :return: A list of processed components, or None if an error occurred during any step.
-                 Possible reasons for returning None include:
-                 - Results not loaded (self.results is None)
-                 - Failure to extract components from the results
-        Note:
-        - This method assumes that the results have been previously loaded and stored in self.results.
-        - If results is None, the method returns None without performing any further operations.
-        - The actual processing of components is delegated to the _get_components_from_results method.
-        """
-        if self.results is None:
-            return None
-        components = self._get_components_from_results(self.results)
-        return components
 #
 # End of PolicyCheck Class
 #

{scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/inspection/undeclared_component.py RENAMED Viewed

@@ -23,7 +23,8 @@ SPDX-License-Identifier: MIT
 """
 import json
-from typing import Dict, Any
+from typing import Any, Dict
 from .policy_check import PolicyCheck, PolicyStatus
@@ -33,7 +34,7 @@ class UndeclaredComponent(PolicyCheck):
     Inspects for undeclared components
     """
-    def __init__(
+    def __init__( # noqa: PLR0913
         self,
         debug: bool = False,
         trace: bool = True,
@@ -73,7 +74,7 @@ class UndeclaredComponent(PolicyCheck):
         :return: List of undeclared components
         """
         if components is None:
-            self.print_debug(f'WARNING: No components provided!')
+            self.print_debug('WARNING: No components provided!')
             return None
         undeclared_components = []
         for component in components:
@@ -87,25 +88,35 @@ class UndeclaredComponent(PolicyCheck):
         """
         Get a summary of the undeclared components.
+        :param components: List of all components
+        :return: Component summary markdown
+        """
+        """
+        Get a summary of the undeclared components.
         :param components: List of all components
         :return: Component summary markdown
         """
         if len(components) > 0:
+            json_content = json.dumps(self._generate_scanoss_file(components), indent=2)
             if self.sbom_format == 'settings':
-                json_str = (
-                    json.dumps(self._generate_scanoss_file(components), indent=2)
-                    .replace('\n', '\\n')
-                    .replace('"', '\\"')
+                return (
+                    f'{len(components)} undeclared component(s) were found.\n'
+                    f'Add the following snippet into your `scanoss.json` file\n'
+                    f'{{code:json}}\n'
+                    f'{json_content}\n'
+                    f'{{code}}\n'
                 )
-                return f'{len(components)} undeclared component(s) were found.\nAdd the following snippet into your `scanoss.json` file\n{{code:json}}\n{json.dumps(self._generate_scanoss_file(components), indent=2)}\n{{code}}\n'
             else:
-                json_str = (
-                    json.dumps(self._generate_scanoss_file(components), indent=2)
-                    .replace('\n', '\\n')
-                    .replace('"', '\\"')
+                return (
+                    f'{len(components)} undeclared component(s) were found.\n'
+                    f'Add the following snippet into your `sbom.json` file\n'
+                    f'{{code:json}}\n'
+                    f'{json_content}\n'
+                    f'{{code}}\n'
                 )
-                return f'{len(components)} undeclared component(s) were found.\nAdd the following snippet into your `sbom.json` file\n{{code:json}}\n{json.dumps(self._generate_scanoss_file(components), indent=2)}\n{{code}}\n'
         return f'{len(components)} undeclared component(s) were found.\\n'
     def _get_summary(self, components: list) -> str:
@@ -190,7 +201,7 @@ class UndeclaredComponent(PolicyCheck):
         """
         unique_components = {}
         if components is None:
-            self.print_stderr(f'WARNING: No components provided!')
+            self.print_stderr('WARNING: No components provided!')
             return []
         for component in components:
@@ -225,6 +236,29 @@ class UndeclaredComponent(PolicyCheck):
         return sbom
+    def _get_components(self):
+        """
+        Extract and process components from file results only.
+        This method performs the following steps:
+        1. Validates if `self.results` is loaded. Returns `None` if not loaded.
+        2. Extracts file and snippet components into a dictionary.
+        3. Converts the components dictionary into a list of components.
+        4. Processes the licenses for each component by converting them into a list.
+        :return: A list of processed components with their licenses, or `None` if `self.results` is not set.
+        """
+        if self.results is None:
+            return None
+        components: dict = {}
+        # Extract file and snippet components
+        components = self._get_components_data(self.results, components)
+        # Convert to list and process licenses
+        results_list = list(components.values())
+        for component in results_list:
+            component['licenses'] = list(component['licenses'].values())
+        return results_list
     def run(self):
         """
         Run the undeclared component inspection process.

{scanoss-1.24.0 → scanoss-1.25.1}/src/scanoss/winnowing.py RENAMED Viewed

@@ -32,9 +32,10 @@ import hashlib
 import pathlib
 import platform
 import re
+from typing import Tuple
-from crc32c import crc32c
 from binaryornot.check import is_binary
+from crc32c import crc32c
 from .scanossbase import ScanossBase
@@ -157,7 +158,7 @@ class Winnowing(ScanossBase):
     a list of WFP fingerprints with their corresponding line numbers.
     """
-    def __init__(
+    def __init__(  # noqa: PLR0913
         self,
         size_limit: bool = False,
         debug: bool = False,
@@ -197,6 +198,7 @@ class Winnowing(ScanossBase):
         self.strip_hpsm_ids = strip_hpsm_ids
         self.strip_snippet_ids = strip_snippet_ids
         self.hpsm = hpsm
+        self.is_windows = platform.system() == 'Windows'
         if hpsm:
             self.crc8_maxim_dow_table = []
             self.crc8_generate_table()
@@ -218,11 +220,11 @@ class Winnowing(ScanossBase):
             return byte
         if byte >= ASCII_a:
             return byte
-        if (byte >= 65) and (byte <= 90):
+        if (byte >= ASCII_A) and (byte <= ASCII_Z):
             return byte + 32
         return 0
-    def __skip_snippets(self, file: str, src: str) -> bool:
+    def __skip_snippets(self, file: str, src: str) -> bool:  # noqa: PLR0911
         """
         Determine files that are not of interest based on their content or file extension
         Parameters
@@ -351,7 +353,55 @@ class Winnowing(ScanossBase):
             self.print_debug(f'Stripped snippet ids from {file}')
         return wfp
-    def wfp_for_contents(self, file: str, bin_file: bool, contents: bytes) -> str:
+    def __detect_line_endings(self, contents: bytes) -> Tuple[bool, bool, bool]:
+        """Detect the types of line endings present in file contents.
+        Args:
+            contents: File contents as bytes.
+        Returns:
+            Tuple of (has_crlf, has_lf_only, has_cr_only, has_mixed) indicating which line ending types are present.
+        """
+        has_crlf = b'\r\n' in contents
+        # For LF detection, we need to find LF that's not part of CRLF
+        content_without_crlf = contents.replace(b'\r\n', b'')
+        has_standalone_lf = b'\n' in content_without_crlf
+        # For CR detection, we need to find CR that's not part of CRLF
+        has_standalone_cr = b'\r' in content_without_crlf
+        return has_crlf, has_standalone_lf, has_standalone_cr
+    def __calculate_opposite_line_ending_hash(self, contents: bytes):
+        """Calculate hash for contents with opposite line endings.
+        If the file is primarily Unix (LF), calculates Windows (CRLF) hash.
+        If the file is primarily Windows (CRLF), calculates Unix (LF) hash.
+        Args:
+            contents: File contents as bytes.
+        Returns:
+            Hash with opposite line endings as hex string, or None if no line endings detected.
+        """
+        has_crlf, has_standalone_lf, has_standalone_cr = self.__detect_line_endings(contents)
+        if not has_crlf and not has_standalone_lf and not has_standalone_cr:
+            return None
+        # Normalize all line endings to LF first
+        normalized = contents.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
+        # Determine the dominant line ending type
+        if has_crlf and not has_standalone_lf and not has_standalone_cr:
+            # File is Windows (CRLF) - produce Unix (LF) hash
+            opposite_contents = normalized
+        else:
+            # File is Unix (LF/CR) or mixed - produce Windows (CRLF) hash
+            opposite_contents = normalized.replace(b'\n', b'\r\n')
+        return hashlib.md5(opposite_contents).hexdigest()
+    def wfp_for_contents(self, file: str, bin_file: bool, contents: bytes) -> str:  # noqa: PLR0912, PLR0915
         """
         Generate a Winnowing fingerprint (WFP) for the given file contents
         Parameters
@@ -371,7 +421,7 @@ class Winnowing(ScanossBase):
         content_length = len(contents)
         original_filename = file
-        if platform.system() == 'Windows':
+        if self.is_windows:
             original_filename = file.replace('\\', '/')
         wfp_filename = repr(original_filename).strip("'")  # return a utf-8 compatible version of the filename
         if self.obfuscate:  # hide the real size of the file and its name, but keep the suffix
@@ -380,6 +430,13 @@ class Winnowing(ScanossBase):
             self.file_map[wfp_filename] = original_filename  # Save the file name map for later (reverse lookup)
         wfp = 'file={0},{1},{2}\n'.format(file_md5, content_length, wfp_filename)
+        # Add opposite line ending hash based on line ending analysis
+        if not bin_file:
+            opposite_hash = self.__calculate_opposite_line_ending_hash(contents)
+            if opposite_hash is not None:
+                wfp += f'fh2={opposite_hash}\n'
         # We don't process snippets for binaries, or other uninteresting files, or if we're requested to skip
         if bin_file or self.skip_snippets or self.__skip_snippets(file, contents.decode('utf-8', 'ignore')):
             return wfp
@@ -467,7 +524,7 @@ class Winnowing(ScanossBase):
         for i, byte in enumerate(content):
             c = byte
             if c == ASCII_LF:  # When there is a new line
-                if len(list_normalized):
+                if list_normalized:
                     crc_lines.append(self.crc8_buffer(list_normalized))
                     list_normalized = []
                 elif last_line + 1 == i:

{scanoss-1.24.0 → scanoss-1.25.1/src/scanoss.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: scanoss
-Version: 1.24.0
+Version: 1.25.1
 Summary: Simple Python library to leverage the SCANOSS APIs
 Home-page: https://scanoss.com
 Author: SCANOSS

scanoss 1.24.0__tar.gz → 1.25.1__tar.gz

scanoss 1.24.0__tar.gz → 1.25.1__tar.gz