PyPI - scanoss - Versions diffs - 1.14.0__py3-none-any.whl → 1.16.0__py3-none-any.whl - Mend

scanoss 1.14.0-py3-none-any.whl → 1.16.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

scanoss/__init__.py +1 -1
scanoss/cli.py +219 -48
scanoss/cyclonedx.py +13 -0
scanoss/data/build_date.txt +1 -1
scanoss/results.py +301 -0
scanoss/scancodedeps.py +29 -3
scanoss/scanner.py +42 -30
scanoss/scanoss_settings.py +189 -0
scanoss/scanossapi.py +10 -20
scanoss/scanossbase.py +21 -0
scanoss/scanpostprocessor.py +159 -0
scanoss/threadeddependencies.py +75 -5
{scanoss-1.14.0.dist-info → scanoss-1.16.0.dist-info}/METADATA +1 -1
{scanoss-1.14.0.dist-info → scanoss-1.16.0.dist-info}/RECORD +18 -15
{scanoss-1.14.0.dist-info → scanoss-1.16.0.dist-info}/WHEEL +1 -1
{scanoss-1.14.0.dist-info → scanoss-1.16.0.dist-info}/LICENSE +0 -0
{scanoss-1.14.0.dist-info → scanoss-1.16.0.dist-info}/entry_points.txt +0 -0
{scanoss-1.14.0.dist-info → scanoss-1.16.0.dist-info}/top_level.txt +0 -0

scanoss/results.py ADDED Viewed

@@ -0,0 +1,301 @@
+"""
+ SPDX-License-Identifier: MIT
+   Copyright (c) 2024, SCANOSS
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+"""
+import json
+from typing import Any, Dict, List
+from .scanossbase import ScanossBase
+MATCH_TYPES = ["file", "snippet"]
+STATUSES = ["pending", "identified"]
+AVAILABLE_FILTER_VALUES = {
+    "match_type": [e for e in MATCH_TYPES],
+    "status": [e for e in STATUSES],
+}
+ARG_TO_FILTER_MAP = {
+    "match_type": "id",
+    "status": "status",
+}
+PENDING_IDENTIFICATION_FILTERS = {
+    "match_type": ["file", "snippet"],
+    "status": ["pending"],
+}
+AVAILABLE_OUTPUT_FORMATS = ["json", "plain"]
+class Results(ScanossBase):
+    """
+    SCANOSS Results class \n
+    Handles the parsing and filtering of the scan results
+    """
+    def __init__(
+        self,
+        debug: bool = False,
+        trace: bool = False,
+        quiet: bool = False,
+        filepath: str = None,
+        match_type: str = None,
+        status: str = None,
+        output_file: str = None,
+        output_format: str = None,
+    ):
+        """Initialise the Results class
+        Args:
+            debug (bool, optional): Debug. Defaults to False.
+            trace (bool, optional): Trace. Defaults to False.
+            quiet (bool, optional): Quiet. Defaults to False.
+            filepath (str, optional): Path to the scan results file. Defaults to None.
+            match_type (str, optional): Comma separated match type filters. Defaults to None.
+            status (str, optional): Comma separated status filters. Defaults to None.
+            output_file (str, optional): Path to the output file. Defaults to None.
+            output_format (str, optional): Output format. Defaults to None.
+        """
+        super().__init__(debug, trace, quiet)
+        self.data = self._load_and_transform(filepath)
+        self.filters = self._load_filters(match_type=match_type, status=status)
+        self.output_file = output_file
+        self.output_format = output_format
+    def _load_file(self, file: str) -> Dict[str, Any]:
+        """Load the JSON file
+        Args:
+            file (str): Path to the JSON file
+        Returns:
+            Dict[str, Any]: The parsed JSON data
+        """
+        with open(file, "r") as jsonfile:
+            try:
+                return json.load(jsonfile)
+            except Exception as e:
+                self.print_stderr(f"ERROR: Problem parsing input JSON: {e}")
+    def _load_and_transform(self, file: str) -> List[Dict[str, Any]]:
+        """
+        Load the file and transform the data into a list of dictionaries with the filename and the file data
+        """
+        raw_data = self._load_file(file)
+        return self._transform_data(raw_data)
+    @staticmethod
+    def _transform_data(data: dict) -> list:
+        """Transform the data into a list of dictionaries with the filename and the file data
+        Args:
+            data (dict): The raw data
+        Returns:
+            list: The transformed data
+        """
+        result = []
+        for filename, file_data in data.items():
+            if file_data:
+                file_obj = {'filename': filename}
+                file_obj.update(file_data[0])
+                result.append(file_obj)
+        return result
+    def _load_filters(self, **kwargs):
+        """Extract and parse the filters
+        Returns:
+            dict: Parsed filters
+        """
+        filters = {}
+        for key, value in kwargs.items():
+            if value:
+                filters[key] = self._extract_comma_separated_values(value)
+        return filters
+    @staticmethod
+    def _extract_comma_separated_values(values: str):
+        return [value.strip() for value in values.split(",")]
+    def apply_filters(self):
+        """Apply the filters to the data"""
+        filtered_data = []
+        for item in self.data:
+            if self._item_matches_filters(item):
+                filtered_data.append(item)
+        self.data = filtered_data
+        return self
+    def _item_matches_filters(self, item):
+        for filter_key, filter_values in self.filters.items():
+            if not filter_values:
+                continue
+            self._validate_filter_values(filter_key, filter_values)
+            item_value = item.get(ARG_TO_FILTER_MAP[filter_key])
+            if isinstance(filter_values, list):
+                if item_value not in filter_values:
+                    return False
+            elif item_value != filter_values:
+                return False
+        return True
+    @staticmethod
+    def _validate_filter_values(filter_key: str, filter_value: List[str]):
+        if any(
+            value not in AVAILABLE_FILTER_VALUES.get(filter_key, [])
+            for value in filter_value
+        ):
+            valid_values = ", ".join(AVAILABLE_FILTER_VALUES.get(filter_key, []))
+            raise Exception(
+                f"ERROR: Invalid filter value '{filter_value}' for filter '{filter_key.value}'. "
+                f"Valid values are: {valid_values}"
+            )
+    def get_pending_identifications(self):
+        """Get files with 'pending' status and 'file' or 'snippet' match type"""
+        self.filters = PENDING_IDENTIFICATION_FILTERS
+        self.apply_filters()
+        return self
+    def has_results(self):
+        return bool(self.data)
+    def present(self, output_format: str = None, output_file: str = None):
+        """Format and present the results. If no output format is provided, the results will be printed to stdout
+        Args:
+            output_format (str, optional): Output format. Defaults to None.
+            output_file (str, optional): Output file. Defaults to None.
+        Raises:
+            Exception: Invalid output format
+        Returns:
+            None
+        """
+        file_path = output_file or self.output_file
+        fmt = output_format or self.output_format
+        if fmt and fmt not in AVAILABLE_OUTPUT_FORMATS:
+            raise Exception(
+                f"ERROR: Invalid output format '{output_format}'. Valid values are: {', '.join(AVAILABLE_OUTPUT_FORMATS)}"
+            )
+        if fmt == 'json':
+            return self._present_json(file_path)
+        elif fmt == 'plain':
+            return self._present_plain(file_path)
+        else:
+            return self._present_stdout()
+    def _present_json(self, file: str = None):
+        """Present the results in JSON format
+        Args:
+            file (str, optional): Output file. Defaults to None.
+        """
+        self.print_to_file_or_stdout(
+            json.dumps(self._format_json_output(), indent=2), file
+        )
+    def _format_json_output(self):
+        """
+        Format the output data into a JSON object
+        """
+        formatted_data = []
+        for item in self.data:
+            formatted_data.append(
+                {
+                    'file': item.get('filename'),
+                    'status': item.get('status', "N/A"),
+                    'match_type': item['id'],
+                    'matched': item.get('matched', "N/A"),
+                    'purl': (item.get('purl')[0] if item.get('purl') else "N/A"),
+                    'license': (
+                        item.get('licenses')[0].get('name', "N/A")
+                        if item.get('licenses')
+                        else "N/A"
+                    ),
+                }
+            )
+        return {'results': formatted_data, 'total': len(formatted_data)}
+    def _present_plain(self, file: str = None):
+        """Present the results in plain text format
+        Args:
+            file (str, optional): Output file. Defaults to None.
+        Returns:
+            None
+        """
+        if not self.data:
+            return self.print_stderr("No results to present")
+        self.print_to_file_or_stdout(self._format_plain_output(), file)
+    def _present_stdout(self):
+        """Present the results to stdout
+        Returns:
+            None
+        """
+        if not self.data:
+            return self.print_stderr("No results to present")
+        self.print_to_file_or_stdout(self._format_plain_output())
+    def _format_plain_output(self):
+        """
+        Format the output data into a plain text string
+        """
+        formatted = ""
+        for item in self.data:
+            formatted += f"{self._format_plain_output_item(item)} \n"
+        return formatted
+    @staticmethod
+    def _format_plain_output_item(item):
+        purls = item.get('purl', [])
+        licenses = item.get('licenses', [])
+        return (
+            f"File: {item.get('filename')}\n"
+            f"Match type: {item.get('id')}\n"
+            f"Status: {item.get('status', 'N/A')}\n"
+            f"Matched: {item.get('matched', 'N/A')}\n"
+            f"Purl: {purls[0] if purls else 'N/A'}\n"
+            f"License: {licenses[0].get('name', 'N/A') if licenses else 'N/A'}\n"
+        )

scanoss/scancodedeps.py CHANGED Viewed

@@ -59,6 +59,7 @@ class ScancodeDeps(ScanossBase):
         else:
             print(string)
     def remove_interim_file(self, output_file: str = None):
         """
         Remove the temporary Scancode interim file
@@ -105,15 +106,17 @@ class ScancodeDeps(ScanossBase):
                             continue
                     self.print_debug(f'Path: {f_path}, Packages: {len(f_packages)}')
                     purls = []
+                    scopes = []
                     for pkgs in f_packages:
                         pk_deps = pkgs.get('dependencies')
                         if not pk_deps or pk_deps == '':
                             continue
-                        self.print_debug(f'Path: {f_path}, Dependencies: {len(pk_deps)}')
                         for d in pk_deps:
                             dp = d.get('purl')
                             if not dp or dp == '':
                                 continue
                             dp = dp.replace('"', '').replace('%22', '')  # remove unwanted quotes on purls
                             dp_data = {'purl': dp}
                             rq = d.get('extracted_requirement')  # scancode format 2.0
@@ -122,15 +125,21 @@ class ScancodeDeps(ScanossBase):
                             # skip requirement if it ends with the purl (i.e. exact version) or if it's local (file)
                             if rq and rq != '' and not dp.endswith(rq) and not rq.startswith('file:'):
                                 dp_data['requirement'] = rq
+                            # Gets dependency scope
+                            scope = d.get('scope')
+                            if scope and scope != '':
+                                dp_data['scope'] = scope
                             purls.append(dp_data)
-                        # self.print_stderr(f'Path: {f_path}, Purls: {purls}')
+                        # end for loop
                     if len(purls) > 0:
                         files.append({'file': f_path, 'purls': purls})
                     # End packages
                 # End file details
         # End dependencies json
         deps = {'files': files}
-        # self.print_debug(f'Dep Data: {deps}')
         return deps
     def produce_from_file(self, json_file: str = None) -> json:
@@ -179,6 +188,7 @@ class ScancodeDeps(ScanossBase):
             return False
         self.print_msg('Producing summary...')
         deps = self.produce_from_file(output_file)
+        deps = self.__remove_dep_scope(deps)
         self.remove_interim_file(output_file)
         if not deps:
             return False
@@ -235,6 +245,22 @@ class ScancodeDeps(ScanossBase):
                 self.print_stderr(f'ERROR: Problem loading input JSON: {e}')
         return None
+    @staticmethod
+    def __remove_dep_scope(deps: json)->json:
+        """
+        :param deps: dependencies with scopes
+        :return dependencies without scopes
+        """
+        files = deps.get("files")
+        for file in files:
+            if 'purls' in file:
+                purls = file.get("purls")
+                for purl in purls:
+                    purl.pop("scope",None)
+        return {"files": files }
 #
 # End of ScancodeDeps Class
 #

scanoss/scanner.py CHANGED Viewed

@@ -37,15 +37,17 @@ from .spdxlite import SpdxLite
 from .csvoutput import CsvOutput
 from .threadedscanning import ThreadedScanning
 from .scancodedeps import ScancodeDeps
-from .threadeddependencies import ThreadedDependencies
+from .threadeddependencies import ThreadedDependencies, SCOPE
 from .scanossgrpc import ScanossGrpc
 from .scantype import ScanType
 from .scanossbase import ScanossBase
+from .scanoss_settings import ScanossSettings
+from .scanpostprocessor import ScanPostProcessor
 from . import __version__
 FAST_WINNOWING = False
 try:
-    from scanoss_winnowing.winnowing import Winnowing
+    from .winnowing import Winnowing
     FAST_WINNOWING = True
 except ModuleNotFoundError or ImportError:
@@ -95,17 +97,18 @@ class Scanner(ScanossBase):
     def __init__(self, wfp: str = None, scan_output: str = None, output_format: str = 'plain',
                  debug: bool = False, trace: bool = False, quiet: bool = False, api_key: str = None, url: str = None,
-                 sbom_path: str = None, scan_type: str = None, flags: str = None, nb_threads: int = 5,
+                 flags: str = None, nb_threads: int = 5,
                  post_size: int = 32, timeout: int = 180, no_wfp_file: bool = False,
                  all_extensions: bool = False, all_folders: bool = False, hidden_files_folders: bool = False,
                  scan_options: int = 7, sc_timeout: int = 600, sc_command: str = None, grpc_url: str = None,
                  obfuscate: bool = False, ignore_cert_errors: bool = False, proxy: str = None, grpc_proxy: str = None,
                  ca_cert: str = None, pac: PACFile = None, retry: int = 5, hpsm: bool = False,
                  skip_size: int = 0, skip_extensions=None, skip_folders=None,
-                 strip_hpsm_ids=None, strip_snippet_ids=None, skip_md5_ids=None
+                 strip_hpsm_ids=None, strip_snippet_ids=None, skip_md5_ids=None,
+                 scan_settings: ScanossSettings = None
                  ):
         """
-        Initialise scanning class, including Winnowing, ScanossApi and ThreadedScanning
+        Initialise scanning class, including Winnowing, ScanossApi, ThreadedScanning
         """
         super().__init__(debug, trace, quiet)
         if skip_folders is None:
@@ -133,7 +136,7 @@ class Scanner(ScanossBase):
                                    skip_md5_ids=skip_md5_ids
                                    )
         self.scanoss_api = ScanossApi(debug=debug, trace=trace, quiet=quiet, api_key=api_key, url=url,
-                                      sbom_path=sbom_path, scan_type=scan_type, flags=flags, timeout=timeout,
+                                      flags=flags, timeout=timeout,
                                       ver_details=ver_details, ignore_cert_errors=ignore_cert_errors,
                                       proxy=proxy, ca_cert=ca_cert, pac=pac, retry=retry
                                       )
@@ -157,6 +160,16 @@ class Scanner(ScanossBase):
         if skip_extensions:  # Append extra file extensions to skip
             self.skip_extensions.extend(skip_extensions)
+        if scan_settings:
+            self.scan_settings = scan_settings
+            self.post_processor = ScanPostProcessor(scan_settings, debug=debug, trace=trace, quiet=quiet)
+            self._maybe_set_api_sbom()
+    def _maybe_set_api_sbom(self):
+        sbom = self.scan_settings.get_sbom()
+        if sbom:
+            self.scanoss_api.set_sbom(sbom)
     def __filter_files(self, files: list) -> list:
         """
         Filter which files should be considered for processing
@@ -329,14 +342,20 @@ class Scanner(ScanossBase):
             return True
         return False
-    def scan_folder_with_options(self, scan_dir: str, deps_file: str = None, file_map: dict = None) -> bool:
+    def scan_folder_with_options(self, scan_dir: str, deps_file: str = None, file_map: dict = None,
+                                 dep_scope: SCOPE = None, dep_scope_include: str = None,
+                                 dep_scope_exclude: str = None) -> bool:
         """
         Scan the given folder for whatever scaning options that have been configured
+        :param dep_scope_exclude: comma separated list of dependency scopes to exclude
+        :param dep_scope_include: comma separated list of dependency scopes to include
+        :param dep_scope: Enum dependency scope to use
         :param scan_dir: directory to scan
         :param deps_file: pre-parsed dependency file to decorate
         :param file_map: mapping of obfuscated files back into originals
         :return: True if successful, False otherwise
         """
         success = True
         if not scan_dir:
             raise Exception(f"ERROR: Please specify a folder to scan")
@@ -348,7 +367,8 @@ class Scanner(ScanossBase):
         if self.scan_output:
             self.print_msg(f'Writing results to {self.scan_output}...')
         if self.is_dependency_scan():
-            if not self.threaded_deps.run(what_to_scan=scan_dir, deps_file=deps_file, wait=False):  # Kick off a background dependency scan
+            if not self.threaded_deps.run(what_to_scan=scan_dir, deps_file=deps_file, wait=False, dep_scope=dep_scope,
+                                          dep_scope_include= dep_scope_include, dep_scope_exclude=dep_scope_exclude):  # Kick off a background dependency scan
                 success = False
         if self.is_file_or_snippet_scan():
             if not self.scan_folder(scan_dir):
@@ -524,43 +544,34 @@ class Scanner(ScanossBase):
                                 raw_output += ",\n  \"%s\":[%s]" % (file, json.dumps(dep_file, indent=2))
                     # End for loop
         raw_output += "\n}"
-        parsed_json = None
         try:
-            parsed_json = json.loads(raw_output)
+            raw_results = json.loads(raw_output)
         except Exception as e:
-            self.print_stderr(f'Warning: Problem decoding parsed json: {e}')
+            raise Exception(f'ERROR: Problem decoding parsed json: {e}')
+        results = self.post_processor.load_results(raw_results).post_process()
         if self.output_format == 'plain':
-            if parsed_json:
-                self.__log_result(json.dumps(parsed_json, indent=2, sort_keys=True))
-            else:
-                self.__log_result(raw_output)
+            self.__log_result(json.dumps(results, indent=2, sort_keys=True))
         elif self.output_format == 'cyclonedx':
             cdx = CycloneDx(self.debug, self.scan_output)
-            if parsed_json:
-                success = cdx.produce_from_json(parsed_json)
-            else:
-                success = cdx.produce_from_str(raw_output)
+            success = cdx.produce_from_json(results)
         elif self.output_format == 'spdxlite':
             spdxlite = SpdxLite(self.debug, self.scan_output)
-            if parsed_json:
-                success = spdxlite.produce_from_json(parsed_json)
-            else:
-                success = spdxlite.produce_from_str(raw_output)
+            success = spdxlite.produce_from_json(results)
         elif self.output_format == 'csv':
             csvo = CsvOutput(self.debug, self.scan_output)
-            if parsed_json:
-                success = csvo.produce_from_json(parsed_json)
-            else:
-                success = csvo.produce_from_str(raw_output)
+            success = csvo.produce_from_json(results)
         else:
             self.print_stderr(f'ERROR: Unknown output format: {self.output_format}')
             success = False
         return success
-    def scan_file_with_options(self, file: str, deps_file: str = None, file_map: dict = None) -> bool:
+    def scan_file_with_options(self, file: str, deps_file: str = None, file_map: dict = None, dep_scope: SCOPE = None,
+                               dep_scope_include: str = None, dep_scope_exclude: str = None) -> bool:
         """
         Scan the given file for whatever scaning options that have been configured
+        :param dep_scope:
         :param file: file to scan
         :param deps_file: pre-parsed dependency file to decorate
         :param file_map: mapping of obfuscated files back into originals
@@ -577,7 +588,8 @@ class Scanner(ScanossBase):
         if self.scan_output:
             self.print_msg(f'Writing results to {self.scan_output}...')
         if self.is_dependency_scan():
-            if not self.threaded_deps.run(what_to_scan=file, deps_file=deps_file, wait=False):  # Kick off a background dependency scan
+            if not self.threaded_deps.run(what_to_scan=file, deps_file=deps_file, wait=False, dep_scope=dep_scope,
+                                          dep_scope_include=dep_scope_include, dep_scope_exclude=dep_scope_exclude):  # Kick off a background dependency scan
                 success = False
         if self.is_file_or_snippet_scan():
             if not self.scan_file(file):
@@ -713,7 +725,7 @@ class Scanner(ScanossBase):
         else:
             Scanner.print_stderr(f'Warning: No files found to scan from: {filtered_files}')
         return success
     def scan_files_with_options(self, files: [], deps_file: str = None, file_map: dict = None) -> bool:
         """
         Scan the given list of files for whatever scaning options that have been configured

scanoss 1.14.0__py3-none-any.whl → 1.16.0__py3-none-any.whl

scanoss 1.14.0-py3-none-any.whl → 1.16.0-py3-none-any.whl