PyPI - scanoss - Versions diffs - 1.12.2__py3-none-any.whl → 1.43.1__py3-none-any.whl - Mend

scanoss: scanoss-1.12.2-py3-none-any.whl → scanoss-1.43.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (109)

protoc_gen_swagger/__init__.py +13 -13
protoc_gen_swagger/options/__init__.py +13 -13
protoc_gen_swagger/options/annotations_pb2.py +18 -12
protoc_gen_swagger/options/annotations_pb2.pyi +48 -0
protoc_gen_swagger/options/annotations_pb2_grpc.py +20 -0
protoc_gen_swagger/options/openapiv2_pb2.py +110 -99
protoc_gen_swagger/options/openapiv2_pb2.pyi +1317 -0
protoc_gen_swagger/options/openapiv2_pb2_grpc.py +20 -0
scanoss/__init__.py +18 -18
scanoss/api/__init__.py +17 -17
scanoss/api/common/__init__.py +17 -17
scanoss/api/common/v2/__init__.py +17 -17
scanoss/api/common/v2/scanoss_common_pb2.py +49 -20
scanoss/api/common/v2/scanoss_common_pb2_grpc.py +25 -0
scanoss/api/components/__init__.py +17 -17
scanoss/api/components/v2/__init__.py +17 -17
scanoss/api/components/v2/scanoss_components_pb2.py +68 -43
scanoss/api/components/v2/scanoss_components_pb2_grpc.py +83 -22
scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py +136 -21
scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py +766 -13
scanoss/api/dependencies/__init__.py +17 -17
scanoss/api/dependencies/v2/__init__.py +17 -17
scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py +56 -29
scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py +94 -8
scanoss/api/geoprovenance/__init__.py +23 -0
scanoss/api/geoprovenance/v2/__init__.py +23 -0
scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2.py +92 -0
scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2_grpc.py +381 -0
scanoss/api/licenses/__init__.py +23 -0
scanoss/api/licenses/v2/__init__.py +23 -0
scanoss/api/licenses/v2/scanoss_licenses_pb2.py +84 -0
scanoss/api/licenses/v2/scanoss_licenses_pb2_grpc.py +302 -0
scanoss/api/scanning/__init__.py +17 -17
scanoss/api/scanning/v2/__init__.py +17 -17
scanoss/api/scanning/v2/scanoss_scanning_pb2.py +42 -13
scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py +86 -7
scanoss/api/semgrep/__init__.py +17 -17
scanoss/api/semgrep/v2/__init__.py +17 -17
scanoss/api/semgrep/v2/scanoss_semgrep_pb2.py +50 -23
scanoss/api/semgrep/v2/scanoss_semgrep_pb2_grpc.py +151 -16
scanoss/api/vulnerabilities/__init__.py +17 -17
scanoss/api/vulnerabilities/v2/__init__.py +17 -17
scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py +78 -31
scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py +282 -18
scanoss/cli.py +2359 -370
scanoss/components.py +187 -94
scanoss/constants.py +22 -0
scanoss/cryptography.py +308 -0
scanoss/csvoutput.py +91 -58
scanoss/cyclonedx.py +221 -63
scanoss/data/build_date.txt +1 -1
scanoss/data/osadl-copyleft.json +133 -0
scanoss/data/scanoss-settings-schema.json +254 -0
scanoss/delta.py +197 -0
scanoss/export/__init__.py +23 -0
scanoss/export/dependency_track.py +227 -0
scanoss/file_filters.py +582 -0
scanoss/filecount.py +75 -69
scanoss/gitlabqualityreport.py +214 -0
scanoss/header_filter.py +563 -0
scanoss/inspection/__init__.py +23 -0
scanoss/inspection/policy_check/__init__.py +0 -0
scanoss/inspection/policy_check/dependency_track/__init__.py +0 -0
scanoss/inspection/policy_check/dependency_track/project_violation.py +479 -0
scanoss/inspection/policy_check/policy_check.py +222 -0
scanoss/inspection/policy_check/scanoss/__init__.py +0 -0
scanoss/inspection/policy_check/scanoss/copyleft.py +243 -0
scanoss/inspection/policy_check/scanoss/undeclared_component.py +309 -0
scanoss/inspection/summary/__init__.py +0 -0
scanoss/inspection/summary/component_summary.py +170 -0
scanoss/inspection/summary/license_summary.py +191 -0
scanoss/inspection/summary/match_summary.py +341 -0
scanoss/inspection/utils/file_utils.py +44 -0
scanoss/inspection/utils/license_utils.py +123 -0
scanoss/inspection/utils/markdown_utils.py +63 -0
scanoss/inspection/utils/scan_result_processor.py +417 -0
scanoss/osadl.py +125 -0
scanoss/results.py +275 -0
scanoss/scancodedeps.py +87 -38
scanoss/scanner.py +431 -539
scanoss/scanners/__init__.py +23 -0
scanoss/scanners/container_scanner.py +476 -0
scanoss/scanners/folder_hasher.py +358 -0
scanoss/scanners/scanner_config.py +73 -0
scanoss/scanners/scanner_hfh.py +252 -0
scanoss/scanoss_settings.py +337 -0
scanoss/scanossapi.py +140 -101
scanoss/scanossbase.py +59 -22
scanoss/scanossgrpc.py +799 -251
scanoss/scanpostprocessor.py +294 -0
scanoss/scantype.py +22 -21
scanoss/services/dependency_track_service.py +132 -0
scanoss/spdxlite.py +532 -174
scanoss/threadeddependencies.py +148 -47
scanoss/threadedscanning.py +53 -37
scanoss/utils/__init__.py +23 -0
scanoss/utils/abstract_presenter.py +103 -0
scanoss/utils/crc64.py +96 -0
scanoss/utils/file.py +84 -0
scanoss/utils/scanoss_scan_results_utils.py +41 -0
scanoss/utils/simhash.py +198 -0
scanoss/winnowing.py +241 -63
{scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info}/METADATA +18 -9
scanoss-1.43.1.dist-info/RECORD +110 -0
{scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info}/WHEEL +1 -1
scanoss-1.12.2.dist-info/RECORD +0 -58
{scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info}/entry_points.txt +0 -0
{scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info/licenses}/LICENSE +0 -0
{scanoss-1.12.2.dist-info → scanoss-1.43.1.dist-info}/top_level.txt +0 -0

scanoss/spdxlite.py (changed)

@@ -1,34 +1,37 @@
 """
- SPDX-License-Identifier: MIT
-   Copyright (c) 2021, SCANOSS
-   Permission is hereby granted, free of charge, to any person obtaining a copy
-   of this software and associated documentation files (the "Software"), to deal
-   in the Software without restriction, including without limitation the rights
-   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-   copies of the Software, and to permit persons to whom the Software is
-   furnished to do so, subject to the following conditions:
-   The above copyright notice and this permission notice shall be included in
-   all copies or substantial portions of the Software.
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-   THE SOFTWARE.
+SPDX-License-Identifier: MIT
+  Copyright (c) 2021, SCANOSS
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to deal
+  in the Software without restriction, including without limitation the rights
+  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+  copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+  The above copyright notice and this permission notice shall be included in
+  all copies or substantial portions of the Software.
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+  THE SOFTWARE.
 """
-import json
-import os.path
-import sys
-import hashlib
 import datetime
 import getpass
+import hashlib
+import json
+import os.path
 import re
-import pkg_resources
+import sys
+import importlib_resources
+from packageurl import PackageURL
 from . import __version__
@@ -68,76 +71,192 @@ class SpdxLite:
         :param data: json - JSON object
         :return: summary dictionary
         """
-        if not data:
+        if data is None:
             self.print_stderr('ERROR: No JSON data provided to parse.')
             return None
-        self.print_debug(f'Processing raw results into summary format...')
+        if len(data) == 0:
+            self.print_debug('Warning: Empty scan results provided. Returning empty summary.')
+            return {}
+        self.print_debug('Processing raw results into summary format...')
+        return self._process_files(data)
+    def _process_files(self, data: json) -> dict:
+        """
+            Process raw results and build a component summary.
+            Args:
+                data: JSON data containing raw results
+            Returns:
+                dict: The built summary dictionary
+        """
         summary = {}
-        for f in data:
-            file_details = data.get(f)
-            # print(f'File: {f}: {file_details}\n')
-            for d in file_details:
-                id_details = d.get("id")
-                if not id_details or id_details == 'none':  # Ignore files with no ids
-                    continue
-                purl = None
-                if id_details == 'dependency':  # Process dependency data
-                    dependencies = d.get("dependencies")
-                    if not dependencies:
-                        self.print_stderr(f'Warning: No Dependencies found for {f}: {file_details}')
-                        continue
-                    for deps in dependencies:
-                        # print(f'File: {f} Deps: {deps}')
-                        purl = deps.get("purl")
-                        if not purl:
-                            self.print_stderr(f'Warning: No PURL found for {f}: {deps}')
-                            continue
-                        if summary.get(purl):
-                            self.print_debug(f'Component {purl} already stored: {summary.get(purl)}')
-                            continue
-                        fd = {}
-                        for field in ['component', 'version', 'url']:
-                            fd[field] = deps.get(field, '')
-                        licenses = deps.get('licenses')
-                        fdl = []
-                        dc = []
-                        for lic in licenses:
-                            name = lic.get("name")
-                            if name not in dc:  # Only save the license name once
-                                fdl.append({'id': name})
-                                dc.append(name)
-                        fd['licenses'] = fdl
-                        summary[purl] = fd
-                else:  # Normal file id type
-                    purls = d.get('purl')
-                    if not purls:
-                        self.print_stderr(f'Purl block missing for {f}: {file_details}')
-                        continue
-                    for p in purls:
-                        self.print_debug(f'Purl: {p}')
-                        purl = p
-                        break
-                    if not purl:
-                        self.print_stderr(f'Warning: No PURL found for {f}: {file_details}')
-                        continue
-                    if summary.get(purl):
-                        self.print_debug(f'Component {purl} already stored: {summary.get(purl)}')
-                        continue
-                    fd = {}
-                    for field in ['id', 'vendor', 'component', 'version', 'latest', 'url']:
-                        fd[field] = d.get(field)
-                    licenses = d.get('licenses')
-                    fdl = []
-                    dc = []
-                    for lic in licenses:
-                        name = lic.get("name")
-                        if name not in dc:  # Only save the license name once
-                            fdl.append({'id': name})
-                            dc.append(name)
-                    fd['licenses'] = fdl
-                    summary[purl] = fd
+        for file_path in data:
+            file_details = data.get(file_path)
+            # summary is passed by reference and modified inside the function
+            self._process_entries(file_path, file_details, summary)
         return summary
+    def _process_entries(self, file_path: str, file_details: list, summary: dict):
+        """
+        Process entries for a single file.
+        Args:
+            file_path: Path to the file being processed
+            file_details: Results of the file
+            summary: Reference to summary dictionary that will be modified in place
+        """
+        for entry in file_details:
+            id_details = entry.get('id')
+            if not id_details or id_details == 'none':
+                continue
+            if id_details == 'dependency':
+                self._process_dependency_entry(file_path, entry, summary)
+            else:
+                self._process_file_entry(file_path, entry, summary)
+    def _process_dependency_entry(self, file_path: str, entry: dict, summary: dict):
+        """
+        Process a dependency type entry.
+        Args:
+            file_path: Path to the file being processed
+            entry: The dependency entry to process
+            summary: Reference to summary dictionary that will be modified in place
+        """
+        dependencies = entry.get('dependencies')
+        if not dependencies:
+            self.print_stderr(f'Warning: No Dependencies found for {file_path}')
+            return
+        for dep in dependencies:
+            purl = dep.get('purl')
+            if not self._is_valid_purl(file_path, dep, purl, summary):
+                continue
+            # Modifying the summary dictionary directly as it's passed by reference
+            summary[purl] = self._create_dependency_summary(dep)
+    def _process_file_entry(self, file_path: str, entry: dict, summary: dict):
+        """
+        Process file entry.
+        Args:
+            file_path: Path to the file being processed
+            entry: Process file match entry
+            summary: Reference to summary dictionary that will be modified in place
+        """
+        purls = entry.get('purl')
+        if not purls:
+            self.print_stderr(f'Purl block missing for {file_path}')
+            return
+        purl = purls[0] if purls else None
+        if not self._is_valid_purl(file_path, entry, purl, summary):
+            return
+        summary[purl] = self._create_file_summary(entry)
+    def _is_valid_purl(self, file_path: str, entry: dict, purl: str, summary: dict) -> bool:
+        """
+        Check if purl is valid and not already processed.
+        Args:
+            file_path: Path to the file being processed
+            entry: The entry containing the PURL
+            purl: The PURL to validate
+            summary: Reference to summary dictionary to check for existing entries
+        Returns:
+            bool: True if purl is valid and not already processed
+        """
+        if not purl:
+            self.print_stderr(f'Warning: No PURL found for {file_path}: {entry}')
+            return False
+        if summary.get(purl):
+            self.print_debug(f'Component {purl} already stored: {summary.get(purl)}')
+            return False
+        return True
+    def _create_dependency_summary(self, dep: dict) -> dict:
+        """
+        Create summary for dependency entry.
+        This method extracts relevant fields from a dependency entry and creates a
+        standardized summary dictionary. It handles fields like component, version,
+        and URL, with special processing for licenses.
+        Args:
+            dep (dict): The dependency entry containing component information
+        Returns:
+            dict: A new summary dictionary containing the extracted and processed fields
+        """
+        summary = {}
+        for field in ['component', 'version', 'url']:
+            summary[field] = dep.get(field, '')
+        summary['licenses'] = self._process_licenses(dep.get('licenses'))
+        return summary
+    def _create_file_summary(self, entry: dict) -> dict:
+        """
+        Create summary for file entry.
+        This method extracts set of fields from file entry and creates a standardized summary dictionary.
+        Args:
+            entry (dict): The file entry containing the metadata to summarize
+        Returns:
+            dict: A new summary dictionary containing all extracted and processed fields
+        """
+        summary = {}
+        fields = ['id', 'vendor', 'component', 'version', 'latest',
+                  'url', 'url_hash', 'download_url']
+        for field in fields:
+            summary[field] = entry.get(field)
+        summary['licenses'] = self._process_licenses(entry.get('licenses'))
+        return summary
+    def _process_licenses(self, licenses: list) -> list:
+        """
+            Process license information and remove duplicates.
+            This method filters license information to include only licenses from trusted sources
+            ('component_declared', 'license_file', 'file_header'). Licenses with an unspecified
+            source (None or '') are allowed. Non-empty, non-allowed sources are excluded. It also
+            removes any duplicate license names.
+            The result is a simplified list of license dictionaries containing only the 'id' field.
+            Args:
+                licenses (list): A list of license dictionaries, each containing at least 'name'
+                                 and 'source' fields. Can be None or empty.
+            Returns:
+                list: A filtered and deduplicated list of license dictionaries, where each
+                      dictionary contains only an 'id' field matching the original license name.
+                      Returns an empty list if input is None or empty.
+            """
+        if not licenses:
+            return []
+        processed_licenses = []
+        seen_names = set()
+        for license_info in licenses:
+            name = license_info.get('name')
+            source = license_info.get('source')
+            if source not in (None, '') and source not in ("component_declared", "license_file", "file_header"):
+                continue
+            if name and name not in seen_names:
+                processed_licenses.append({'id': name})
+                seen_names.add(name)
+        return processed_licenses
     def produce_from_file(self, json_file: str, output_file: str = None) -> bool:
         """
         Parse plain/raw input JSON file and produce SPDX Lite output
@@ -163,101 +282,339 @@ class SpdxLite:
         :return: True if successful, False otherwise
         """
         raw_data = self.parse(data)
-        if not raw_data:
+        if raw_data is None:
             self.print_stderr('ERROR: No SPDX data returned for the JSON string provided.')
             return False
+        if len(raw_data) == 0:
+            self.print_debug('Warning: Empty scan results - generating minimal SPDX Lite document with no packages.')
         self.load_license_data()
-        # Using this SPDX version as the spec
-        # https://github.com/spdx/spdx-spec/blob/development/v2.2.2/examples/SPDXJSONExample-v2.2.spdx.json
-        # Validate using:
-        # pip3 install jsonschema
-        # jsonschema -i spdxlite.json  <(curl https://raw.githubusercontent.com/spdx/spdx-spec/v2.2/schemas/spdx-schema.json)
-        # Validation can also be done online here: https://tools.spdx.org/app/validate/
+        spdx_document = self._create_base_document(raw_data)
+        self._process_packages(raw_data, spdx_document)
+        return self._write_output(spdx_document, output_file)
+    def _create_base_document(self, raw_data: dict) -> dict:
+        """
+            Create the base SPDX document structure.
+            This method initializes a new SPDX document with standard fields required by
+            the SPDX 2.2 specification. It generates a unique document namespace using
+            a hash of the raw data and current timestamp.
+            Args:
+                raw_data (dict): The raw component data used to create a unique identifier
+                                for the document namespace
+            Returns:
+                dict: A dictionary containing the base SPDX document structure with the
+                      following fields:
+                      - spdxVersion: The SPDX specification version
+                      - dataLicense: The license for the SPDX document itself
+                      - SPDXID: The document's unique identifier
+                      - name: The name of the SBOM
+                      - creationInfo: Information about when and how the document was created
+                      - documentNamespace: A unique URI for this document
+                      - documentDescribes: List of packages described (initially empty)
+                      - hasExtractedLicensingInfos: List of licenses (initially empty)
+                      - packages: List of package information (initially empty)
+        """
         now = datetime.datetime.utcnow()
         md5hex = hashlib.md5(f'{raw_data}-{now}'.encode('utf-8')).hexdigest()
-        data = {
+        return {
             'spdxVersion': 'SPDX-2.2',
             'dataLicense': 'CC0-1.0',
-            'SPDXID': f'SPDXRef-{md5hex}',
+            'SPDXID': 'SPDXRef-DOCUMENT',
             'name': 'SCANOSS-SBOM',
-            'creationInfo': {
-                'created': now.strftime('%Y-%m-%dT%H:%M:%S') + now.strftime('.%f')[:4] + 'Z',
-                'creators': [f'Tool: SCANOSS-PY: {__version__}', f'Person: {getpass.getuser()}']
-            },
+            'creationInfo': self._create_creation_info(now),
             'documentNamespace': f'https://spdx.org/spdxdocs/scanoss-py-{__version__}-{md5hex}',
             'documentDescribes': [],
             'hasExtractedLicensingInfos': [],
-            'packages': []
+            'packages': [],
         }
-        lic_refs = set()  # Hash Set of non-SPDX license references
-        for purl in raw_data:
-            comp = raw_data.get(purl)
-            licenses = comp.get('licenses')
-            lic_text = 'NOASSERTION'
-            if licenses:
-                lic_set = set()
-                for lic in licenses:
-                    lc_id = lic.get('id')
-                    if lc_id:
-                        spdx_id = self.get_spdx_license_id(lc_id)
-                        if not spdx_id:
-                            if not lc_id.startswith('LicenseRef'):
-                                lc_id = f'LicenseRef-{lc_id}'  # Make sure it has a license ref in its name
-                            lic_refs.add(lc_id)  # save non-SPDX license for later reference
-                        lic_set.add(spdx_id if spdx_id else lc_id)
-                if len(lic_set) > 0:
-                    lic_text = ' AND '.join(lic_set)
-                if len(lic_set) > 1:
-                    lic_text = f'({lic_text})'  # wrap the names in () if there is more than one
-            comp_name = comp.get('component')
-            comp_ver = comp.get('version')
-            purl_ver = f'{purl}@{comp_ver}'
-            purl_hash = hashlib.md5(f'{purl_ver}'.encode('utf-8')).hexdigest()
-            purl_spdx = f'SPDXRef-{purl_hash}'
-            data['documentDescribes'].append(purl_spdx)
-            data['packages'].append({
-                'name': comp_name,
-                'SPDXID': purl_spdx,
-                'versionInfo': comp_ver,
-                'downloadLocation': 'NOASSERTION',  # TODO Add actual download location
-                'homepage': comp.get('url', ''),
-                'licenseDeclared': lic_text,
-                'licenseConcluded': 'NOASSERTION',
-                'filesAnalyzed': False,
-                'copyrightText': 'NOASSERTION',
-                'externalRefs': [{
+    def _create_creation_info(self, timestamp: datetime.datetime) -> dict:
+        """
+            Create the creation info section of an SPDX document.
+            This method generates the creation information required by the SPDX specification,
+            including timestamps, creator information, and document type.
+            Args:
+                timestamp (datetime.datetime): The UTC timestamp representing when the
+                                              document was created
+            Returns:
+                dict: A dictionary containing creation information with the following fields:
+                      - created: ISO 8601 formatted timestamp
+                      - creators: List of entities involved in creating the document
+                        (tool, person, and organization)
+                      - comment: Additional information about the SBOM type
+        """
+        return {
+            'created': timestamp.strftime('%Y-%m-%dT%H:%M:%SZ'),
+            'creators': [
+                f'Tool: SCANOSS-PY: {__version__}',
+                f'Person: {getpass.getuser()}',
+                'Organization: SCANOSS'
+            ],
+            'comment': 'SBOM Build information - SBOM Type: Build',
+        }
+    def _process_packages(self, raw_data: dict, spdx_document: dict):
+        """
+            Process packages and add them to the SPDX document.
+            This method iterates through the raw component data, creates package information
+            for each component, and adds them to the SPDX document. It also collects
+            license references to be processed separately.
+            Args:
+                raw_data (dict): Dictionary of package data indexed by PURL
+                                (Package URL identifiers)
+                spdx_document (dict): Reference to the SPDX document being built,
+                                     which will be modified in place
+            Note:
+                This method modifies the spdx_document dictionary in place by:
+                1. Adding package information to the 'packages' list
+                2. Adding package SPDXIDs to the 'documentDescribes' list
+                3. Indirectly populating 'hasExtractedLicensingInfos' via _process_license_refs()
+        """
+        lic_refs = set()
+        for purl, comp in raw_data.items():
+            package_info = self._create_package_info(purl, comp, lic_refs)
+            spdx_document['packages'].append(package_info)
+            spdx_document['documentDescribes'].append(package_info['SPDXID'])
+        self._process_license_refs(lic_refs, spdx_document)
+    def _create_package_info(self, purl: str, comp: dict, lic_refs: set) -> dict:
+        """
+            Create package information for SPDX document.
+            This method generates a complete package information entry following the SPDX
+            specification format. It creates a unique identifier for the package based on
+            its PURL and version, processes license information, and formats all required
+            fields for the SPDX document.
+            Args:
+                purl (str): Package URL identifier for the component
+                comp (dict): Component information dictionary containing metadata like
+                            component name, version, URLs, and license information
+                lic_refs (set): Reference to a set that will be populated with license
+                               references found in this package. This set is modified in place.
+            Returns:
+                dict: A dictionary containing all required SPDX package fields including:
+                      - name: Component name
+                      - SPDXID: Unique identifier for this package within the document
+                      - versionInfo: Component version
+                      - downloadLocation: URL where the package can be downloaded
+                      - homepage: Component homepage URL
+                      - licenseDeclared: Formatted license expression
+                      - licenseConcluded: NOASSERTION as automated conclusion isn't possible
+                      - filesAnalyzed: False as files are not individually analyzed
+                      - copyrightText: NOASSERTION as copyright text isn't available
+                      - supplier: Organization name from vendor information
+                      - externalRefs: Package URL reference for package manager integration
+                      - checksums: MD5 hash of the package if available
+        """
+        lic_text = self._process_package_licenses(comp.get('licenses', []), lic_refs)
+        comp_ver = comp.get('version')
+        purl_ver = f'{purl}@{comp_ver}'
+        purl_hash = hashlib.md5(purl_ver.encode('utf-8')).hexdigest()
+        return {
+            'name': comp.get('component'),
+            'SPDXID': f'SPDXRef-{purl_hash}',
+            'versionInfo': comp_ver,
+            'downloadLocation': comp.get('download_url') or comp.get('url'),
+            'homepage': comp.get('url', ''),
+            'licenseDeclared': lic_text,
+            'licenseConcluded': 'NOASSERTION',
+            'filesAnalyzed': False,
+            'copyrightText': 'NOASSERTION',
+            'supplier': f'Organization: {comp.get("vendor", "NOASSERTION")}',
+            'externalRefs': [
+                {
                     'referenceCategory': 'PACKAGE-MANAGER',
-                    'referenceLocator': purl_ver,
+                    'referenceLocator': PackageURL.from_string(purl_ver).to_string(),
                     'referenceType': 'purl'
-                }]
-            })
-        # End purls for loop
-        for lic_ref in lic_refs:  # Insert all the non-SPDX license references
+                }
+            ],
+            'checksums': [
+                {
+                    'algorithm': 'MD5',
+                    'checksumValue': comp.get('url_hash') or '0' * 32
+                }
+            ],
+        }
+    def _process_package_licenses(self, licenses: list, lic_refs: set) -> str:
+        """
+           Process licenses and return license text formatted for SPDX.
+           This method processes a list of license objects, extracts valid license IDs,
+           converts them to SPDX format, and combines them into a properly formatted
+           license expression.
+           Args:
+               licenses (list): List of license dictionaries, each containing at least
+                               an 'id' field
+               lic_refs (set): Reference to a set that will collect license references.
+                              This set is modified in place.
+           Returns:
+               str: A formatted license expression string following SPDX syntax.
+                    Returns 'NOASSERTION' if no valid licenses are found.
+        """
+        if not licenses:
+            return 'NOASSERTION'
+        lic_set = set()
+        for lic in licenses:
+            lc_id = lic.get('id')
+            self._process_license_id(lc_id, lic_refs, lic_set)
+        return self._format_license_text(lic_set)
+    def _process_license_id(self, lc_id: str, lic_refs: set, lic_set: set):
+        """
+        Process an individual license ID and add it to the appropriate sets.
+        The ID is looked up with get_spdx_license_id(). When no SPDX identifier is
+        found, the ID is given a 'LicenseRef-' prefix (unless it already starts with
+        'LicenseRef') and is recorded in lic_refs. The resulting identifier is always
+        added to lic_set.
+        Args:
+            lc_id (str): The license ID to process. Empty or None values are ignored
+            lic_refs (set): Set collecting the custom license references.
+                            Modified in place.
+            lic_set (set): Set collecting every resolved license identifier.
+                           Modified in place.
+        """
+        if not lc_id:
+            # An entry without an 'id' can be neither resolved nor referenced; skip it
+            # instead of failing on None.startswith() further down.
+            return
+        spdx_id = self.get_spdx_license_id(lc_id)
+        if spdx_id:
+            lic_set.add(spdx_id)
+            return
+        if not lc_id.startswith('LicenseRef'):
+            lc_id = f'LicenseRef-{lc_id}'
+        lic_refs.add(lc_id)
+        lic_set.add(lc_id)
+    def _format_license_text(self, lic_set: set) -> str:
+        """
+        Format a set of license IDs as a single license expression.
+        The IDs are joined with the 'AND' operator in sorted order, so the same set
+        always yields the same text. An expression made of more than one ID is
+        enclosed in parentheses.
+        Args:
+            lic_set (set): Set of license IDs to format
+        Returns:
+            str: The license expression, or 'NOASSERTION' if the set is empty.
+        """
+        if not lic_set:
+            return 'NOASSERTION'
+        # Iteration order of a set of strings can differ between interpreter runs;
+        # sorting keeps the generated document reproducible.
+        lic_text = ' AND '.join(sorted(lic_set))
+        return f'({lic_text})' if len(lic_set) > 1 else lic_text
+    def _process_license_refs(self, lic_refs: set, spdx_document: dict):
+        """
+        Add an extracted-licensing-info entry to the SPDX document for each license reference.
+        Args:
+            lic_refs (set): License references to describe
+            spdx_document (dict): SPDX document being built. Its
+                                  'hasExtractedLicensingInfos' list is appended to in place
+        """
+        # map() is lazy, so each reference is parsed right before its entry is appended
+        for license_info in map(self._parse_license_ref, lic_refs):
+            spdx_document['hasExtractedLicensingInfos'].append(license_info)
+    def _parse_license_ref(self, lic_ref: str) -> dict:
+        """
+        Build the extracted-licensing-info entry for one license reference.
+        Args:
+            lic_ref (str): License reference identifier to describe
+        Returns:
+            dict: Entry with the keys:
+                  - licenseId: lic_ref, unchanged
+                  - name: Name from _extract_license_info() with hyphens replaced by spaces
+                  - extractedText: Fixed placeholder asking for a review of the component source
+                  - comment: 'Detected license by <source>.', or 'Detected license.'
+                    when no source is known
+        """
+        detected_by, raw_name = self._extract_license_info(lic_ref)
+        source_text = '.'
+        if detected_by:
+            source_text = f' by {detected_by}.'
+        entry = {'licenseId': lic_ref}
+        entry['name'] = raw_name.replace('-', ' ')
+        entry['extractedText'] = 'Detected license, please review component source code.'
+        entry['comment'] = f'Detected license{source_text}'
+        return entry
+    def _extract_license_info(self, lic_ref: str):
+        """
+            Split a license reference into its detecting source and its license name.
+            The reference is matched case-insensitively against the pattern
+            'LicenseRef-' + optional 'scancode-' or 'scanoss-' prefix + a name that
+            contains no whitespace.
+            Args:
+                lic_ref (str): License reference identifier to parse
+            Returns:
+                tuple: A tuple containing (source, name) where:
+                       - source (str): The matched 'scancode'/'scanoss' prefix with its
+                         hyphen removed, or '' when the reference carries neither prefix
+                         or does not match the pattern
+                       - name (str): The part of the reference after the prefix, or the
+                         whole of lic_ref when it does not match the pattern
+        """
+        match = re.search(r'^LicenseRef-(scancode-|scanoss-|)(\S+)$', lic_ref, re.IGNORECASE)
+        if match:
+            source = match.group(1).replace('-', '')
+            name = match.group(2)
+        else:
             source = ''
-            match = re.search(r'^LicenseRef-(scancode-|scanoss-|)(\S+)$', lic_ref, re.IGNORECASE)
-            if match:
-                source = match.group(1).replace('-', '')  # source for the custom license
-                name = match.group(2)  # license name (without references, etc.)
-            else:
-                name = lic_ref
-            name = name.replace('-', ' ')
-            source = f' by {source}.' if source else '.'
-            data['hasExtractedLicensingInfos'].append({
-                'licenseId': lic_ref,
-                'name': name,
-                'extractedText': 'Detected license, please review component source code.',
-                'comment': f'Detected license{source}'
-            })
-        # End license refs for loop
-        file = sys.stdout
+            name = lic_ref
+        return source, name
+    def _write_output(self, data: dict, output_file: str = None) -> bool:
+        """
+        Write the SPDX document as indented JSON to a file or to stdout.
+        Args:
+            data (dict): SPDX document to serialise
+            output_file (str): Optional output path; the destination is resolved by
+                               _get_output_file()
+        Returns:
+            bool: True on success. False if serialising or writing failed, in which
+                  case the error is reported through print_stderr().
+        """
+        try:
+            # Serialise first so a failure cannot leave a truncated output file behind
+            text = json.dumps(data, indent=2)
+            file = self._get_output_file(output_file)
+            try:
+                print(text, file=file)
+            finally:
+                # _get_output_file() may open a file even when output_file is not given
+                # (instance-level default), so close whatever is not stdout
+                if file is not sys.stdout:
+                    file.close()
+            return True
+        except Exception as e:
+            self.print_stderr(f'Error writing output: {str(e)}')
+            return False
+    def _get_output_file(self, output_file: str = None):
+        """
+        Get the output file handle to write to.
+        When output_file is not given, self.output_file is used if it is set.
+        Args:
+            output_file (str): Optional path of the file to write
+        Returns:
+            A handle for the chosen path, newly opened in write mode, or sys.stdout
+            when no path is available. This method does not close the handle.
+        """
         if not output_file and self.output_file:
             output_file = self.output_file
-        if output_file:
-            file = open(output_file, 'w')
-        print(json.dumps(data, indent=2), file=file)
-        if output_file:
-            file.close()
-        return True
+        return open(output_file, 'w') if output_file else sys.stdout
     def produce_from_str(self, json_str: str, output_file: str = None) -> bool:
         """
@@ -298,9 +655,10 @@ class SpdxLite:
         :return: True if successful, False otherwise
         """
         try:
-            f_name = pkg_resources.resource_filename(__name__, filename)
-            with open(f_name, 'r') as f:
-                data = json.loads(f.read())
+            f_name = importlib_resources.files(__name__) / filename
+            with importlib_resources.as_file(f_name) as f:
+                with open(f, 'r', encoding='utf-8') as file:
+                    data = json.load(file)
         except Exception as e:
             self.print_stderr(f'ERROR: Problem parsing SPDX license input JSON: {e}')
             return False
@@ -318,8 +676,6 @@ class SpdxLite:
                             self._spdx_licenses[lic_id_short] = lic_id
                     if lic_name:
                         self._spdx_lic_names[lic_name] = lic_id
-            # self.print_stderr(f'Licenses: {self._spdx_licenses}')
-            # self.print_stderr(f'Lookup: {self._spdx_lic_lookup}')
         return True
     def get_spdx_license_id(self, lic_name: str) -> str:
@@ -346,6 +702,8 @@ class SpdxLite:
             return lic_id
         self.print_debug(f'Warning: Failed to find valid SPDX license identifier for: {lic_name}')
         return None
 #
 # End of SpdxLite Class
 #

scanoss 1.12.2__py3-none-any.whl → 1.43.1__py3-none-any.whl

scanoss 1.12.2py3-none-any.whl → 1.43.1py3-none-any.whl