PyPI - scanoss - Versions diffs - 1.27.1__py3-none-any.whl → 1.43.1__py3-none-any.whl - Mend

scanoss-1.27.1-py3-none-any.whl → scanoss-1.43.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79)

protoc_gen_swagger/options/annotations_pb2.py +18 -12
protoc_gen_swagger/options/annotations_pb2.pyi +48 -0
protoc_gen_swagger/options/annotations_pb2_grpc.py +20 -0
protoc_gen_swagger/options/openapiv2_pb2.py +110 -99
protoc_gen_swagger/options/openapiv2_pb2.pyi +1317 -0
protoc_gen_swagger/options/openapiv2_pb2_grpc.py +20 -0
scanoss/__init__.py +1 -1
scanoss/api/common/v2/scanoss_common_pb2.py +49 -22
scanoss/api/common/v2/scanoss_common_pb2_grpc.py +25 -0
scanoss/api/components/v2/scanoss_components_pb2.py +68 -43
scanoss/api/components/v2/scanoss_components_pb2_grpc.py +83 -22
scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py +136 -47
scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py +650 -33
scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py +56 -37
scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py +64 -12
scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2.py +74 -31
scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2_grpc.py +252 -13
scanoss/api/licenses/__init__.py +23 -0
scanoss/api/licenses/v2/__init__.py +23 -0
scanoss/api/licenses/v2/scanoss_licenses_pb2.py +84 -0
scanoss/api/licenses/v2/scanoss_licenses_pb2_grpc.py +302 -0
scanoss/api/scanning/v2/scanoss_scanning_pb2.py +32 -21
scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py +49 -8
scanoss/api/semgrep/v2/scanoss_semgrep_pb2.py +50 -23
scanoss/api/semgrep/v2/scanoss_semgrep_pb2_grpc.py +151 -16
scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py +78 -31
scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py +282 -18
scanoss/cli.py +1000 -186
scanoss/components.py +80 -50
scanoss/constants.py +7 -1
scanoss/cryptography.py +89 -55
scanoss/csvoutput.py +13 -7
scanoss/cyclonedx.py +141 -9
scanoss/data/build_date.txt +1 -1
scanoss/data/osadl-copyleft.json +133 -0
scanoss/delta.py +197 -0
scanoss/export/__init__.py +23 -0
scanoss/export/dependency_track.py +227 -0
scanoss/file_filters.py +2 -163
scanoss/filecount.py +37 -38
scanoss/gitlabqualityreport.py +214 -0
scanoss/header_filter.py +563 -0
scanoss/inspection/policy_check/__init__.py +0 -0
scanoss/inspection/policy_check/dependency_track/__init__.py +0 -0
scanoss/inspection/policy_check/dependency_track/project_violation.py +479 -0
scanoss/inspection/{policy_check.py → policy_check/policy_check.py} +65 -72
scanoss/inspection/policy_check/scanoss/__init__.py +0 -0
scanoss/inspection/{copyleft.py → policy_check/scanoss/copyleft.py} +89 -73
scanoss/inspection/{undeclared_component.py → policy_check/scanoss/undeclared_component.py} +52 -46
scanoss/inspection/summary/__init__.py +0 -0
scanoss/inspection/summary/component_summary.py +170 -0
scanoss/inspection/{license_summary.py → summary/license_summary.py} +62 -12
scanoss/inspection/summary/match_summary.py +341 -0
scanoss/inspection/utils/file_utils.py +44 -0
scanoss/inspection/utils/license_utils.py +57 -71
scanoss/inspection/utils/markdown_utils.py +63 -0
scanoss/inspection/{inspect_base.py → utils/scan_result_processor.py} +53 -67
scanoss/osadl.py +125 -0
scanoss/scanner.py +135 -253
scanoss/scanners/folder_hasher.py +47 -32
scanoss/scanners/scanner_hfh.py +50 -18
scanoss/scanoss_settings.py +33 -3
scanoss/scanossapi.py +23 -25
scanoss/scanossbase.py +1 -1
scanoss/scanossgrpc.py +543 -289
scanoss/services/dependency_track_service.py +132 -0
scanoss/spdxlite.py +11 -4
scanoss/threadeddependencies.py +19 -18
scanoss/threadedscanning.py +10 -0
scanoss/utils/scanoss_scan_results_utils.py +41 -0
scanoss/winnowing.py +71 -19
{scanoss-1.27.1.dist-info → scanoss-1.43.1.dist-info}/METADATA +8 -5
scanoss-1.43.1.dist-info/RECORD +110 -0
scanoss/inspection/component_summary.py +0 -94
scanoss-1.27.1.dist-info/RECORD +0 -87
{scanoss-1.27.1.dist-info → scanoss-1.43.1.dist-info}/WHEEL +0 -0
{scanoss-1.27.1.dist-info → scanoss-1.43.1.dist-info}/entry_points.txt +0 -0
{scanoss-1.27.1.dist-info → scanoss-1.43.1.dist-info}/licenses/LICENSE +0 -0
{scanoss-1.27.1.dist-info → scanoss-1.43.1.dist-info}/top_level.txt +0 -0

scanoss/services/dependency_track_service.py ADDED Viewed

@@ -0,0 +1,132 @@
+"""
+SPDX-License-Identifier: MIT
+  Copyright (c) 2025, SCANOSS
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to deal
+  in the Software without restriction, including without limitation the rights
+  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+  copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+  The above copyright notice and this permission notice shall be included in
+  all copies or substantial portions of the Software.
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+  THE SOFTWARE.
+"""
+import requests
+from ..scanossbase import ScanossBase
+HTTP_OK = 200
+class DependencyTrackService(ScanossBase):
+    """
+    Read-only client for the Dependency Track REST endpoints used by SCANOSS.
+    Each public lookup method builds an endpoint URL under ``<url>/api/v1`` and delegates the
+    HTTP GET to get_dep_track_data(), which returns the decoded JSON body or None on failure.
+    """
+    # Default number of seconds to wait for Dependency Track to answer a request.
+    # Without a timeout, requests blocks indefinitely on an unresponsive server.
+    DEFAULT_TIMEOUT = 60
+    def __init__(  # noqa: PLR0913
+            self,
+            api_key: str,
+            url: str,
+            debug: bool = False,
+            trace: bool = False,
+            quiet: bool = False,
+            timeout: float = DEFAULT_TIMEOUT,
+    ):
+        """
+        Create the service client.
+        Args:
+            api_key: Dependency Track API key, sent in the 'X-Api-Key' header
+            url: Base URL of the Dependency Track server (surrounding whitespace and trailing '/' are removed)
+            debug: Forwarded to ScanossBase
+            trace: Forwarded to ScanossBase
+            quiet: Forwarded to ScanossBase
+            timeout: Seconds to wait for each HTTP request before it is treated as failed
+        Raises:
+            ValueError: if the URL or the API key is missing
+        """
+        super().__init__(debug=debug, trace=trace, quiet=quiet)
+        # Normalise before validating so a whitespace-only (or bare '/') URL is rejected as missing
+        url = url.strip().rstrip('/') if url else ''
+        if not url:
+            raise ValueError("Error: Dependency Track URL is required")
+        self.url = url
+        if not api_key:
+            raise ValueError("Error: Dependency Track API key is required")
+        self.api_key = api_key
+        self.timeout = timeout
+    def get_project_by_name_version(self, name, version):
+        """
+        Get project information by name and version from Dependency Track
+        Args:
+            name: Project name to search for
+            version: Project version to search for
+        Returns:
+            dict: Project data if found, None otherwise
+        """
+        if not name or not version:
+            self.print_stderr('Error: Missing name or version.')
+            return None
+        # Use the project lookup endpoint, passing name/version as query parameters
+        params = {
+            'name': name,
+            'version': version
+        }
+        self.print_debug(f'Searching for project by: {params}')
+        return self.get_dep_track_data(f'{self.url}/api/v1/project/lookup', params)
+    def get_project_status(self, upload_token):
+        """
+        Get Dependency Track project processing status.
+        Queries the event token endpoint for the given upload token.
+        Args:
+            upload_token: Token identifying the upload whose processing status is wanted
+        Returns:
+            dict: Project status information or None if the token is missing or the request fails
+        """
+        if not upload_token:
+            self.print_stderr('Error: Missing upload token. Cannot search for project status.')
+            return None
+        self.print_trace(f'URL: {self.url} Upload token: {upload_token}')
+        return self.get_dep_track_data(f'{self.url}/api/v1/event/token/{upload_token}')
+    def get_project_violations(self,project_id:str):
+        """
+        Get project violations from Dependency Track.
+        Issues a single request for the policy violations of the given project. This method does
+        not wait for project processing to finish; callers needing that must poll
+        get_project_status() themselves first.
+        Args:
+            project_id: Identifier of the project to query
+        Returns:
+            Decoded violation data, or None if the id is missing or the request fails
+        """
+        if not project_id:
+            self.print_stderr('Error: Missing project id. Cannot search for project violations.')
+            return None
+        # Return the result as-is - None indicates API failure, empty list means no violations
+        return self.get_dep_track_data(f'{self.url}/api/v1/violation/project/{project_id}')
+    def get_project_by_id(self, project_id:str):
+        """
+        Get a Dependency Track project by id.
+        Args:
+            project_id: Identifier of the project to fetch
+        Returns:
+            dict: Project data, or None if the id is missing or the request fails
+        """
+        if not project_id:
+            self.print_stderr('Error: Missing project id. Cannot search for project.')
+            return None
+        self.print_trace(f'URL: {self.url}, UUID: {project_id}')
+        return self.get_dep_track_data(f'{self.url}/api/v1/project/{project_id}')
+    def get_dep_track_data(self, uri, params=None):
+        """
+        Perform an authenticated GET against Dependency Track and decode the JSON response.
+        Args:
+            uri: Full URL of the endpoint to query
+            params: Optional query parameters to add to the request
+        Returns:
+            Decoded JSON payload, or None if the URI is missing, the request fails, the server
+            answers with an error status, or the body is not valid JSON
+        """
+        if not uri:
+            self.print_stderr('Error: Missing URI. Cannot search for project.')
+            return None
+        req_headers = {'X-Api-Key': self.api_key, 'Content-Type': 'application/json'}
+        try:
+            # params=None is equivalent to omitting the argument, so one call covers both cases
+            response = requests.get(uri, headers=req_headers, params=params or None, timeout=self.timeout)
+            response.raise_for_status()  # Raises an HTTPError for bad responses
+            return response.json()
+        except (requests.exceptions.RequestException, ValueError) as e:
+            # ValueError covers JSON decode failures on requests versions that do not wrap them
+            self.print_stderr(f"Error: Problem getting project data: {e}")
+        return None

scanoss/spdxlite.py CHANGED Viewed

@@ -71,9 +71,12 @@ class SpdxLite:
         :param data: json - JSON object
         :return: summary dictionary
         """
-        if not data:
+        if data is None:
             self.print_stderr('ERROR: No JSON data provided to parse.')
             return None
+        if len(data) == 0:
+            self.print_debug('Warning: Empty scan results provided. Returning empty summary.')
+            return {}
         self.print_debug('Processing raw results into summary format...')
         return self._process_files(data)
@@ -223,7 +226,9 @@ class SpdxLite:
             Process license information and remove duplicates.
             This method filters license information to include only licenses from trusted sources
-            ('component_declared' or 'license_file') and removes any duplicate license names.
+            ('component_declared', 'license_file', 'file_header'). Licenses with an unspecified
+            source (None or '') are allowed. Non-empty, non-allowed sources are excluded. It also
+            removes any duplicate license names.
             The result is a simplified list of license dictionaries containing only the 'id' field.
             Args:
@@ -244,7 +249,7 @@ class SpdxLite:
         for license_info in licenses:
             name = license_info.get('name')
             source = license_info.get('source')
-            if source not in ("component_declared", "license_file", "file_header"):
+            if source not in (None, '') and source not in ("component_declared", "license_file", "file_header"):
                 continue
             if name and name not in seen_names:
                 processed_licenses.append({'id': name})
@@ -277,9 +282,11 @@ class SpdxLite:
         :return: True if successful, False otherwise
         """
         raw_data = self.parse(data)
-        if not raw_data:
+        if raw_data is None:
             self.print_stderr('ERROR: No SPDX data returned for the JSON string provided.')
             return False
+        if len(raw_data) == 0:
+            self.print_debug('Warning: Empty scan results - generating minimal SPDX Lite document with no packages.')
         self.load_license_data()
         spdx_document = self._create_base_document(raw_data)

scanoss/threadeddependencies.py CHANGED Viewed

@@ -22,12 +22,12 @@ SPDX-License-Identifier: MIT
   THE SOFTWARE.
 """
-import threading
-import queue
 import json
-from enum import Enum
-from typing import Dict, Optional, Set
+import queue
+import threading
 from dataclasses import dataclass
+from enum import Enum
+from typing import Dict
 from .scancodedeps import ScancodeDeps
 from .scanossbase import ScanossBase
@@ -63,7 +63,7 @@ class ThreadedDependencies(ScanossBase):
     inputs: queue.Queue = queue.Queue()
     output: queue.Queue = queue.Queue()
-    def __init__(
+    def __init__(  # noqa: PLR0913
         self,
         sc_deps: ScancodeDeps,
         grpc_api: ScanossGrpc,
@@ -180,13 +180,15 @@ class ThreadedDependencies(ScanossBase):
             return self.filter_dependencies(
                 deps, lambda purl: (exclude and purl not in exclude) or (not exclude and purl in include)
             )
+        return None
-    def scan_dependencies(
+    def scan_dependencies(  # noqa: PLR0912
         self, dep_scope: SCOPE = None, dep_scope_include: str = None, dep_scope_exclude: str = None
     ) -> None:
         """
         Scan for dependencies from the given file/dir or from an input file (from the input queue).
         """
+        # TODO refactor to simplify branches based on PLR0912
         current_thread = threading.get_ident()
         self.print_trace(f'Starting dependency worker {current_thread}...')
         try:
@@ -194,18 +196,17 @@ class ThreadedDependencies(ScanossBase):
             deps = None
             if what_to_scan.startswith(DEP_FILE_PREFIX):  # We have a pre-parsed dependency file, load it
                 deps = self.sc_deps.load_from_file(what_to_scan.strip(DEP_FILE_PREFIX))
-            else:  # Search the file/folder for dependency files to parse
-                if not self.sc_deps.run_scan(what_to_scan=what_to_scan):
-                    self._errors = True
-                else:
-                    deps = self.sc_deps.produce_from_file()
-                    if dep_scope is not None:
-                        self.print_debug(f'Filtering {dep_scope.name} dependencies')
-                    if dep_scope_include is not None:
-                        self.print_debug(f"Including dependencies with '{dep_scope_include.split(',')}' scopes")
-                    if dep_scope_exclude is not None:
-                        self.print_debug(f"Excluding dependencies with '{dep_scope_exclude.split(',')}' scopes")
-                    deps = self.filter_dependencies_by_scopes(deps, dep_scope, dep_scope_include, dep_scope_exclude)
+            elif not self.sc_deps.run_scan(what_to_scan=what_to_scan):
+                self._errors = True
+            else:
+                deps = self.sc_deps.produce_from_file()
+                if dep_scope is not None:
+                    self.print_debug(f'Filtering {dep_scope.name} dependencies')
+                if dep_scope_include is not None:
+                    self.print_debug(f"Including dependencies with '{dep_scope_include.split(',')}' scopes")
+                if dep_scope_exclude is not None:
+                    self.print_debug(f"Excluding dependencies with '{dep_scope_exclude.split(',')}' scopes")
+                deps = self.filter_dependencies_by_scopes(deps, dep_scope, dep_scope_include, dep_scope_exclude)
             if not self._errors:
                 if deps is None:

scanoss/threadedscanning.py CHANGED Viewed

@@ -22,6 +22,7 @@ SPDX-License-Identifier: MIT
   THE SOFTWARE.
 """
+import atexit
 import os
 import queue
 import sys
@@ -77,6 +78,8 @@ class ThreadedScanning(ScanossBase):
         if nb_threads > MAX_ALLOWED_THREADS:
             self.print_msg(f'Warning: Requested threads too large: {nb_threads}. Reducing to {MAX_ALLOWED_THREADS}')
             self.nb_threads = MAX_ALLOWED_THREADS
+        # Register cleanup to ensure progress bar is finished on exit
+        atexit.register(self.complete_bar)
     @staticmethod
     def __count_files_in_wfp(wfp: str):
@@ -101,6 +104,13 @@ class ThreadedScanning(ScanossBase):
         if self.bar:
             self.bar.finish()
+    def __del__(self):
+        """
+        Best-effort finaliser: ask complete_bar() to finish the progress bar on destruction.
+        Any exception is swallowed on purpose, since nothing useful can be done with it here.
+        """
+        try:
+            self.complete_bar()
+        except Exception:
+            return  # Ignore errors during cleanup
     def set_bar(self, bar: Bar) -> None:
         """
         Set the Progress Bar to display progress while scanning

scanoss/utils/scanoss_scan_results_utils.py ADDED Viewed

@@ -0,0 +1,41 @@
+"""
+SPDX-License-Identifier: MIT
+  Copyright (c) 2025, SCANOSS
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to deal
+  in the Software without restriction, including without limitation the rights
+  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+  copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+  The above copyright notice and this permission notice shall be included in
+  all copies or substantial portions of the Software.
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+  THE SOFTWARE.
+"""
+def get_lines(lines: str) -> list:
+    """
+       Parse a SCANOSS line-range string into a flat list of range boundaries.
+       Each comma-separated entry is split on '-' and every number found is appended in order, so
+       '10-20,25-30' yields [10, 20, 25, 30]. The ranges are NOT expanded into every line between
+       the start and the end.
+       :param lines: Comma-separated line ranges in SCANOSS format (e.g., '10-20,25-30'); may be empty
+       :return: Flat list of the integer boundaries, or an empty list when there is nothing to parse
+       :raises ValueError: if a non-empty entry contains a part that is not an integer
+    """
+    # Nothing to parse (None or ''): return an empty list rather than failing on int('')
+    if not lines:
+        return []
+    boundaries = []
+    for line_range in lines.split(','):
+        if not line_range.strip():
+            continue  # Tolerate stray or trailing commas
+        boundaries.extend(int(part) for part in line_range.split('-'))
+    return boundaries

scanoss/winnowing.py CHANGED Viewed

@@ -37,6 +37,7 @@ from typing import Tuple
 from binaryornot.check import is_binary
 from crc32c import crc32c
+from .header_filter import HeaderFilter
 from .scanossbase import ScanossBase
 # Winnowing configuration. DO NOT CHANGE.
@@ -172,6 +173,8 @@ class Winnowing(ScanossBase):
         strip_hpsm_ids=None,
         strip_snippet_ids=None,
         skip_md5_ids=None,
+        skip_headers: bool = False,
+        skip_headers_limit: int = 0,
     ):
         """
         Instantiate Winnowing class
@@ -198,7 +201,9 @@ class Winnowing(ScanossBase):
         self.strip_hpsm_ids = strip_hpsm_ids
         self.strip_snippet_ids = strip_snippet_ids
         self.hpsm = hpsm
+        self.skip_headers = skip_headers
         self.is_windows = platform.system() == 'Windows'
+        self.header_filter = HeaderFilter(debug=debug, trace=trace, quiet=quiet, skip_limit=skip_headers_limit)
         if hpsm:
             self.crc8_maxim_dow_table = []
             self.crc8_generate_table()
@@ -353,6 +358,48 @@ class Winnowing(ScanossBase):
             self.print_debug(f'Stripped snippet ids from {file}')
         return wfp
+    def __strip_lines_until_offset(self, file: str, wfp: str, line_offset: int) -> str:
+        """
+        Remove snippet entries at or before line_offset from a WFP.
+        Entries of the form '<line_number>=...' are dropped when their line number is not greater
+        than line_offset. A 'start_line=<line_offset>' tag is inserted just before the first
+        snippet entry that is kept. Every other entry is passed through untouched.
+        :param file: name of fingerprinted file (used in messages only)
+        :param wfp: WFP to clean
+        :param line_offset: last line number to strip; values <= 0 leave the WFP unchanged
+        :return: updated WFP
+        """
+        if line_offset <= 0:
+            # Nothing to strip
+            return wfp
+        kept = []
+        tag_emitted = False
+        for entry in wfp.split('\n'):
+            prefix = entry.split('=')[0]
+            if not prefix.isdigit():
+                # Not a snippet entry (file=, hpsm=, ...), so keep it as is
+                kept.append(entry)
+                continue
+            try:
+                entry_line = int(prefix)
+            except (ValueError, IndexError) as e:
+                self.print_stderr(f'Error decoding line number from line {entry} in {file}: {e}')
+                # Could not decode the number, so treat it like a non-snippet entry
+                kept.append(entry)
+                continue
+            if entry_line <= line_offset:
+                # Still inside the region being skipped
+                continue
+            if not tag_emitted:
+                # Announce the offset once, ahead of the first surviving snippet entry
+                kept.append(f'start_line={line_offset}')
+                tag_emitted = True
+            kept.append(entry)
+        if tag_emitted:
+            self.print_debug(f'Stripped lines up to offset {line_offset} from {file}')
+        return '\n'.join(kept)
     def __detect_line_endings(self, contents: bytes) -> Tuple[bool, bool, bool]:
         """Detect the types of line endings present in file contents.
@@ -362,13 +409,14 @@ class Winnowing(ScanossBase):
         Returns:
             Tuple of (has_crlf, has_lf_only, has_cr_only, has_mixed) indicating which line ending types are present.
         """
+        if not contents:
+            self.print_debug('Warning: No file contents provided')
         has_crlf = b'\r\n' in contents
         # For LF detection, we need to find LF that's not part of CRLF
         content_without_crlf = contents.replace(b'\r\n', b'')
         has_standalone_lf = b'\n' in content_without_crlf
         # For CR detection, we need to find CR that's not part of CRLF
         has_standalone_cr = b'\r' in content_without_crlf
         return has_crlf, has_standalone_lf, has_standalone_cr
     def __calculate_opposite_line_ending_hash(self, contents: bytes):
@@ -384,13 +432,11 @@ class Winnowing(ScanossBase):
             Hash with opposite line endings as hex string, or None if no line endings detected.
         """
         has_crlf, has_standalone_lf, has_standalone_cr = self.__detect_line_endings(contents)
         if not has_crlf and not has_standalone_lf and not has_standalone_cr:
+            self.print_debug('No line endings detected in file contents')
             return None
-        # Normalize all line endings to LF first
+        # Normalise all line endings to LF first
         normalized = contents.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
         # Determine the dominant line ending type
         if has_crlf and not has_standalone_lf and not has_standalone_cr:
             # File is Windows (CRLF) - produce Unix (LF) hash
@@ -398,7 +444,7 @@ class Winnowing(ScanossBase):
         else:
             # File is Unix (LF/CR) or mixed - produce Windows (CRLF) hash
             opposite_contents = normalized.replace(b'\n', b'\r\n')
+        # Return the MD5 hash of the opposite contents
         return hashlib.md5(opposite_contents).hexdigest()
     def wfp_for_contents(self, file: str, bin_file: bool, contents: bytes) -> str:  # noqa: PLR0912, PLR0915
@@ -420,27 +466,26 @@ class Winnowing(ScanossBase):
         # Print file line
         content_length = len(contents)
         original_filename = file
         if self.is_windows:
             original_filename = file.replace('\\', '/')
         wfp_filename = repr(original_filename).strip("'")  # return a utf-8 compatible version of the filename
-        if self.obfuscate:  # hide the real size of the file and its name, but keep the suffix
+        # hide the real size of the file and its name but keep the suffix
+        if self.obfuscate:
             wfp_filename = f'{self.ob_count}{pathlib.Path(original_filename).suffix}'
             self.ob_count = self.ob_count + 1
             self.file_map[wfp_filename] = original_filename  # Save the file name map for later (reverse lookup)
+        # Construct the WFP header
         wfp = 'file={0},{1},{2}\n'.format(file_md5, content_length, wfp_filename)
-        # Add opposite line ending hash based on line ending analysis
+        # Add the opposite line ending hash based on line ending analysis
         if not bin_file:
             opposite_hash = self.__calculate_opposite_line_ending_hash(contents)
             if opposite_hash is not None:
                 wfp += f'fh2={opposite_hash}\n'
         # We don't process snippets for binaries, or other uninteresting files, or if we're requested to skip
-        if bin_file or self.skip_snippets or self.__skip_snippets(file, contents.decode('utf-8', 'ignore')):
+        decoded_contents = contents.decode('utf-8', 'ignore')
+        if bin_file or self.skip_snippets or self.__skip_snippets(file, decoded_contents):
             return wfp
-        # Add HPSM
+        # Add HPSM (calculated from original contents, not filtered)
         if self.hpsm:
             hpsm = self.__strip_hpsm(file, self.calc_hpsm(contents))
             if len(hpsm) > 0:
@@ -448,7 +493,7 @@ class Winnowing(ScanossBase):
         # Initialize variables
         gram = ''
         window = []
-        line = 1
+        line = 1  # Line counter for WFP generation
         last_hash = MAX_CRC32
         last_line = 0
         output = ''
@@ -503,12 +548,19 @@ class Winnowing(ScanossBase):
                 wfp += output + '\n'
             else:
                 self.print_debug(f'Warning: skipping output in WFP for {file} - "{output}"')
+        # Warn if we don't have any WFP content
         if wfp is None or wfp == '':
             self.print_stderr(f'Warning: No WFP content data for {file}')
-        elif self.strip_snippet_ids:
-            wfp = self.__strip_snippets(file, wfp)
+        else:
+            # Apply line filter to remove headers, comments, and imports from the beginning (if enabled)
+            if self.skip_headers:
+                line_offset = self.header_filter.filter(file, decoded_contents)
+                if line_offset > 0:
+                    wfp = self.__strip_lines_until_offset(file, wfp, line_offset)
+            # Strip snippet IDs from the WFP (if enabled)
+            if self.strip_snippet_ids:
+                wfp = self.__strip_snippets(file, wfp)
+        # Return the WFP contents
         return wfp
     def calc_hpsm(self, content):

{scanoss-1.27.1.dist-info → scanoss-1.43.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: scanoss
-Version: 1.27.1
+Version: 1.43.1
 Summary: Simple Python library to leverage the SCANOSS APIs
 Home-page: https://scanoss.com
 Author: SCANOSS
@@ -13,15 +13,16 @@ Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Programming Language :: Python :: 3
-Requires-Python: >=3.7
+Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: requests
 Requires-Dist: crc32c>=2.2
 Requires-Dist: binaryornot
 Requires-Dist: progress
-Requires-Dist: grpcio>1.42.0
-Requires-Dist: protobuf>3.19.1
+Requires-Dist: grpcio>=1.73.1
+Requires-Dist: protobuf>=6.3.1
+Requires-Dist: protoc-gen-openapiv2
 Requires-Dist: pypac
 Requires-Dist: pyOpenSSL
 Requires-Dist: google-api-core
@@ -30,6 +31,8 @@ Requires-Dist: packageurl-python
 Requires-Dist: pathspec
 Requires-Dist: jsonschema
 Requires-Dist: crc
+Requires-Dist: protoc-gen-openapiv2
+Requires-Dist: cyclonedx-python-lib[validation]
 Provides-Extra: fast-winnowing
 Requires-Dist: scanoss_winnowing>=0.5.0; extra == "fast-winnowing"
 Dynamic: license-file
@@ -174,7 +177,7 @@ if __name__ == "__main__":
 ```
 ## Requirements
-Python 3.7 or higher.
+Python 3.9 or higher.
 ## Source code
 The source for this package can be found [here](https://github.com/scanoss/scanoss.py).

scanoss 1.27.1__py3-none-any.whl → 1.43.1__py3-none-any.whl

scanoss-1.27.1-py3-none-any.whl → scanoss-1.43.1-py3-none-any.whl